Skip to content
This repository has been archived by the owner on Feb 22, 2025. It is now read-only.

Commit

Permalink
Merge pull request #810 from rix1337/dev
Browse files Browse the repository at this point in the history
Improve RegEx search for BY
  • Loading branch information
rix1337 authored Oct 3, 2024
2 parents cc4aa0b + b6be404 commit 9e54fce
Show file tree
Hide file tree
Showing 6 changed files with 23 additions and 11 deletions.
22 changes: 17 additions & 5 deletions feedcrawler/external_sites/feed_search/sites/content_all_by.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,15 @@ def __init__(self, filename):
self.url = self.hostnames.get('by')
self.password = self.url.split('.')[0]

if "List_ContentAll_Seasons" not in filename:
if "_Regex" in filename:
self.URL = 'https://' + self.url + "/?cat="
self.FEED_URLS = [self.URL + "1", self.URL + "2"]
elif "List_ContentAll_Seasons" not in filename:
self.URL = 'https://' + self.url + "/?cat=1"
self.FEED_URLS = [self.URL]
else:
self.URL = 'https://' + self.url + "/?cat=2"
self.FEED_URLS = [self.URL]
self.FEED_URLS = [self.URL]

self.config = CrawlerConfig("ContentAll")
self.feedcrawler = CrawlerConfig("FeedCrawler")
Expand All @@ -49,9 +53,17 @@ def __init__(self, filename):
search = int(CrawlerConfig("ContentAll").get("search"))
i = 2
while i <= search:
page_url = self.URL + "&start=" + str(i)
if page_url not in self.FEED_URLS:
self.FEED_URLS.append(page_url)
if "_Regex" in filename:
page_url_1 = self.URL + "1&start=" + str(i)
page_url_2 = self.URL + "2&start=" + str(i)
if page_url_1 not in self.FEED_URLS:
self.FEED_URLS.append(page_url_1)
if page_url_2 not in self.FEED_URLS:
self.FEED_URLS.append(page_url_2)
else:
page_url = self.URL + "&start=" + str(i)
if page_url not in self.FEED_URLS:
self.FEED_URLS.append(page_url)
i += 1
self.cdc = FeedDb('cdc')

Expand Down
2 changes: 1 addition & 1 deletion feedcrawler/providers/common_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -620,7 +620,7 @@ def keep_alphanumeric_with_special_characters(string):

def keep_alphanumeric_with_regex_characters(string):
# Passes the input through replace_with_stripped_ascii() and then removes every
# character that is not part of the allow-list used for regex search terms.
string = replace_with_stripped_ascii(string)
# NOTE(review): this is a diff hunk ("1 addition & 1 deletion") rendered without +/- markers,
# so the two return lines are two versions of the same statement - presumably the
# removed line first and the added line second; confirm against the repository.
# Variant 1 keeps: digits, ASCII letters, whitespace and - . * + ( ) | [ ] ? !
return re.sub(r'[^0-9a-zA-Z\s\-.*+()|\[\]?!]', '', string)
# Variant 2 keeps everything from variant 1 plus backslash, '{', '}' and ','.
return re.sub(r'[^0-9a-zA-Z\s\-.*+()|\[\]\\{},?!]', '', string)


def keep_numbers(string):
Expand Down
2 changes: 1 addition & 1 deletion feedcrawler/providers/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@


def get_version():
# Returns the version as a hard-coded string.
# NOTE(review): diff hunk ("1 addition & 1 deletion") rendered without +/- markers - the two
# return lines are presumably the removed value followed by the added value
# (package.json in the same commit shows the same 20.1.8 / 20.1.9 pair); confirm.
return "20.1.8"
return "20.1.9"


def create_version_file():
Expand Down
4 changes: 2 additions & 2 deletions feedcrawler/web_interface/vuejs_frontend/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion feedcrawler/web_interface/vuejs_frontend/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "feedcrawler-web",
"version": "20.1.8",
"version": "20.1.9",
"type": "module",
"scripts": {
"dev": "vite",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ function submitLists() {
@click="showRegExHelp">RegEx-Suche</span>
</h5><!-- Setting variables in label is unsupported -->
<FormKit v-model="store.lists.mb.regex"
:validation="[['?matches', /^[a-zA-Z0-9ÄäÖöÜüß\-\s.*+()|\[\]?!]+$/]]"
:validation="[['?matches', /^[a-zA-Z0-9ÄäÖöÜüß\-\s.*+()|\[\]\\{},?!]+$/]]"
:validation-messages="{
matches: 'Bitte nur Buchstaben, Zahlen, Leerzeichen oder folgende Sonderzeichen eingeben: . * + - ( ) | [ ] \\ { } , ? !'
}"
Expand Down

0 comments on commit 9e54fce

Please sign in to comment.