From 07e0d0b4c329fdc7865035ed3793114b23a42a67 Mon Sep 17 00:00:00 2001 From: Bojie Li Date: Fri, 23 Aug 2024 17:24:43 +0800 Subject: [PATCH] [fix] update stop words to support special Chinese chars in search query Also escape the square brackets in the sqllike filter pattern: unescaped, the character class ended at the first ']' and the punctuation listed after it (including the newly added Chinese characters) was never stripped. --- app/models/searchcache.py | 2 +- app/views/search/sqlcache.py | 2 +- app/views/search/sqllike.py | 2 +- tests/import_courses_new.py | 4 ++++ 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/app/models/searchcache.py b/app/models/searchcache.py index 1582684a..48479404 100644 --- a/app/models/searchcache.py +++ b/app/models/searchcache.py @@ -16,7 +16,7 @@ # "数据分析与实践" -> "数据分析及实践" # "概率论和数理统计" -> "概率论与数理统计" def is_chinese_stop_char(c: str) -> bool: - STOP = ["与", "和", "及"] + STOP = ["与", "和", "及", ",", "、", "。", ":", "(", ")", "【", "】"] return c in STOP diff --git a/app/views/search/sqlcache.py b/app/views/search/sqlcache.py index c5474367..83d17053 100644 --- a/app/views/search/sqlcache.py +++ b/app/views/search/sqlcache.py @@ -9,7 +9,7 @@ # from app.utils import print_sqlalchemy_statement -filter = lambda x: re.sub(r"""[~`!@#$%^&*{}\[\]\\:\";'<>,/\+\-\~\(\)><\x00-\x1F\x7F]""", " ", x) +filter = lambda x: re.sub(r"""[~`!@#$%^&*{}\[\]\\:\";'<>,/\+\-\~\(\)><,、。:【】()?“”「」·\x00-\x1F\x7F]""", " ", x) def init() -> None: diff --git a/app/views/search/sqllike.py b/app/views/search/sqllike.py index 165dab28..200f4f30 100644 --- a/app/views/search/sqllike.py +++ b/app/views/search/sqllike.py @@ -15,7 +15,7 @@ import re -filter = lambda x: re.sub(r'''[~`!@#$%^&*{}[]|\\:";'<>?,./]''', ' ', x) +filter = lambda x: re.sub(r'''[~`!@#$%^&*{}\[\]|\\:";'<>?,./,、。:【】()?“”「」·]''', ' ', x) def init() -> None: diff --git a/tests/import_courses_new.py b/tests/import_courses_new.py index 943d9a00..e6311265 100755 --- a/tests/import_courses_new.py +++ b/tests/import_courses_new.py @@ -6,6 +6,7 @@ from app.models import * from datetime import datetime + def parse_file(filename): data = [] with open(filename) as f: @@ -250,6 +251,9 @@ def load_courses(insert=True): 
course_class.term = term course_class.cno = class_code + # update course search cache + CourseSearchCache.update(course, commit=False) + print('load complete, committing changes to database') db.session.commit() print('%d new teachers loaded' % new_teacher_count)