From bab492549957d1d81a0c142e4c68a64b3720c54b Mon Sep 17 00:00:00 2001 From: Xander Vertegaal Date: Thu, 4 Jul 2024 08:37:28 +0200 Subject: [PATCH 1/5] Mark highlighted phrases in backend --- backend/aethel_db/views.py | 113 +++++++++++------- frontend/src/app/aethel/aethel.component.html | 9 +- frontend/src/app/aethel/aethel.component.scss | 4 + frontend/src/app/shared/types.ts | 15 +-- 4 files changed, 89 insertions(+), 52 deletions(-) diff --git a/backend/aethel_db/views.py b/backend/aethel_db/views.py index a30a643..ef82dc2 100644 --- a/backend/aethel_db/views.py +++ b/backend/aethel_db/views.py @@ -20,10 +20,17 @@ def aethel_status(): return dataset is not None +@dataclass +class AethelSamplePhrase: + index: str + display: str + highlight: bool + + @dataclass class AethelListSample: name: str - sentence: str + phrases: List[AethelSamplePhrase] = field(default_factory=list) @dataclass @@ -43,29 +50,16 @@ class AethelListResponse: results: List[AethelListItem] = field(default_factory=list) error: Optional[str] = None - def existing_result( + def get_or_create_existing_result( self, lemma: str, word: str, type: str - ) -> Optional[AethelListItem]: + ) -> AethelListItem: """ - Return an existing result with the same lemma, word, and type, if it exists. + Return an existing result with the same lemma, word, and type, or create a new one if it doesn't exist. """ - for item in self.results: - if item.lemma == lemma and item.type == type and item.word == word: - return item - return None - - def add_result( - self, lemma: str, word: str, type: str, sample_name: str, sample_sentence: str - ) -> None: - result_item = self.existing_result(lemma, word, type) - - if result_item is None: - result_item = AethelListItem(lemma=lemma, word=word, type=type) - self.results.append(result_item) - - result_item.samples.append( - AethelListSample(name=sample_name, sentence=sample_sentence) - ) + for result in self.results: + if result.lemma == lemma and result.type == type and result.word == word: + return result + return AethelListItem(lemma=lemma, word=word, type=type, samples=[]) def json_response(self) -> JsonResponse: results = [asdict(result) for result in self.results] @@ -85,32 +79,65 @@ def get(self, request: HttpRequest) -> JsonResponse: if query_input is None or len(query_input) < 3: return AethelListResponse().json_response() - # First we select all relevant samples from the dataset that contain the query string. - query_result = search( - bank=dataset.samples, - query=in_word(query_input) | in_lemma(query_input), - ) - - def item_contains_query_string(item: LexicalItem) -> bool: + def item_contains_query_string(item: LexicalItem, query_input: str) -> bool: + """ + Checks if a LexicalItem contains a given input string in its word or its lemma. + """ return ( query_input.lower() in item.lemma.lower() or query_input.lower() in item.word.lower() ) + def serialize_sample( + sample: Sample, highlighted_phrase_indices: set[int] + ) -> AethelListSample: + """ + Turns a Sample into an AethelListSample, while marking phrases that should be highlighted. + """ + new_phrases = [] + for index, phrase in enumerate(sample.lexical_phrases): + highlighted = index in highlighted_phrase_indices + new_phrase = AethelSamplePhrase( + index=index, + display=phrase.string, + highlight=highlighted, + ) + new_phrases.append(new_phrase) + + return AethelListSample(sample.name, new_phrases) + response_object = AethelListResponse() - # Then we loop over the samples and extract what we need from them (lemma, word, type etc.). + + # First we select all relevant samples from the dataset that contain the query string. + query_result = search( + bank=dataset.samples, + query=in_word(query_input) | in_lemma(query_input), + ) + + # Then we transform the results. + # Each key in result_dict is a unique combination of lemma, word, and type. + # Each value is a sample, mapped to a set of indices referring to the specific phrase that has the type. + result_dict: dict[tuple[str, str, str], dict[str, set]] = {} for sample in query_result: - lexical_phrases = sample.lexical_phrases - for phrase in lexical_phrases: + for phrase_index, phrase in enumerate(sample.lexical_phrases): for item in phrase.items: - if item_contains_query_string(item): - response_object.add_result( - item.lemma, - item.word, - str(phrase.type), - sample.name, - sample.sentence, - ) + if item_contains_query_string(item, query_input): + key = (item.lemma, item.word, str(phrase.type)) + # setdefault gets the value for a given key or adds the key with a provided value if None is found. + samples = result_dict.setdefault(key, {}) + phrase_indices = samples.setdefault(sample.name, set()) + phrase_indices.add(phrase_index) + + # Finally, we serialize the samples and add them to the response object. + for key, samples in result_dict.items(): + lemma, word, type = key + list_item = response_object.get_or_create_existing_result( + lemma=lemma, word=word, type=type + ) + for sample_name, phrase_indices in samples.items(): + sample = dataset.find_by_name(sample_name).pop() + list_item.samples.append(serialize_sample(sample, phrase_indices)) + response_object.results.append(list_item) return response_object.json_response() @@ -124,9 +151,10 @@ class AethelDetailError(Enum): aethel_detail_status_codes = { AethelDetailError.NO_QUERY_INPUT: status.HTTP_400_BAD_REQUEST, AethelDetailError.SAMPLE_NOT_FOUND: status.HTTP_404_NOT_FOUND, - AethelDetailError.MULTIPLE_FOUND: status.HTTP_500_INTERNAL_SERVER_ERROR + AethelDetailError.MULTIPLE_FOUND: status.HTTP_500_INTERNAL_SERVER_ERROR, } + @dataclass class AethelDetailResult: sentence: str @@ -135,6 +163,7 @@ class AethelDetailResult: subset: str phrases: list[dict] + @dataclass class AethelDetailResponse: result: Optional[AethelDetailResult] = None @@ -151,7 +180,9 @@ def parse_sample(self, sample: Sample) -> None: def json_response(self) -> JsonResponse: result = asdict(self.result) if self.result else None - status_code = aethel_detail_status_codes[self.error] if self.error else status.HTTP_200_OK + status_code = ( + aethel_detail_status_codes[self.error] if self.error else status.HTTP_200_OK + ) return JsonResponse( { diff --git a/frontend/src/app/aethel/aethel.component.html b/frontend/src/app/aethel/aethel.component.html index ad0c036..cabd52a 100644 --- a/frontend/src/app/aethel/aethel.component.html +++ b/frontend/src/app/aethel/aethel.component.html @@ -107,10 +107,11 @@

Æthel

@for (sample of row.samples; track $index) { - + + @for (phrase of sample.phrases; track phrase.index) { + {{ phrase.display }} + } + | null; } - -export interface AethelSample { - name: string; - sentence: string; -} - export interface AethelListReturnItem { lemma: string; word: string; type: string; - samples: AethelSample[]; + samples: { + name: string; + phrases: { + index: string; + display: string; + highlight: boolean; + }[]; + }[]; } export interface AethelListReturn { From b4156a0e17bcadc1136804f4480dc0882205eaf7 Mon Sep 17 00:00:00 2001 From: Xander Vertegaal Date: Thu, 4 Jul 2024 08:56:27 +0200 Subject: [PATCH 2/5] Remove highlight pipe --- frontend/src/app/app.module.ts | 2 - .../app/shared/pipes/highlight.pipe.spec.ts | 57 ------------------- .../src/app/shared/pipes/highlight.pipe.ts | 15 ----- 3 files changed, 74 deletions(-) delete mode 100644 frontend/src/app/shared/pipes/highlight.pipe.spec.ts delete mode 100644 frontend/src/app/shared/pipes/highlight.pipe.ts diff --git a/frontend/src/app/app.module.ts b/frontend/src/app/app.module.ts index 8365803..7f7ada4 100644 --- a/frontend/src/app/app.module.ts +++ b/frontend/src/app/app.module.ts @@ -18,7 +18,6 @@ import { ExportButtonComponent } from "./spindle/export-button/export-button.com import { FontAwesomeModule } from "@fortawesome/angular-fontawesome"; import { TableModule } from "primeng/table"; import { AethelComponent } from "./aethel/aethel.component"; -import { HighlightPipe } from "./shared/pipes/highlight.pipe"; import { SpindleAboutComponent } from "./spindle/spindle-about/spindle-about.component"; import { SpindleNotationComponent } from "./spindle/spindle-notation/spindle-notation.component"; import { ReferencesComponent } from "./references/references.component"; @@ -39,7 +38,6 @@ import { SampleComponent } from "./sample/sample.component"; SpindleNotationComponent, ReferencesComponent, AethelComponent, - HighlightPipe, ProofPipe, SampleComponent, ], diff --git a/frontend/src/app/shared/pipes/highlight.pipe.spec.ts b/frontend/src/app/shared/pipes/highlight.pipe.spec.ts deleted file mode 100644 index 0c1393c..0000000 --- a/frontend/src/app/shared/pipes/highlight.pipe.spec.ts +++ /dev/null @@ -1,57 +0,0 @@ -import { TestBed } from "@angular/core/testing"; -import { HighlightPipe } from "./highlight.pipe"; -import { DomSanitizer } from "@angular/platform-browser"; - -describe("HighlightPipe", () => { - let pipe: HighlightPipe; - - beforeEach(async () => { - TestBed.configureTestingModule({ - providers: [{ - provide: DomSanitizer, - useValue: { - bypassSecurityTrustHtml: (value: string) => value, - }, - - }] - }); - const sanitizer = TestBed.inject(DomSanitizer); - pipe = new HighlightPipe(sanitizer); - }); - - it("should create an instance", () => { - expect(pipe).toBeTruthy(); - }); - - it("should highlight the specified word in the input string", () => { - const inputString = "Lorem ipsum dolor sit amet"; - const wordToHighlight = "ipsum"; - const expectedOutput = "Lorem ipsum dolor sit amet"; - - const result = pipe.transform(inputString, wordToHighlight); - - expect(result).toEqual(expectedOutput); - }); - - it("should handle case-insensitive highlighting", () => { - const inputString = "Lorem ipsum dolor sit amet"; - const wordToHighlight = "LOREM"; - const expectedOutput = "Lorem ipsum dolor sit amet"; - - const result = pipe.transform(inputString, wordToHighlight); - - expect(result).toEqual(expectedOutput); - }); - - it("should handle multiple occurrences of the word to highlight", () => { - const inputString = - "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed ipsum ipsum ipsum."; - const wordToHighlight = "ipsum"; - const expectedOutput = - "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed ipsum ipsum ipsum."; - - const result = pipe.transform(inputString, wordToHighlight); - - expect(result).toEqual(expectedOutput); - }); -}); diff --git a/frontend/src/app/shared/pipes/highlight.pipe.ts b/frontend/src/app/shared/pipes/highlight.pipe.ts deleted file mode 100644 index 4f9f291..0000000 --- a/frontend/src/app/shared/pipes/highlight.pipe.ts +++ /dev/null @@ -1,15 +0,0 @@ -import { Pipe, PipeTransform } from "@angular/core"; -import { DomSanitizer, SafeHtml } from "@angular/platform-browser"; - -@Pipe({ - name: "highlight", -}) -export class HighlightPipe implements PipeTransform { - constructor(private sanitizer: DomSanitizer) {} - - transform(value: string, word: string): SafeHtml { - const regex = new RegExp(`\\b${word}\\b`, "gi"); - const highlighted = value.replace(regex, "$&"); - return this.sanitizer.bypassSecurityTrustHtml(highlighted); - } -} From ab6d7dedf5e1839842d040a6b8afb0be5272b775 Mon Sep 17 00:00:00 2001 From: Xander Vertegaal Date: Thu, 4 Jul 2024 10:53:14 +0200 Subject: [PATCH 3/5] Remove unnecessary existing_ --- backend/aethel_db/views.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/aethel_db/views.py b/backend/aethel_db/views.py index ef82dc2..de8631c 100644 --- a/backend/aethel_db/views.py +++ b/backend/aethel_db/views.py @@ -50,7 +50,7 @@ class AethelListResponse: results: List[AethelListItem] = field(default_factory=list) error: Optional[str] = None - def get_or_create_existing_result( + def get_or_create_result( self, lemma: str, word: str, type: str ) -> AethelListItem: """ @@ -131,7 +131,7 @@ def serialize_sample( # Finally, we serialize the samples and add them to the response object. for key, samples in result_dict.items(): lemma, word, type = key - list_item = response_object.get_or_create_existing_result( + list_item = response_object.get_or_create_result( lemma=lemma, word=word, type=type ) for sample_name, phrase_indices in samples.items(): From 22c3aa6ad999e83f9c73d531cb14d5edcdece728 Mon Sep 17 00:00:00 2001 From: Xander Vertegaal Date: Thu, 4 Jul 2024 13:14:18 +0200 Subject: [PATCH 4/5] Remove superfluous index field --- backend/aethel_db/views.py | 8 ++------ frontend/src/app/shared/types.ts | 1 - 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/backend/aethel_db/views.py b/backend/aethel_db/views.py index de8631c..78febec 100644 --- a/backend/aethel_db/views.py +++ b/backend/aethel_db/views.py @@ -22,7 +22,6 @@ def aethel_status(): @dataclass class AethelSamplePhrase: - index: str display: str highlight: bool @@ -50,9 +49,7 @@ class AethelListResponse: results: List[AethelListItem] = field(default_factory=list) error: Optional[str] = None - def get_or_create_result( - self, lemma: str, word: str, type: str - ) -> AethelListItem: + def get_or_create_result(self, lemma: str, word: str, type: str) -> AethelListItem: """ Return an existing result with the same lemma, word, and type, or create a new one if it doesn't exist. """ @@ -98,7 +95,6 @@ def serialize_sample( for index, phrase in enumerate(sample.lexical_phrases): highlighted = index in highlighted_phrase_indices new_phrase = AethelSamplePhrase( - index=index, display=phrase.string, highlight=highlighted, ) @@ -117,7 +113,7 @@ def serialize_sample( # Then we transform the results. # Each key in result_dict is a unique combination of lemma, word, and type. # Each value is a sample, mapped to a set of indices referring to the specific phrase that has the type. - result_dict: dict[tuple[str, str, str], dict[str, set]] = {} + result_dict: dict[tuple[str, str, str], dict[str, set[int]]] = {} for sample in query_result: for phrase_index, phrase in enumerate(sample.lexical_phrases): for item in phrase.items: diff --git a/frontend/src/app/shared/types.ts b/frontend/src/app/shared/types.ts index d4d6312..6fc9864 100644 --- a/frontend/src/app/shared/types.ts +++ b/frontend/src/app/shared/types.ts @@ -42,7 +42,6 @@ export interface AethelListReturnItem { samples: { name: string; phrases: { - index: string; display: string; highlight: boolean; }[]; From 93fe007bafa7c827c7d9abf1f992b69a9a1080aa Mon Sep 17 00:00:00 2001 From: Xander Vertegaal Date: Thu, 4 Jul 2024 13:38:46 +0200 Subject: [PATCH 5/5] Refactor AethelQueryView --- backend/aethel_db/views.py | 65 ++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 38 deletions(-) diff --git a/backend/aethel_db/views.py b/backend/aethel_db/views.py index 78febec..95026ea 100644 --- a/backend/aethel_db/views.py +++ b/backend/aethel_db/views.py @@ -56,7 +56,9 @@ def get_or_create_result(self, lemma: str, word: str, type: str) -> AethelListIt for result in self.results: if result.lemma == lemma and result.type == type and result.word == word: return result - return AethelListItem(lemma=lemma, word=word, type=type, samples=[]) + new_result = AethelListItem(lemma=lemma, word=word, type=type, samples=[]) + self.results.append(new_result) + return new_result def json_response(self) -> JsonResponse: results = [asdict(result) for result in self.results] @@ -85,23 +87,6 @@ def item_contains_query_string(item: LexicalItem, query_input: str) -> bool: or query_input.lower() in item.word.lower() ) - def serialize_sample( - sample: Sample, highlighted_phrase_indices: set[int] - ) -> AethelListSample: - """ - Turns a Sample into an AethelListSample, while marking phrases that should be highlighted. - """ - new_phrases = [] - for index, phrase in enumerate(sample.lexical_phrases): - highlighted = index in highlighted_phrase_indices - new_phrase = AethelSamplePhrase( - display=phrase.string, - highlight=highlighted, - ) - new_phrases.append(new_phrase) - - return AethelListSample(sample.name, new_phrases) - response_object = AethelListResponse() # First we select all relevant samples from the dataset that contain the query string. @@ -110,30 +95,34 @@ def serialize_sample( query=in_word(query_input) | in_lemma(query_input), ) - # Then we transform the results. - # Each key in result_dict is a unique combination of lemma, word, and type. - # Each value is a sample, mapped to a set of indices referring to the specific phrase that has the type. - result_dict: dict[tuple[str, str, str], dict[str, set[int]]] = {} for sample in query_result: for phrase_index, phrase in enumerate(sample.lexical_phrases): for item in phrase.items: if item_contains_query_string(item, query_input): - key = (item.lemma, item.word, str(phrase.type)) - # setdefault gets the value for a given key or adds the key with a provided value if None is found. - samples = result_dict.setdefault(key, {}) - phrase_indices = samples.setdefault(sample.name, set()) - phrase_indices.add(phrase_index) - - # Finally, we serialize the samples and add them to the response object. - for key, samples in result_dict.items(): - lemma, word, type = key - list_item = response_object.get_or_create_result( - lemma=lemma, word=word, type=type - ) - for sample_name, phrase_indices in samples.items(): - sample = dataset.find_by_name(sample_name).pop() - list_item.samples.append(serialize_sample(sample, phrase_indices)) - response_object.results.append(list_item) + result = response_object.get_or_create_result( + lemma=item.lemma, word=item.word, type=str(phrase.type) + ) + + # Check whether we have already added this sample for this result + existing_sample = next( + (s for s in result.samples if s.name == sample.name), + None, + ) + + if existing_sample: + existing_sample.phrases[phrase_index].highlight = True + else: + new_sample = AethelListSample(name=sample.name, phrases=[]) + for index, sample_phrase in enumerate( + sample.lexical_phrases + ): + highlighted = index == phrase_index + new_phrase = AethelSamplePhrase( + display=sample_phrase.string, + highlight=highlighted, + ) + new_sample.phrases.append(new_phrase) + result.samples.append(new_sample) return response_object.json_response()