Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/type search #47

Merged
merged 10 commits into from
Aug 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 11 additions & 56 deletions backend/aethel_db/search.py
Original file line number Diff line number Diff line change
@@ -1,63 +1,18 @@
from __future__ import annotations
from typing import Iterable, Callable, Iterator
from aethel.frontend import Sample
from aethel.frontend import LexicalPhrase, LexicalItem
from aethel.mill.types import type_repr

# The following methods and classes have been extracted from aethel.scripts.search (not part of the published library), with some minor customisations / simplifications.

def match_type_with_phrase(phrase: LexicalPhrase, type_input: str) -> bool:
    """
    Tell whether a phrase's type is written exactly as ``type_input``.

    The phrase's type is turned into a string with ``type_repr`` and compared
    to the input for strict equality (no partial or case-insensitive matching).
    """
    rendered_type = type_repr(phrase.type)
    return type_input == rendered_type

def search(bank: Iterable[Sample], query: Callable[[Sample], bool]) -> Iterator[Sample]:
    """
    Lazily yield the samples in ``bank`` that satisfy ``query``.

    Samples are produced in their original order; ``query`` is only invoked
    as the returned iterator is consumed.
    """
    return (sample for sample in bank if query(sample))

def match_word_with_phrase(phrase: LexicalPhrase, word_input: str) -> bool:
    """
    Tell whether at least one lexical item of ``phrase`` matches ``word_input``.

    Matching of an individual item is delegated to ``match_word_with_item``;
    the scan stops at the first item that matches.
    """
    for lexical_item in phrase.items:
        if match_word_with_item(lexical_item, word_input):
            return True
    return False

def in_lemma(query_string: str) -> Query:
    """
    Build a Query that accepts samples containing ``query_string`` in a lemma.

    The resulting predicate is true for a sample when any lexical item of any
    of its lexical phrases has a lemma that contains ``query_string`` as a
    case-insensitive substring.
    """
    # The search term does not change between items, so lowercase it once
    # here rather than once per lexical item inside the predicate.
    needle = query_string.lower()

    def f(sample: Sample) -> bool:
        return any(
            needle in item.lemma.lower()
            for phrase in sample.lexical_phrases
            for item in phrase.items
        )

    return Query(f)


def in_word(query_string: str) -> Query:
    """
    Build a Query that accepts samples containing ``query_string`` in a word.

    The resulting predicate is true for a sample when any lexical item of any
    of its lexical phrases has a word form that contains ``query_string`` as a
    case-insensitive substring.
    """
    # The search term does not change between items, so lowercase it once
    # here rather than once per lexical item inside the predicate.
    needle = query_string.lower()

    def f(sample: Sample) -> bool:
        return any(
            needle in item.word.lower()
            for phrase in sample.lexical_phrases
            for item in phrase.items
        )

    return Query(f)


class Query:
    """
    Composable predicate over samples.

    Wraps a function that maps a sample to a boolean. Instances can be
    combined with ``&``, ``|`` and ``^``, negated with ``~``, and called
    directly to apply the wrapped function to a sample.
    """

    def __init__(self, fn: Callable[[Sample], bool]):
        self.fn = fn

    def __call__(self, sample: Sample) -> bool:
        # Calling the query simply applies the wrapped predicate.
        return self.fn(sample)

    def __and__(self, other: Query) -> Query:
        # Short-circuits: ``other`` is not consulted when ``self`` rejects.
        return Query(lambda sample: self.fn(sample) and other.fn(sample))

    def __or__(self, other: Query) -> Query:
        # Short-circuits: ``other`` is not consulted when ``self`` accepts.
        return Query(lambda sample: self.fn(sample) or other.fn(sample))

    def __xor__(self, other: Query) -> Query:
        # Both predicates are always evaluated; true when exactly one holds.
        return Query(lambda sample: self.fn(sample) ^ other.fn(sample))

    def __invert__(self) -> Query:
        return Query(lambda sample: not self.fn(sample))
def match_word_with_item(item: LexicalItem, word_input: str) -> bool:
    """
    Tell whether ``word_input`` occurs in an item's lemma or word form.

    The comparison is a case-insensitive substring test: the input and both
    fields are lowercased before matching. The lemma is checked first and the
    word form is only inspected when the lemma does not match.
    """
    # Lowercase the search term once; it is reused for both fields.
    needle = word_input.lower()
    return needle in item.lemma.lower() or needle in item.word.lower()
103 changes: 46 additions & 57 deletions backend/aethel_db/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,13 @@
from django.http import HttpRequest, JsonResponse
from rest_framework import status
from rest_framework.views import APIView
from aethel.frontend import LexicalItem
from spindle.utils import serialize_phrases_with_infix_notation
from aethel_db.search import search, in_lemma, in_word
from aethel_db.search import (
match_type_with_phrase,
match_word_with_phrase,
)
from aethel.frontend import Sample

from aethel.frontend import LexicalItem

from .models import dataset
from .search import search, in_lemma, in_word


def aethel_status():
Expand Down Expand Up @@ -41,7 +39,9 @@ class AethelListItem:

def serialize(self):
out = asdict(self)
out['samples'] = sorted(out['samples'], key=lambda sample: len(sample['sentence']))
out["samples"] = sorted(
out["samples"], key=lambda sample: len(sample["phrases"])
)
return out


Expand All @@ -51,22 +51,19 @@ class AethelListResponse:
Response object for Aethel query view.
"""

results: List[AethelListItem] = field(default_factory=list)
results: dict[tuple[str, str, str], AethelListItem] = field(default_factory=dict)
error: Optional[str] = None

def get_or_create_result(self, lemma: str, word: str, type: str) -> AethelListItem:
"""
Return an existing result with the same lemma, word, and type, or create a new one if it doesn't exist.
"""
for result in self.results:
if result.lemma == lemma and result.type == type and result.word == word:
return result
key = (lemma, word, type)
new_result = AethelListItem(lemma=lemma, word=word, type=type, samples=[])
self.results.append(new_result)
return new_result
return self.results.setdefault(key, new_result)

def json_response(self) -> JsonResponse:
results = [result.serialize() for result in self.results]
results = [result.serialize() for result in self.results.values()]

return JsonResponse(
{
Expand All @@ -79,55 +76,47 @@ def json_response(self) -> JsonResponse:

class AethelQueryView(APIView):
def get(self, request: HttpRequest) -> JsonResponse:
query_input = self.request.query_params.get("query", None)
if query_input is None or len(query_input) < 3:
return AethelListResponse().json_response()
word_input = self.request.query_params.get("word", None)
type_input = self.request.query_params.get("type", None)

def item_contains_query_string(item: LexicalItem, query_input: str) -> bool:
"""
Checks if a LexicalItem contains a given input string in its word or its lemma.
"""
return (
query_input.lower() in item.lemma.lower()
or query_input.lower() in item.word.lower()
)
# We only search for strings of 3 or more characters.
if word_input is not None and len(word_input) < 3:
return AethelListResponse().json_response()

response_object = AethelListResponse()

# First we select all relevant samples from the dataset that contain the query string.
query_result = search(
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It turns out that this pre-filtering loop is not necessary, and we can improve performance slightly by removing it. I compared word and type searches with and without this step using a bigger dataset (3000 samples). Word searches performed slightly better without the pre-filtering step (0.04s) than with it (0.05s), and the same is true for type searches (0.52s with pre-filtering => 0.42s without).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As an added benefit, we can drop a lot of code taken from aethel in search.py.

bank=dataset.samples,
query=in_word(query_input) | in_lemma(query_input),
)

for sample in query_result:
for sample in dataset.samples:
for phrase_index, phrase in enumerate(sample.lexical_phrases):
for item in phrase.items:
if item_contains_query_string(item, query_input):
result = response_object.get_or_create_result(
lemma=item.lemma, word=item.word, type=str(phrase.type)
word_match = word_input and match_word_with_phrase(phrase, word_input)
type_match = type_input and match_type_with_phrase(phrase, type_input)
if not (word_match or type_match):
continue

phrase_word = " ".join([item.word for item in phrase.items])
phrase_lemma = " ".join([item.lemma for item in phrase.items])

result = response_object.get_or_create_result(
lemma=phrase_lemma, word=phrase_word, type=str(phrase.type)
)

# Check whether we have already added this sample for this result.
existing_sample = next(
(s for s in result.samples if s.name == sample.name),
None,
)

if existing_sample:
existing_sample.phrases[phrase_index].highlight = True
else:
new_sample = AethelListSample(name=sample.name, phrases=[])
for index, sample_phrase in enumerate(sample.lexical_phrases):
highlighted = index == phrase_index
new_phrase = AethelSamplePhrase(
display=sample_phrase.string,
highlight=highlighted,
)

# Check whether we have already added this sample for this result
existing_sample = next(
(s for s in result.samples if s.name == sample.name),
None,
)

if existing_sample:
existing_sample.phrases[phrase_index].highlight = True
else:
new_sample = AethelListSample(name=sample.name, phrases=[])
for index, sample_phrase in enumerate(
sample.lexical_phrases
):
highlighted = index == phrase_index
new_phrase = AethelSamplePhrase(
display=sample_phrase.string,
highlight=highlighted,
)
new_sample.phrases.append(new_phrase)
result.samples.append(new_sample)
new_sample.phrases.append(new_phrase)
result.samples.append(new_sample)

return response_object.json_response()

Expand Down
18 changes: 14 additions & 4 deletions frontend/src/app/aethel/aethel.component.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@ import { ReactiveFormsModule } from "@angular/forms";
import { ActivatedRoute, Router, RouterModule } from "@angular/router";
import { routes } from "../routes";
import { of } from "rxjs";
import { AethelApiService } from "../shared/services/aethel-api.service";

describe("AethelComponent", () => {
let component: AethelComponent;
let fixture: ComponentFixture<AethelComponent>;
let apiService: AethelApiService;
let httpController: HttpTestingController;
let route: ActivatedRoute;
let router: Router;
Expand All @@ -28,6 +30,7 @@ describe("AethelComponent", () => {
route = TestBed.inject(ActivatedRoute);
router = TestBed.inject(Router);
fixture = TestBed.createComponent(AethelComponent);
apiService = TestBed.inject(AethelApiService);
component = fixture.componentInstance;
fixture.detectChanges();
});
Expand All @@ -43,17 +46,24 @@ describe("AethelComponent", () => {
httpController.expectNone("/api/aethel");
});

it("should request data when there is a query parameter on init", () => {
route.queryParams = of({ query: "test" });
it("should insert data into the form when there is a 'word' query parameter", () => {
route.queryParams = of({ word: "test" });
component.ngOnInit();
expect(component.form.controls.aethelInput.value).toBe("test");
httpController.expectOne("/api/aethel/?query=test");
});

it("should pass query param data to the API service", () => {
apiService.input$.subscribe(input => {
expect(input.word).toBe("test3");
});
route.queryParams = of({ word: "test3" });
component.ngOnInit();
});

it("should react to form submissions", () => {
const navigatorSpy = spyOn(router, "navigateByUrl");
component.form.controls.aethelInput.setValue("test-two");
component.submit();
expect(navigatorSpy).toHaveBeenCalledWith("/?query=test-two");
expect(navigatorSpy).toHaveBeenCalledWith("/?word=test-two");
});
});
11 changes: 7 additions & 4 deletions frontend/src/app/aethel/aethel.component.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,14 +63,17 @@ export class AethelComponent implements OnInit {
// Whenever the query parameter changes, we run a new query.
this.route.queryParams
.pipe(
map((queryParams) => queryParams["query"]),
isNonNull(),
distinctUntilChanged(),
takeUntilDestroyed(this.destroyRef),
)
.subscribe((query) => {
this.form.controls.aethelInput.setValue(query);
this.apiService.input$.next(query);
const word = query['word'];
const type = query['type']
if (word) {
this.form.controls.aethelInput.setValue(word);
}
this.apiService.input$.next({ word, type });
});
}

Expand All @@ -91,7 +94,7 @@ export class AethelComponent implements OnInit {
private updateUrl(query: string): void {
// This does not actually refresh the page because it just adds parameters to the current route.
// It just updates the URL in the browser, triggering a new query.
const url = this.router.createUrlTree([], { relativeTo: this.route, queryParams: { query } }).toString();
const url = this.router.createUrlTree([], { relativeTo: this.route, queryParams: { word: query } }).toString();
this.router.navigateByUrl(url);
}

Expand Down
16 changes: 11 additions & 5 deletions frontend/src/app/sample/sample.component.html
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
<th i18n>#</th>
<th i18n>Phrase</th>
<th i18n>Type</th>
<th></th>
<th i18n>Search in Æthel</th>
</tr>
</thead>
<tbody>
Expand All @@ -38,10 +38,16 @@
<td>
<span class="proof" [innerHtml]="phrase.type | proof"></span>
</td>
<td class="has-text-right">
@if (showButton(phrase.items)) {
<button class="button is-info" (click)="routeToAethel(phrase.items)">
Search in Æthel
<td>
@if (showButtons(phrase.items)) {
<button class="button search-button is-info" (click)="searchAethel(phrase, 'word')">
Word
</button>
<button class="button search-button is-info" (click)="searchAethel(phrase, 'type')">
Type
</button>
<button class="button search-button is-info" (click)="searchAethel(phrase, 'word-and-type')">
Word and type
</button>
}
</td>
Expand Down
3 changes: 3 additions & 0 deletions frontend/src/app/sample/sample.component.scss
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
.search-button:not(:last-child) {
margin-right: .5rem;
}
Loading
Loading