Skip to content

Commit

Permalink
Merge pull request #112 from studentinovisad/as/fix/search-sisanje
Browse files Browse the repository at this point in the history
fix(search): small refactor, adjust distances allowed and SISANJE
  • Loading branch information
aleksasiriski authored Dec 31, 2024
2 parents af9046f + 45ae5f4 commit 49a5e05
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 75 deletions.
11 changes: 6 additions & 5 deletions src/lib/server/db/employee.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import { or, eq, and, max, gt, count, isNull } from 'drizzle-orm';
import { employee, employeeEntry, employeeExit } from './schema/employee';
import { StateInside, StateOutside, type State } from '$lib/types/state';
import { fuzzySearchFilters, sqlConcat, sqlLeast, sqlLevenshteinDistance } from './fuzzysearch';
import { fuzzySearchFilters } from './fuzzysearch';
import { sqlConcat, sqlLeast, sqlLevenshteinDistance } from './utils';
import { isInside } from '../isInside';
import { DB as db } from './connect';
import { capitalizeString, sanitizeString } from '$lib/utils/sanitize';
Expand Down Expand Up @@ -92,13 +93,13 @@ export async function getEmployees(
...fuzzySearchFilters([employee.identifier], nonEmptySearchQuery, {
substr: true
}),
...fuzzySearchFilters([employee.fname], nonEmptySearchQuery, { distance: 4 }),
...fuzzySearchFilters([employee.lname], nonEmptySearchQuery, { distance: 4 }),
...fuzzySearchFilters([employee.fname], nonEmptySearchQuery, { distance: 5 }),
...fuzzySearchFilters([employee.lname], nonEmptySearchQuery, { distance: 5 }),
...fuzzySearchFilters([employee.fname, employee.lname], nonEmptySearchQuery, {
distance: 5
distance: 6
}),
...fuzzySearchFilters([employee.lname, employee.fname], nonEmptySearchQuery, {
distance: 5
distance: 6
})
]
)
Expand Down
66 changes: 2 additions & 64 deletions src/lib/server/db/fuzzysearch.ts
Original file line number Diff line number Diff line change
@@ -1,22 +1,11 @@
import { sql, ilike, type Column, type SQL } from 'drizzle-orm';
import { ilike, type Column, type SQL } from 'drizzle-orm';
import { sqlConcat, sqlLevenshtein } from './utils';

/**
 * Optional tuning knobs accepted by `fuzzySearchFilters`.
 */
type FuzzySearchFiltersOptions = {
// NOTE(review): presumably the maximum Levenshtein distance still counted as a match — confirm in fuzzySearchFilters.
distance?: number;
// NOTE(review): presumably toggles substring (ILIKE) matching — confirm in fuzzySearchFilters.
substr?: boolean;
};

/**
 * Edit-operation costs forwarded, in this order, as the last three arguments
 * of the LEVENSHTEIN SQL call built by the helpers in this module.
 */
type LevenshteinOptions = {
insertCost: number;
deleteCost: number;
substitutionCost: number;
};

/**
 * Costs used by `sqlLevenshtein` and `sqlLevenshteinDistance` when the caller
 * does not pass `opts`.
 */
const defaultLevenshteinOptions: LevenshteinOptions = {
insertCost: 1,
deleteCost: 3,
substitutionCost: 2
};

export function fuzzySearchFilters(
dbFields: Column[],
searchQuery: string,
Expand Down Expand Up @@ -52,54 +41,3 @@ export function fuzzySearchFilters(

return [ilikeFilter, ...levenshteinFilter];
}

/**
 * Builds the SQL expression for one or more columns joined into a single value.
 *
 * A single column is emitted as-is. Several columns are wrapped in
 * `CONCAT_WS(separator, ...)` when a separator is supplied, or in
 * `CONCAT(...)` when it is omitted.
 *
 * @param cols - Columns to join, in output order.
 * @param separator - Optional text passed as the first argument of `CONCAT_WS`.
 * @returns SQL expression for the joined columns.
 * @throws Error when `cols` is empty.
 */
export function sqlConcat(cols: Column[], separator?: string): SQL<Column> {
	switch (cols.length) {
		case 0:
			throw new Error('Passed columns length is 0');
		case 1:
			return sql<Column>`${cols[0]}`;
	}

	// Grow the comma-separated argument list one column at a time.
	let argList = sql<Column>`${cols[0]}`;
	for (const col of cols.slice(1)) {
		argList = sql<Column>`${argList}, ${sql<Column>`${col}`}`;
	}

	if (separator === undefined) {
		return sql<Column>`CONCAT(${argList})`;
	}
	return sql<Column>`CONCAT_WS(${separator}, ${argList})`;
}

/**
 * Builds a `LEAST(a, b, ...)` SQL expression over the given numeric expressions.
 *
 * @param cols - SQL expressions to compare; must contain at least one element.
 * @returns SQL expression wrapping all inputs in a single `LEAST(...)` call.
 * @throws Error when `cols` is empty, mirroring the guard in `sqlConcat`
 *   instead of surfacing the opaque TypeError of `reduce` on an empty array.
 */
export function sqlLeast(cols: SQL<number>[]): SQL<number> {
	if (cols.length === 0) {
		throw new Error('Passed columns length is 0');
	}

	const sqlCols = cols.reduce((prev, curr) => sql`${prev}, ${curr}`);
	return sql<number>`LEAST(${sqlCols})`;
}

/**
 * Builds a boolean SQL predicate that holds when the weighted Levenshtein
 * distance between the search input and a column expression does not exceed
 * `distance`.
 *
 * Both operands are wrapped in `LOWER(...)`; the input is passed as the first
 * LEVENSHTEIN argument and the column expression as the second.
 *
 * @param col - SQL expression for the column(s) being searched.
 * @param input - Search text supplied by the caller.
 * @param distance - Largest distance that is still accepted.
 * @param opts - Insert/delete/substitution costs; defaults to `defaultLevenshteinOptions`.
 * @returns SQL predicate of the form `LEVENSHTEIN(...) <= distance`.
 */
export function sqlLevenshtein(
	col: SQL<Column>,
	input: string,
	distance: number,
	opts: LevenshteinOptions = defaultLevenshteinOptions
): SQL<boolean> {
	const { insertCost, deleteCost, substitutionCost } = opts;
	return sql<boolean>`LEVENSHTEIN(LOWER(${input}), LOWER(${col}), ${insertCost}, ${deleteCost}, ${substitutionCost}) <= ${distance}`;
}

/**
 * Builds the SQL expression for the weighted Levenshtein distance between the
 * search input and a column expression.
 *
 * Both operands are wrapped in `LOWER(...)`; the input is passed as the first
 * LEVENSHTEIN argument and the column expression as the second.
 *
 * @param col - SQL expression for the column(s) being searched.
 * @param input - Search text supplied by the caller.
 * @param opts - Insert/delete/substitution costs; defaults to `defaultLevenshteinOptions`.
 * @returns SQL expression evaluating to the distance.
 */
export function sqlLevenshteinDistance(
	col: SQL<Column>,
	input: string,
	opts: LevenshteinOptions = defaultLevenshteinOptions
): SQL<number> {
	const { insertCost, deleteCost, substitutionCost } = opts;
	return sql<number>`LEVENSHTEIN(LOWER(${input}), LOWER(${col}), ${insertCost}, ${deleteCost}, ${substitutionCost})`;
}
11 changes: 6 additions & 5 deletions src/lib/server/db/student.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import { or, eq, and, max, gt, count, isNull } from 'drizzle-orm';
import { student, studentEntry, studentExit } from './schema/student';
import { StateInside, StateOutside, type State } from '$lib/types/state';
import { fuzzySearchFilters, sqlConcat, sqlLeast, sqlLevenshteinDistance } from './fuzzysearch';
import { fuzzySearchFilters } from './fuzzysearch';
import { sqlConcat, sqlLeast, sqlLevenshteinDistance } from './utils';
import { isInside } from '../isInside';
import { DB as db } from './connect';
import { capitalizeString, sanitizeString } from '$lib/utils/sanitize';
Expand Down Expand Up @@ -90,13 +91,13 @@ export async function getStudents(
or(
...[
...fuzzySearchFilters([student.index], nonEmptySearchQuery),
...fuzzySearchFilters([student.fname], nonEmptySearchQuery, { distance: 4 }),
...fuzzySearchFilters([student.lname], nonEmptySearchQuery, { distance: 4 }),
...fuzzySearchFilters([student.fname], nonEmptySearchQuery, { distance: 5 }),
...fuzzySearchFilters([student.lname], nonEmptySearchQuery, { distance: 5 }),
...fuzzySearchFilters([student.fname, student.lname], nonEmptySearchQuery, {
distance: 5
distance: 6
}),
...fuzzySearchFilters([student.lname, student.fname], nonEmptySearchQuery, {
distance: 5
distance: 6
})
]
)
Expand Down
75 changes: 75 additions & 0 deletions src/lib/server/db/utils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import { diacriticsMap } from '$lib/utils/sanitize';
import { sql, type Column, type SQL } from 'drizzle-orm';

/**
 * Edit-operation costs forwarded, in this order, as the last three arguments
 * of the LEVENSHTEIN SQL call built by `sqlLevenshtein` and
 * `sqlLevenshteinDistance`.
 */
type LevenshteinOptions = {
insertCost: number;
deleteCost: number;
substitutionCost: number;
};

/**
 * Costs used by `sqlLevenshtein` and `sqlLevenshteinDistance` when the caller
 * does not pass `opts`.
 */
const defaultLevenshteinOptions: LevenshteinOptions = {
insertCost: 1,
deleteCost: 3,
substitutionCost: 2
};

/**
 * Builds the SQL expression for one or more columns joined into a single value.
 *
 * A single column is emitted as-is. Several columns are wrapped in
 * `CONCAT_WS(separator, ...)` when a separator is supplied, or in
 * `CONCAT(...)` when it is omitted.
 *
 * @param cols - Columns to join, in output order.
 * @param separator - Optional text passed as the first argument of `CONCAT_WS`.
 * @returns SQL expression for the joined columns.
 * @throws Error when `cols` is empty.
 */
export function sqlConcat(cols: Column[], separator?: string): SQL<Column> {
	switch (cols.length) {
		case 0:
			throw new Error('Passed columns length is 0');
		case 1:
			return sql<Column>`${cols[0]}`;
	}

	// Grow the comma-separated argument list one column at a time.
	let argList = sql<Column>`${cols[0]}`;
	for (const col of cols.slice(1)) {
		argList = sql<Column>`${argList}, ${sql<Column>`${col}`}`;
	}

	if (separator === undefined) {
		return sql<Column>`CONCAT(${argList})`;
	}
	return sql<Column>`CONCAT_WS(${separator}, ${argList})`;
}

/**
 * Builds a `LEAST(a, b, ...)` SQL expression over the given numeric expressions.
 *
 * @param cols - SQL expressions to compare; must contain at least one element.
 * @returns SQL expression wrapping all inputs in a single `LEAST(...)` call.
 * @throws Error when `cols` is empty, mirroring the guard in `sqlConcat`
 *   instead of surfacing the opaque TypeError of `reduce` on an empty array.
 */
export function sqlLeast(cols: SQL<number>[]): SQL<number> {
	if (cols.length === 0) {
		throw new Error('Passed columns length is 0');
	}

	const sqlCols = cols.reduce((prev, curr) => sql`${prev}, ${curr}`);
	return sql<number>`LEAST(${sqlCols})`;
}

/**
 * Builds a boolean SQL predicate that holds when the weighted Levenshtein
 * distance between the search input and a column expression does not exceed
 * `distance`.
 *
 * Both operands are first passed through `sqlRemoveDiacritics` and then
 * wrapped in `LOWER(...)`; the input is the first LEVENSHTEIN argument and the
 * column expression the second.
 *
 * @param col - SQL expression for the column(s) being searched.
 * @param input - Search text supplied by the caller.
 * @param distance - Largest distance that is still accepted.
 * @param opts - Insert/delete/substitution costs; defaults to `defaultLevenshteinOptions`.
 * @returns SQL predicate of the form `LEVENSHTEIN(...) <= distance`.
 */
export function sqlLevenshtein(
	col: SQL<Column>,
	input: string,
	distance: number,
	opts: LevenshteinOptions = defaultLevenshteinOptions
): SQL<boolean> {
	const { insertCost, deleteCost, substitutionCost } = opts;
	const source = sqlRemoveDiacritics(input);
	const target = sqlRemoveDiacritics(col);
	return sql<boolean>`LEVENSHTEIN(LOWER(${source}), LOWER(${target}), ${insertCost}, ${deleteCost}, ${substitutionCost}) <= ${distance}`;
}

/**
 * Builds the SQL expression for the weighted Levenshtein distance between the
 * search input and a column expression.
 *
 * Both operands are first passed through `sqlRemoveDiacritics` and then
 * wrapped in `LOWER(...)`; the input is the first LEVENSHTEIN argument and the
 * column expression the second.
 *
 * @param col - SQL expression for the column(s) being searched.
 * @param input - Search text supplied by the caller.
 * @param opts - Insert/delete/substitution costs; defaults to `defaultLevenshteinOptions`.
 * @returns SQL expression evaluating to the distance.
 */
export function sqlLevenshteinDistance(
	col: SQL<Column>,
	input: string,
	opts: LevenshteinOptions = defaultLevenshteinOptions
): SQL<number> {
	const { insertCost, deleteCost, substitutionCost } = opts;
	const source = sqlRemoveDiacritics(input);
	const target = sqlRemoveDiacritics(col);
	return sql<number>`LEVENSHTEIN(LOWER(${source}), LOWER(${target}), ${insertCost}, ${deleteCost}, ${substitutionCost})`;
}

/**
 * Builds a SQL expression that substitutes every character listed in
 * `diacriticsMap` with its mapped replacement.
 *
 * The result is one `REGEXP_REPLACE(..., 'g')` call per map entry, nested in
 * the map's iteration order with `input` as the innermost operand.
 *
 * @param input - Plain string, column, or SQL expression to sanitize.
 * @returns SQL expression wrapping `input` in the nested replacements.
 */
export function sqlRemoveDiacritics(input: string | Column | SQL<Column>): SQL<Column> {
	let expr = sql<Column>`${input}`;
	for (const [diacritic, sanitized] of diacriticsMap) {
		// Each pass wraps the expression built so far in one more replacement.
		expr = sql<Column>`REGEXP_REPLACE(${expr}, ${diacritic}, ${sanitized}, 'g')`;
	}
	return expr;
}
2 changes: 1 addition & 1 deletion src/lib/utils/sanitize.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ export function removeDiacritics(input: string): string {
.join('');
}

const diacriticsMap = new Map([
export const diacriticsMap = new Map([
['č', 'c'],
['Č', 'C'],
['ć', 'c'],
Expand Down

0 comments on commit 49a5e05

Please sign in to comment.