diff --git a/src/lib/server/db/employee.ts b/src/lib/server/db/employee.ts index 4350607..c11d624 100644 --- a/src/lib/server/db/employee.ts +++ b/src/lib/server/db/employee.ts @@ -1,7 +1,8 @@ import { or, eq, and, max, gt, count, isNull } from 'drizzle-orm'; import { employee, employeeEntry, employeeExit } from './schema/employee'; import { StateInside, StateOutside, type State } from '$lib/types/state'; -import { fuzzySearchFilters, sqlConcat, sqlLeast, sqlLevenshteinDistance } from './fuzzysearch'; +import { fuzzySearchFilters } from './fuzzysearch'; +import { sqlConcat, sqlLeast, sqlLevenshteinDistance } from './utils'; import { isInside } from '../isInside'; import { DB as db } from './connect'; import { capitalizeString, sanitizeString } from '$lib/utils/sanitize'; @@ -92,13 +93,13 @@ export async function getEmployees( ...fuzzySearchFilters([employee.identifier], nonEmptySearchQuery, { substr: true }), - ...fuzzySearchFilters([employee.fname], nonEmptySearchQuery, { distance: 4 }), - ...fuzzySearchFilters([employee.lname], nonEmptySearchQuery, { distance: 4 }), + ...fuzzySearchFilters([employee.fname], nonEmptySearchQuery, { distance: 5 }), + ...fuzzySearchFilters([employee.lname], nonEmptySearchQuery, { distance: 5 }), ...fuzzySearchFilters([employee.fname, employee.lname], nonEmptySearchQuery, { - distance: 5 + distance: 6 }), ...fuzzySearchFilters([employee.lname, employee.fname], nonEmptySearchQuery, { - distance: 5 + distance: 6 }) ] ) diff --git a/src/lib/server/db/fuzzysearch.ts b/src/lib/server/db/fuzzysearch.ts index 7b77951..20b1087 100644 --- a/src/lib/server/db/fuzzysearch.ts +++ b/src/lib/server/db/fuzzysearch.ts @@ -1,22 +1,11 @@ -import { sql, ilike, type Column, type SQL } from 'drizzle-orm'; +import { ilike, type Column, type SQL } from 'drizzle-orm'; +import { sqlConcat, sqlLevenshtein } from './utils'; type FuzzySearchFiltersOptions = { distance?: number; substr?: boolean; }; -type LevenshteinOptions = { - insertCost: number; - deleteCost: number; - substitutionCost: number; -}; - -const defaultLevenshteinOptions: LevenshteinOptions = { - insertCost: 1, - deleteCost: 3, - substitutionCost: 2 -}; - export function fuzzySearchFilters( dbFields: Column[], searchQuery: string, @@ -52,54 +41,3 @@ export function fuzzySearchFilters( return [ilikeFilter, ...levenshteinFilter]; } - -/* - * Returns the sql for concatenating multiple columns with a separator using CONCAT_WS - */ -export function sqlConcat(cols: Column[], separator?: string): SQL { - if (cols.length === 0) { - throw new Error('Passed columns length is 0'); - } - - if (cols.length === 1) { - return sql`${cols[0]}`; - } - - const sqlCols = cols - .map((col) => sql`${col}`) - .reduce((prev, curr) => sql`${prev}, ${curr}`); - return separator !== undefined - ? sql`CONCAT_WS(${separator}, ${sqlCols})` - : sql`CONCAT(${sqlCols})`; -} - -/* - * Returns the sql for getting the least value out of N columns - */ -export function sqlLeast(cols: SQL[]): SQL { - const sqlCols = cols.reduce((prev, curr) => sql`${prev}, ${curr}`); - return sql`LEAST(${sqlCols})`; -} - -/* - * Returns the sql for determining if the levenshtein distance is less than or equal to the passed distance - */ -export function sqlLevenshtein( - col: SQL, - input: string, - distance: number, - opts: LevenshteinOptions = defaultLevenshteinOptions -): SQL { - return sql`LEVENSHTEIN(LOWER(${input}), LOWER(${col}), ${opts.insertCost}, ${opts.deleteCost}, ${opts.substitutionCost}) <= ${distance}`; -} - -/* - * Returns the sql for getting the levenshtein distance - */ -export function sqlLevenshteinDistance( - col: SQL, - input: string, - opts: LevenshteinOptions = defaultLevenshteinOptions -): SQL { - return sql`LEVENSHTEIN(LOWER(${input}), LOWER(${col}), ${opts.insertCost}, ${opts.deleteCost}, ${opts.substitutionCost})`; -} diff --git a/src/lib/server/db/student.ts b/src/lib/server/db/student.ts index d18766a..2f346a1 100644 --- a/src/lib/server/db/student.ts +++ b/src/lib/server/db/student.ts @@ -1,7 +1,8 @@ import { or, eq, and, max, gt, count, isNull } from 'drizzle-orm'; import { student, studentEntry, studentExit } from './schema/student'; import { StateInside, StateOutside, type State } from '$lib/types/state'; -import { fuzzySearchFilters, sqlConcat, sqlLeast, sqlLevenshteinDistance } from './fuzzysearch'; +import { fuzzySearchFilters } from './fuzzysearch'; +import { sqlConcat, sqlLeast, sqlLevenshteinDistance } from './utils'; import { isInside } from '../isInside'; import { DB as db } from './connect'; import { capitalizeString, sanitizeString } from '$lib/utils/sanitize'; @@ -90,13 +91,13 @@ export async function getStudents( or( ...[ ...fuzzySearchFilters([student.index], nonEmptySearchQuery), - ...fuzzySearchFilters([student.fname], nonEmptySearchQuery, { distance: 4 }), - ...fuzzySearchFilters([student.lname], nonEmptySearchQuery, { distance: 4 }), + ...fuzzySearchFilters([student.fname], nonEmptySearchQuery, { distance: 5 }), + ...fuzzySearchFilters([student.lname], nonEmptySearchQuery, { distance: 5 }), ...fuzzySearchFilters([student.fname, student.lname], nonEmptySearchQuery, { - distance: 5 + distance: 6 }), ...fuzzySearchFilters([student.lname, student.fname], nonEmptySearchQuery, { - distance: 5 + distance: 6 }) ] ) diff --git a/src/lib/server/db/utils.ts b/src/lib/server/db/utils.ts new file mode 100644 index 0000000..e2662e6 --- /dev/null +++ b/src/lib/server/db/utils.ts @@ -0,0 +1,75 @@ +import { diacriticsMap } from '$lib/utils/sanitize'; +import { sql, type Column, type SQL } from 'drizzle-orm'; + +type LevenshteinOptions = { + insertCost: number; + deleteCost: number; + substitutionCost: number; +}; + +const defaultLevenshteinOptions: LevenshteinOptions = { + insertCost: 1, + deleteCost: 3, + substitutionCost: 2 +}; + +/* + * Returns the sql for concatenating multiple columns with a separator using CONCAT_WS + */ +export function sqlConcat(cols: Column[], separator?: string): SQL { + if (cols.length === 0) { + throw new Error('Passed columns length is 0'); + } + + if (cols.length === 1) { + return sql`${cols[0]}`; + } + + const sqlCols = cols + .map((col) => sql`${col}`) + .reduce((prev, curr) => sql`${prev}, ${curr}`); + return separator !== undefined + ? sql`CONCAT_WS(${separator}, ${sqlCols})` + : sql`CONCAT(${sqlCols})`; +} + +/* + * Returns the sql for getting the least value out of N columns + */ +export function sqlLeast(cols: SQL[]): SQL { + const sqlCols = cols.reduce((prev, curr) => sql`${prev}, ${curr}`); + return sql`LEAST(${sqlCols})`; +} + +/* + * Returns the sql for determining if the levenshtein distance is less than or equal to the passed distance + */ +export function sqlLevenshtein( + col: SQL, + input: string, + distance: number, + opts: LevenshteinOptions = defaultLevenshteinOptions +): SQL { + return sql`LEVENSHTEIN(LOWER(${sqlRemoveDiacritics(input)}), LOWER(${sqlRemoveDiacritics(col)}), ${opts.insertCost}, ${opts.deleteCost}, ${opts.substitutionCost}) <= ${distance}`; +} + +/* + * Returns the sql for getting the levenshtein distance + */ +export function sqlLevenshteinDistance( + col: SQL, + input: string, + opts: LevenshteinOptions = defaultLevenshteinOptions +): SQL { + return sql`LEVENSHTEIN(LOWER(${sqlRemoveDiacritics(input)}), LOWER(${sqlRemoveDiacritics(col)}), ${opts.insertCost}, ${opts.deleteCost}, ${opts.substitutionCost})`; +} + +/* + * Returns the input string with diacritics removed + */ +export function sqlRemoveDiacritics(input: string | Column | SQL): SQL { + return Array.from(diacriticsMap).reduce( + (acc, [diacritic, sanitized]) => sql`REGEXP_REPLACE(${acc}, ${diacritic}, ${sanitized}, 'g')`, + sql`${input}` + ); +} diff --git a/src/lib/utils/sanitize.ts b/src/lib/utils/sanitize.ts index b5f031c..a72486e 100644 --- a/src/lib/utils/sanitize.ts +++ b/src/lib/utils/sanitize.ts @@ -16,7 +16,7 @@ export function removeDiacritics(input: string): string { .join(''); } -const diacriticsMap = new Map([ +export const diacriticsMap = new Map([ ['č', 'c'], ['Č', 'C'], ['ć', 'c'],