Skip to content

Commit

Permalink
feat: Remove fastq. (#83)
Browse files Browse the repository at this point in the history
  • Loading branch information
ShogunPanda authored Aug 4, 2022
1 parent 19b9283 commit 6d85476
Show file tree
Hide file tree
Showing 9 changed files with 300 additions and 242 deletions.
144 changes: 84 additions & 60 deletions packages/benchmarks/benchmark-table/index.mjs
Original file line number Diff line number Diff line change
@@ -1,81 +1,102 @@
/* global console */
/* global console , setImmediate*/

import { readFile } from "fs/promises";
import { create, insert, search, formatNanoseconds } from "@nearform/lyra";
import { URL } from "node:url";

const lines = JSON.parse(await readFile(new URL("../dataset/divinaCommedia.json", import.meta.url).pathname));
async function populateDB(db, lines) {
let i = 0;
return new Promise(resolve => {
function insertBatch() {
const batch = lines.slice(i * 1000, (i + 1) * 1000);
i++;

const db = create({
schema: {
id: "string",
txt: "string",
},
});
if (!batch.length) {
return resolve();
}

for (const line of lines) {
insert(db, line);
for (const line of batch) {
insert(db, line);
}

setImmediate(insertBatch);
}

setImmediate(insertBatch);
});
}

const d1 = search(db, {
term: "stelle",
properties: ["txt"],
exact: true,
});

const d2 = search(db, {
term: "stelle",
exact: true,
});

const d3 = search(db, {
term: "stele",
properties: "*",
tolerance: 1,
});

const d4 = search(db, {
term: "onde si muovono a diversi porti",
properties: "*",
exact: true,
});

const d5 = search(db, {
term: "ode si mossero a divisi porte",
properties: "*",
tolerance: 5,
});

const d6 = search(db, {
term: "ode si mossero a divisi porte",
properties: ["txt"],
tolerance: 5,
});

const table = `
async function main() {
const db = create({
schema: {
id: "string",
txt: "string",
},
});

const lines = JSON.parse(await readFile(new URL("../dataset/divinaCommedia.json", import.meta.url).pathname));

await populateDB(db, lines);

const d1 = search(db, {
term: "stelle",
properties: ["txt"],
exact: true,
});

const d2 = search(db, {
term: "stelle",
exact: true,
});

const d3 = search(db, {
term: "stele",
properties: "*",
tolerance: 1,
});

const d4 = search(db, {
term: "onde si muovono a diversi porti",
properties: "*",
exact: true,
});

const d5 = search(db, {
term: "ode si mossero a divisi porte",
properties: "*",
tolerance: 5,
});

const d6 = search(db, {
term: "ode si mossero a divisi porte",
properties: ["txt"],
tolerance: 5,
});

const table = `
| Search | Term | Properties | Typo tolerance | Time Elapsed | Results |
|--------------------|---------------------------------------|------------|----------------|---------------|-------------|
| **Exact search** | \`"stelle"\` | \`["txt"]\`| \`N/A\` | ${formatNanoseconds(
d1.elapsed,
)} | ${d1.count} |
d1.elapsed,
)} | ${d1.count} |
| **Exact search** | \`"stelle"\` | \`"*"\` | \`N/A\` | ${formatNanoseconds(
d2.elapsed,
)} | ${d2.count} |
d2.elapsed,
)} | ${d2.count} |
| **Typo tolerance** | \`"stele"\` | \`"*"\` | \`1\` | ${formatNanoseconds(
d3.elapsed,
)} | ${d3.count} |
d3.elapsed,
)} | ${d3.count} |
| **Exact search** | \`"onde si muovono a diversi porti"\` | \`"*"\` | \`N/A\` | ${formatNanoseconds(
d4.elapsed,
)} | ${d4.count} |
d4.elapsed,
)} | ${d4.count} |
| **Typo tolerance** | \`"ode si mossero a divisi porte"\` | \`"*"\` | \`5\` | ${formatNanoseconds(
d5.elapsed,
)} | ${d5.count} |
d5.elapsed,
)} | ${d5.count} |
| **Typo tolerance** | \`"ode si mossero a divisi porte"\` | \`["txt"]\`| \`5\` | ${formatNanoseconds(
d6.elapsed,
)} | ${d6.count} |
d6.elapsed,
)} | ${d6.count} |
`;

const markdownContent = `
const markdownContent = `
# Benchmarks
The following is an automated benchmark performed on the [Divina Commedia](https://en.wikipedia.org/wiki/Divina_Commedia) dataset. <br />
Expand All @@ -86,4 +107,7 @@ You can find the full dataset [here](https://github.com/nearform/lyra/blob/main/
${table}
`;

console.log(markdownContent);
console.log(markdownContent);
}

main();
3 changes: 0 additions & 3 deletions packages/lyra/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,6 @@
"main": "./dist/cjs/lyra.js",
"module": "./dist/esm/lyra.js",
"types": "./dist/esm/lyra.d.ts",
"dependencies": {
"fastq": "^1.13.0"
},
"devDependencies": {
"@types/node": "^18.6.2",
"@types/tap": "^15.0.7",
Expand Down
31 changes: 31 additions & 0 deletions packages/lyra/src/insertion-checker.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/** Symbol key under which `trackInsertion` keeps its per-instance insertion counter. */
const kInsertions = Symbol("lyra.insertions");

/**
 * Reports a warning through the best channel available when it is called.
 *
 * `process.emitWarning` is used only when it is actually callable. Checking
 * for the mere presence of a `process` global is not enough: browser shims and
 * bundler defines frequently expose a partial `process` object (for example
 * only `process.env`), in which case the warning is written with
 * `console.warn` instead of failing with a TypeError.
 *
 * @param message - Human readable warning text.
 * @param options - Warning metadata; `code` identifies the warning.
 */
function warn(message: string, options: { code: string }): void {
  if (typeof process !== "undefined" && typeof process.emitWarning === "function") {
    process.emitWarning(message, options);
    return;
  }

  console.warn(`[WARNING] [${options.code}] ${message}`);
}

/**
 * Counts insertions performed on an instance and warns when too many of them
 * happen before the counter is reset.
 *
 * The counter lives on the instance itself under the `kInsertions` symbol.
 * Whenever the counter is not a number, a microtask is queued that clears it
 * again and counting restarts from zero. Once the counter has grown past 1000
 * the `LYRA0001` warning is issued and the counter is parked at -1, which
 * stops both counting and further warnings until the queued reset has run.
 *
 * @param _lyra - The instance being inserted into; it is used as a plain
 *   object that carries the hidden counter.
 */
export function trackInsertion(_lyra: unknown) {
  const instance = _lyra as object & { [kInsertions]?: number };
  let count = instance[kInsertions];

  if (typeof count !== "number") {
    // No counter yet (or it was cleared): schedule the clearing microtask and
    // start counting from zero.
    queueMicrotask(() => {
      instance[kInsertions] = undefined;
    });

    count = 0;
    instance[kInsertions] = count;
  }

  if (count > 1000) {
    warn(
      "Lyra's insert operation is synchronous. Please avoid inserting a large number of document in a single operation in order not to block the main thread.",
      { code: "LYRA0001" },
    );

    // -1 marks "already warned": the branch below ignores negative counters.
    instance[kInsertions] = -1;
    return;
  }

  if (count >= 0) {
    instance[kInsertions] = count + 1;
  }
}
11 changes: 9 additions & 2 deletions packages/lyra/src/levenshtein.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
export function levenshtein(a: string, b: string): number {
if (!a.length) return b.length;
if (!b.length) return a.length;
/* c8 ignore next 3 */
if (!a.length) {
return b.length;
}

/* c8 ignore next 3 */
if (!b.length) {
return a.length;
}

let tmp;

Expand Down
Loading

0 comments on commit 6d85476

Please sign in to comment.