diff --git a/packages/plugin-qps/LICENSE.md b/packages/plugin-qps/LICENSE.md deleted file mode 100644 index 1b02b293..00000000 --- a/packages/plugin-qps/LICENSE.md +++ /dev/null @@ -1,13 +0,0 @@ -Copyright 2024 OramaSearch Inc - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. diff --git a/packages/plugin-qps/README.md b/packages/plugin-qps/README.md deleted file mode 100644 index 773d1894..00000000 --- a/packages/plugin-qps/README.md +++ /dev/null @@ -1,96 +0,0 @@ -# Orama Plugin Embeddings - -**Orama Plugin Embeddings** allows you to generate fast text embeddings at insert and search time offline, directly on your machine - no OpenAI needed! - -## Installation - -To get started with **Orama Plugin Embeddings**, just install it with npm: - -```sh -npm i @orama/plugin-embeddings -``` - -**Important note**: to use this plugin, you'll also need to install one of the following TensorflowJS backend: - -- `@tensorflow/tfjs` -- `@tensorflow/tfjs-node` -- `@tensorflow/tfjs-backend-webgl` -- `@tensorflow/tfjs-backend-cpu` -- `@tensorflow/tfjs-node-gpu` -- `@tensorflow/tfjs-backend-wasm` - -For example, if you're running Orama on the browser, we highly recommend using `@tensorflow/tfjs-backend-webgl`: - -```sh -npm i @tensorflow/tfjs-backend-webgl -``` - -If you're using Orama in Node.js, we recommend using `@tensorflow/tfjs-node`: - -```sh -npm i @tensorflow/tfjs-node -``` - -## Usage - -```js -import { create } from '@orama/orama' -import { pluginEmbeddings } from '@orama/plugin-embeddings' -import '@tensorflow/tfjs-node' // Or any other appropriate TensorflowJS backend - -const plugin = await pluginEmbeddings({ - embeddings: { - defaultProperty: 'embeddings', // Property used to store generated embeddings - onInsert: { - generate: true, // Generate embeddings at insert-time - properties: ['description'], // properties to use for generating embeddings at insert time - verbose: true, - } - } -}) - -const db = await create({ - schema: { - description: 'string', - embeddings: 'vector[512]' // Orama generates 512-dimensions vectors - }, - plugins: [plugin] -}) -``` - -Example usage at insert time: - -```js -await insert(db, { - description: 'Classroom Headphones Bulk 5 Pack, Student On Ear Color Varieties' -}) - -await insert(db, { - description: 'Kids Wired Headphones for School Students K-12' -}) - -await insert(db, { - description: 'Kids Headphones Bulk 5-Pack for K-12 School' -}) - -await insert(db, { - description: 'Bose QuietComfort Bluetooth Headphones' -}) -``` - -Orama will automatically generate text embeddings and store them into the `embeddings` property. - -Then, you can use the `vector` or `hybrid` setting to perform hybrid or vector search at runtime: - -```js -await search(db, { - term: 'Headphones for 12th grade students', - mode: 'vector' -}) -``` - -Orama will generate embeddings at search time and perform vector or hybrid search for you. - -# License - -[Apache 2.0](/LICENSE.md) \ No newline at end of file diff --git a/packages/plugin-qps/package.json b/packages/plugin-qps/package.json deleted file mode 100644 index 85f67c68..00000000 --- a/packages/plugin-qps/package.json +++ /dev/null @@ -1,53 +0,0 @@ -{ - "name": "@orama/plugin-qps", - "version": "3.0.0-rc-2", - "description": "Performant search algorithm optimized for descriptive texts", - "keywords": [ - "orama", - "embeddings", - "secure proxy", - "vector search" - ], - "license": "Apache-2.0", - "main": "./dist/index.js", - "type": "module", - "exports": { - ".": { - "require": "./dist/index.cjs", - "import": "./dist/index.js", - "types": "./dist/index.d.ts", - "browser": "./dist/index.global.js" - } - }, - "bugs": { - "url": "https://github.com/askorama/orama/issues" - }, - "homepage": "https://github.com/askorama/orama#readme", - "repository": { - "type": "git", - "url": "git+https://github.com/askorama/orama.git" - }, - "sideEffects": false, - "types": "./dist/index.d.ts", - "files": [ - "dist" - ], - "scripts": { - "build": "tsup --config tsup.lib.js", - "lint": "exit 0", - "test": "node --test --import tsx test/*.test.ts" - }, - "publishConfig": { - "access": "public" - }, - "devDependencies": { - "@types/node": "^20.9.0", - "tap": "^21.0.1", - "tsup": "^7.2.0", - "tsx": "^4.19.1", - "typescript": "^5.0.0" - }, - "dependencies": { - "@orama/orama": "workspace:*" - } -} diff --git a/packages/plugin-qps/src/algorithm.ts b/packages/plugin-qps/src/algorithm.ts deleted file mode 100644 index 9b01dac8..00000000 --- a/packages/plugin-qps/src/algorithm.ts +++ /dev/null @@ -1,231 +0,0 @@ -import { AnyIndexStore, AnyOrama, SearchableType, Tokenizer, VectorIndex } from "@orama/orama" -import { avl, bkd, flat, radix, bool } from '@orama/orama/trees' -import { - getVectorSize, index as Index, internalDocumentIDStore, isVectorType } from '@orama/orama/components' - - -type InternalDocumentID = internalDocumentIDStore.InternalDocumentID; -// type InternalDocumentIDStore = internalDocumentIDStore.InternalDocumentIDStore; -// type DocumentID = internalDocumentIDStore.DocumentID; - -export interface QPSIndex extends AnyIndexStore { - indexes: Record - vectorIndexes: Record - searchableProperties: string[] - searchablePropertiesWithTypes: Record - stats: Record> - tokensLength: Map - }> -} - -export function recursiveCreate(indexDatastore: QPSIndex, schema: T['schema'], prefix: string) { - for (const entry of Object.entries(schema)) { - const prop = entry[0] - const type = entry[1] - const path = `${prefix}${prefix ? '.' : ''}${prop}` - - if (typeof type === 'object' && !Array.isArray(type)) { - // Nested - recursiveCreate(indexDatastore, type, path) - continue - } - - if (isVectorType(type)) { - indexDatastore.searchableProperties.push(path) - indexDatastore.searchablePropertiesWithTypes[path] = type - indexDatastore.vectorIndexes[path] = { - size: getVectorSize(type), - vectors: {} - } - } else { - const isArray = /\[/.test(type as string) - switch (type) { - case 'boolean': - case 'boolean[]': - indexDatastore.indexes[path] = { type: 'Bool', node: new bool.BoolNode(), isArray } - break - case 'number': - case 'number[]': - indexDatastore.indexes[path] = { type: 'AVL', node: new avl.AVLTree(0, []), isArray } - break - case 'string': - case 'string[]': - indexDatastore.indexes[path] = { type: 'Radix', node: new radix.RadixTree(), isArray } - // indexDatastore.avgFieldLength[path] = 0 - // indexDatastore.frequencies[path] = {} - // indexDatastore.tokenOccurrences[path] = {} - // indexDatastore.fieldLengths[path] = {} - break - case 'enum': - case 'enum[]': - indexDatastore.indexes[path] = { type: 'Flat', node: new flat.FlatTree(), isArray } - break - case 'geopoint': - indexDatastore.indexes[path] = { type: 'BKD', node: new bkd.BKDTree(), isArray } - break - default: - throw new Error('INVALID_SCHEMA_TYPE: ' + path) - } - - indexDatastore.searchableProperties.push(path) - indexDatastore.searchablePropertiesWithTypes[path] = type - } - } -} - - -const BIT_MASK_20 = 0b11111111111111111111 - -export function calculateTokenQuantum(prevValue: number, bit: number) { - // if (prevValue < 0) { - // throw new Error("Overflow") - // } - // if (bit < 0 || bit > 20) { - // throw new Error("Invalid bit") - // } - - const currentCount = count(prevValue) - const currentSentenceMask = bitmask_20(prevValue) - const newSentenceMask = currentSentenceMask | (1 << bit) - return ((currentCount + 1) << 20) | newSentenceMask -} - -export function insertString( - value: string, - radixTree: radix.RadixTree, - stats: QPSIndex['stats'][string], - prop: string, - internalId: InternalDocumentID, - language: string | undefined, - tokenizer: Tokenizer, -) { - const sentences = value.split(/\.|\?|!/) - - let quantumIndex = 0 - let tokenNumber = 0 - for (const sentence of sentences) { - const tokens = tokenizer.tokenize(sentence, language, prop) - - for (const token of tokens) { - tokenNumber++ - - if (!stats[token]) { - stats[token] = 0 - } - - const tokenBitIndex = Math.min( - quantumIndex, - 20 - ) - - stats.tokenQuantums[internalId][token] = calculateTokenQuantum( - stats.tokenQuantums[internalId][token], - tokenBitIndex - ) - // if (stats.tokenQuantums[internalId][token] < 0) { - // throw new Error("Overflow") - // } - - radixTree.insert(token, internalId) - } - - // Don't increment the quantum index if the sentence is too short - if (tokens.length > 1) { - quantumIndex++ - } - } - - stats.tokensLength.set(internalId, tokenNumber) -} - -export function searchString(prop: { - tokens: string[], - radixNode: radix.RadixNode, - exact: boolean, - tolerance: number, - stats: { - tokensLength: Map, - tokenQuantums: Record>, - }, - boostPerProp: number, - resultMap: Map, -}) { - const tokens = prop.tokens - const radixNode = prop.radixNode - const exact = prop.exact - const tolerance = prop.tolerance - const stats = prop.stats - const boostPerProp = prop.boostPerProp - const resultMap = prop.resultMap - const tokensLength = stats.tokensLength - const tokenQuantums = stats.tokenQuantums - - const findParam = { - term: '', - exact, - tolerance, - } - - let foundWords = {} as Record - const tokenLength = tokens.length - for (let i = 0; i < tokenLength; i++) { - const term = tokens[i] - findParam.term = term - const results = radixNode.find(findParam) - foundWords = { - ...foundWords, - ...results - } - } - - const foundKeys = Object.keys(foundWords) - const foundKeysLength = foundKeys.length - for (let i = 0; i < foundKeysLength; i++) { - const key = foundKeys[i] - const matchedDocs = foundWords[key] - const matchedDocsLength = matchedDocs.length - const isExactMatch = tokens.includes(key) - - for (let j = 0; j < matchedDocsLength; j++) { - const docId = matchedDocs[j] - - const numberOfQuantums = tokensLength.get(docId)! - const tokenQuantumDescriptor = tokenQuantums[docId][key] - - const occurrence = count(tokenQuantumDescriptor) - const bitMask = bitmask_20(tokenQuantumDescriptor) - const score = (occurrence * occurrence / numberOfQuantums + (isExactMatch ? 1 : 0)) * boostPerProp - - if (!resultMap.has(docId)) { - resultMap.set(docId, [score, bitMask]) - continue - } - - const current = resultMap.get(docId)! - - const totalScore = current[0] - + numberOfOnes(current[1] & bitMask) * 2 - + score - - current[0] = totalScore - current[1] = current[1] | bitMask - } - } -} - -export function bitmask_20(n: number) { - return n & BIT_MASK_20 -} -export function count(n: number) { - return n >> 20 -} - -export function numberOfOnes(n: number) { - let i = 0; - do { - if (n&1) { ++i } - // eslint-disable-next-line no-cond-assign - } while (n>>=1) - return i -} \ No newline at end of file diff --git a/packages/plugin-qps/src/index.ts b/packages/plugin-qps/src/index.ts deleted file mode 100644 index 8e6608cf..00000000 --- a/packages/plugin-qps/src/index.ts +++ /dev/null @@ -1,195 +0,0 @@ -import type { create, AnyOrama, SearchableType, IIndex, AnyIndexStore, SearchableValue, Tokenizer, OnlyStrings, FlattenSchemaProperty, TokenScore, WhereCondition } from '@orama/orama' -import { - index as Index, internalDocumentIDStore } from '@orama/orama/components' -import { insertString, QPSIndex, recursiveCreate, searchString } from './algorithm.js'; -import { radix } from '@orama/orama/trees'; - -type InternalDocumentID = internalDocumentIDStore.InternalDocumentID; -type InternalDocumentIDStore = internalDocumentIDStore.InternalDocumentIDStore; - -type CreateParams = Parameters>>[0] -type Component = NonNullable -type IndexParameter = NonNullable -type DocumentID = internalDocumentIDStore.DocumentID; - - -const unusedRadix = new radix.RadixNode('', '', false) -const unusedStats = { - tokenQuantums: {}, - tokensLength: new Map(), -} - -function search(index: QPSIndex, term: string, tokenizer: Tokenizer, language: string | undefined, propertiesToSearch: string[], exact: boolean, tolerance: number, boost: Partial[]>, number>>): TokenScore[] { - const all: Map = new Map() - - const args = { - tokens: tokenizer.tokenize(term, language), - radixNode: unusedRadix, - exact, - tolerance, - stats: unusedStats, - boostPerProp: 0, - all, - resultMap: all, - } - - const propertiesToSearchLength = propertiesToSearch.length - for (let i = 0; i < propertiesToSearchLength; i++) { - const prop = propertiesToSearch[i] - const stats = index.stats[prop] - const boostPerProp = boost[prop] ?? 1 - args.radixNode = index.indexes[prop].node as radix.RadixNode - args.stats = stats - args.boostPerProp = boostPerProp - searchString(args) - } - - const g: [number, [number, number]][] = Array.from(all) - const gLength = g.length - const res: TokenScore[] = [] - for (let i = 0; i < gLength; i++) { - const element = g[i] - const id = element[0] - const score = element[1][0] - - res.push([id, score]) - } - - return res -} - -export function qpsComponents(): { - index: IndexParameter, -} { - return { - index: { - create: function create(orama: T, sharedInternalDocumentStore: T['internalDocumentIDStore'], schema: T['schema']) { - const indexDatastore: QPSIndex = { - indexes: {}, - vectorIndexes: {}, - searchableProperties: [], - searchablePropertiesWithTypes: {}, - stats: {} - } - - recursiveCreate(indexDatastore, schema, '') - - return indexDatastore - }, - insert: function insert( - implementation: IIndex, - indexDatastorage: QPSIndex, - prop: string, - id: DocumentID, - internalId: InternalDocumentID, - value: SearchableValue, - schemaType: SearchableType, - language: string | undefined, - tokenizer: Tokenizer, - docsCount: number - ) { - if (!(schemaType === 'string' || schemaType === 'string[]')) { - return Index.insert(implementation as unknown as IIndex, indexDatastorage as unknown as Index.Index, prop, id, internalId, value, schemaType, language, tokenizer, docsCount) - } - - if (!indexDatastorage.stats[prop]) { - indexDatastorage.stats[prop] = { - tokenQuantums: {}, - tokensLength: new Map() - } - } - - const stats = indexDatastorage.stats[prop] - const radixTree = indexDatastorage.indexes[prop].node as radix.RadixNode - - stats.tokenQuantums[internalId] = {} - - if (Array.isArray(value)) { - for (const item of value) { - insertString( - item as string, - radixTree, - stats, - prop, - internalId, - language, - tokenizer, - ) - } - } else { - insertString( - value as string, - radixTree, - stats, - prop, - internalId, - language, - tokenizer, - ) - } - }, - remove: function remove() { - throw new Error('Not implemented yet') - }, - insertDocumentScoreParameters: () => {throw new Error()}, - insertTokenScoreParameters: () => {throw new Error()}, - removeDocumentScoreParameters: () => {throw new Error()}, - removeTokenScoreParameters: () => {throw new Error()}, - calculateResultScores: () => {throw new Error()}, - search, - searchByWhereClause: function searchByWhereClause(index: AnyIndexStore, tokenizer: Tokenizer, filters: Partial>, language: string | undefined): InternalDocumentID[] { - return Index.searchByWhereClause(index as Index.Index, tokenizer, filters, language) - }, - getSearchableProperties: function getSearchableProperties(index: QPSIndex): string[] { - return index.searchableProperties - }, - getSearchablePropertiesWithTypes: function (index: QPSIndex) { - return index.searchablePropertiesWithTypes - }, - load: function load(sharedInternalDocumentStore: InternalDocumentIDStore, raw: R): QPSIndex { - const dump1 = Index.load(sharedInternalDocumentStore, raw[0]) - - const dump2 = raw[1] as { - radixTrees: [string, boolean, string, unknown][], - stats: [string, { - tokenQuantums: [InternalDocumentID, Record][], - tokensLength: [InternalDocumentID, number][] - }][] - } - - const indexes = { - ...dump1.indexes, - ...Object.fromEntries(dump2.radixTrees.map(([prop, isArray, type, node]) => [prop, { node: radix.RadixNode.fromJSON(node), isArray, type } as Index.Tree])) - }; - - return { - ...dump1, - indexes, - stats: Object.fromEntries(dump2.stats.map(([prop, { tokenQuantums, tokensLength }]) => [prop, { - tokenQuantums, - tokensLength: new Map(tokensLength) - }])) - } as unknown as QPSIndex - }, - save: function save(index: QPSIndex): R { - const baseIndex = index as unknown as Index.Index - const nonStringIndexes = Object.entries(baseIndex.indexes).filter(([, { type }]) => type !== 'Radix') - const dump1 = Index.save({ - ...baseIndex, - indexes: Object.fromEntries(nonStringIndexes) - }) - - const stringIndexes = Object.entries(baseIndex.indexes).filter(([, { type }]) => type === 'Radix') - const dump2 = { - radixTrees: stringIndexes.map(([prop, { node, isArray, type }]) => [prop, isArray, type, node.toJSON()]), - stats: Object.entries(index.stats).map(([prop, { tokenQuantums, tokensLength }]) => [prop, { - tokenQuantums, - tokensLength: Array.from(tokensLength.entries()) - }]) - } - - return [dump1, dump2] as unknown as R - } - } - } -} diff --git a/packages/plugin-qps/test/foo.js b/packages/plugin-qps/test/foo.js deleted file mode 100644 index f9e192b8..00000000 --- a/packages/plugin-qps/test/foo.js +++ /dev/null @@ -1,24 +0,0 @@ -import { create, insertMultiple, search } from '@orama/orama' -import {qpsComponents} from '../dist/index.js' -import dataset from '/Users/allevo/repos/orama/benchmarks/src/dataset.json' assert { type: 'json' } - -const db = create({ - schema: { - description: 'string', - }, - components: { - ...qpsComponents() - } -}) - -await insertMultiple(db, dataset) - -console.log('........') - -search(db, { term: '' }); - -/* -for (let i = 0; i < 2000; ++i) { - search(db, { term: 'League' }); -} -*/ \ No newline at end of file diff --git a/packages/plugin-qps/test/index.test.ts b/packages/plugin-qps/test/index.test.ts deleted file mode 100644 index 54e7e1fb..00000000 --- a/packages/plugin-qps/test/index.test.ts +++ /dev/null @@ -1,52 +0,0 @@ -import t from 'tap' -import { create, insertMultiple, load, save, search } from '@orama/orama' -import {qpsComponents} from '../src/index.js' - -t.test('plugin-qps', async t => { - const db = create({ - schema: { - name: 'string', - age: 'number', - isCool: 'boolean', - algo: 'string[]', - preferredNumbers: 'number[]', - } as const, - components: { - ...qpsComponents() - } - }) - - await insertMultiple(db, [ - { name: 'foo foo foo', age: 33, isCool: true, algo: ['algo1', 'algo2'], preferredNumbers: [20] }, - { name: 'bar bar bar', age: 32, isCool: true, algo: ['algo3'], preferredNumbers: [55] }, - { name: 'baz baz baz', age: 22, isCool: false, algo: ['algo4'], preferredNumbers: [22] } - ]) - - const result = await search(db, { - term: 'b' - }) - - t.equal(result.count, 2) - - const dump = await save(db) - const restored = JSON.parse(JSON.stringify(dump)) - - const db2 = create({ - schema: { - name: 'string', - age: 'number', - isCool: 'boolean', - algo: 'string[]', - preferredNumbers: 'number[]', - } as const, - components: { - ...qpsComponents() - } - }) - await load(db2, restored) - - const result2 = await search(db, { - term: 'b' - }) - t.equal(result2.count, 2) -}) \ No newline at end of file diff --git a/packages/plugin-qps/test/quality.test.ts b/packages/plugin-qps/test/quality.test.ts deleted file mode 100644 index b8ef3ad2..00000000 --- a/packages/plugin-qps/test/quality.test.ts +++ /dev/null @@ -1,403 +0,0 @@ -import t from 'tap' -import { AnyOrama, create, insertMultiple, search } from '@orama/orama' -import { bitmask_20, calculateTokenQuantum, count, numberOfOnes } from '../src/algorithm.js' -import { qpsComponents } from '../src/index.js' - -async function createNew(docs: { description: string }[]) { - const db = await create({ - schema: { - description: 'string' - } as const, - components: { - tokenizer: { - stopWords: ['the', 'is', 'on', 'under'] - }, - ...qpsComponents() - } - }) - await insertMultiple( - db, - docs.map(d => ({ ...d })) - ) - return db -} -async function searchNew(db: AnyOrama, { term }: { term: string }) { - const searchResult = await search(db, { - mode: 'fulltext', - term - }) - return searchResult.hits -} - -t.test('order of the results', async (t) => { - const docs = [ - { id: '0', description: 'The pen is on the table. The cat is under the table. The dog is near the table' }, - { id: '1', description: 'The pen is on the table' }, - { id: '2', description: 'The cat is under the table' }, - { id: '3', description: 'The dog is near the table' } - ] - const s = await createNew(docs) - - t.test('if the document more words, it should be the first result', async (t) => { - const results = await searchNew(s, { - term: 'table' - }) - - console.log(s.data.index) - - t.equal(results.length, 4) - t.equal(results[0].id, '0') - const score = results[0].score - - for (let i = 1; i < results.length; i++) { - t.ok(results[i].score < score, 'Score should be less than the first result') - } - }) - - t.test('every doc permutation has the correct order', async (t) => { - const docs = permutator([ - { id: '0', description: 'The pen is on the table. The cat is under the table. The dog is near the table' }, - { id: '1', description: 'The pen is on the table' }, - { id: '2', description: 'The cat is under the table' }, - { id: '3', description: 'The dog is near the table' } - ]) - for (const d of docs) { - const s = await createNew(d) - const results = await searchNew(s, { - term: 'table' - }) - - t.equal(results.length, 4) - t.equal(results[0].id, '0') - const score = results[0].score - - for (let i = 1; i < results.length; i++) { - t.ok(results[i].score < score, 'Score should be less than the first result') - } - } - }) - - t.test('multiple words increments score', async (t) => { - const results = await searchNew(s, { - term: 'table pen' - }) - t.equal(results.length, 4) - t.equal(results[0].id, '0') - const score = results[0].score - - for (let i = 1; i < results.length; i++) { - t.ok(results[i].score < score, 'Score should be less than the first result') - } - - const score2 = results[1].score - for (let i = 2; i < results.length; i++) { - t.ok(results[i].score < score2, 'Score should be less than the second result') - } - }) - - t.test('same matches + same length, same score', async (t) => { - const results = await searchNew(s, { - term: 'table pen cat' - }) - t.equal(results.length, 4) - t.equal(results[0].id, '0') - t.equal(results[1].id, '1') - t.equal(results[2].id, '2') - t.equal(results[3].id, '3') - t.equal(results[1].score, results[2].score) - }) - - t.test('shorter, more score', async (t) => { - const results = await searchNew(s, { - term: 'table pen dog' - }) - t.equal(results.length, 4) - t.equal(results[0].id, '0') - t.equal(results[1].id, '1') - t.equal(results[2].id, '3') - t.equal(results[3].id, '2') - }) - - t.test('matching word score is higher than prefixed word', async (t) => { - const docs = [ - { id: '0', description: 'table' }, - { id: '1', description: 'tab' } - ] - const s = await createNew(docs) - - const results = await searchNew(s, { - term: 'tab' - }) - t.equal(results[0].id, '1') - t.equal(results[1].id, '0') - t.ok(results[1].score < results[0].score) - }) - - t.test("prefix score doesn't depend on matched word lenght", async (t) => { - const docs = [{ description: 'table' }, { description: 'tab' }] - const s = await createNew(docs) - - const results = await searchNew(s, { - term: 't' - }) - t.equal(results[0].score, results[1].score) - }) -}) - -t.test('calculateTokenQuantum', async t => { - let n = 0 - n = calculateTokenQuantum(n, 0) - t.equal(n, 1 + (1 << 20), 'set the 0th bit and the 20th bit') - n = calculateTokenQuantum(n, 0) - t.equal(n, 1 + (2 << 20), 'increment the counter') - n = calculateTokenQuantum(n, 1) - t.equal(n, 3 + (3 << 20), ' 1 + 2 + (3 count)') - n = calculateTokenQuantum(n, 2) - t.equal(n, 7 + (4 << 20), ' 1 + 2 + 4 + (4 count)') - - t.equal(bitmask_20(n), 7) - t.equal(count(n), 4) -}) - -t.test('numberOfOnes', async t => { - t.equal(0, numberOfOnes(0)) - t.equal(1, numberOfOnes(1)) - t.equal(1, numberOfOnes(2)) - t.equal(2, numberOfOnes(3)) - t.equal(1, numberOfOnes(4)) - t.equal(2, numberOfOnes(5)) - t.equal(2, numberOfOnes(6)) - t.equal(3, numberOfOnes(7)) - t.equal(1, numberOfOnes(8)) -}) - -t.test('matching criteria', async t => { - const docs = [ - { id: '0', description: 'Find your way!' }, - ] - const s = await createNew(docs) - - t.test('no match', async t => { - const results = await searchNew(s, { - term: 'unknown words' - }) - t.equal(results.length, 0) - }) - - t.test('match', async t => { - const results = await searchNew(s, { - term: 'way' - }) - t.equal(results.length, 1) - t.equal(results[0].id, '0') - }) - - t.test('match with 2 words', async t => { - const results = await searchNew(s, { - term: 'way find' - }) - t.equal(results.length, 1) - t.equal(results[0].id, '0') - }) - - t.test('the order of the words doesn\'t matter', async t => { - const results1 = await searchNew(s, { - term: 'way find' - }) - t.equal(results1.length, 1) - t.equal(results1[0].id, '0') - const score = results1[0].score - - const results2 = await searchNew(s, { - term: 'way find' - }) - t.equal(results2.length, 1) - t.equal(results2[0].id, '0') - - t.equal(results2[0].score, score) - }) - - t.test('empty string', async t => { - const results = await searchNew(s, { - term: '' - }) - t.equal(results.length, 1) - t.equal(results[0].id, '0') - t.equal(results[0].score, 0) - }) - - t.test('prefix', async t => { - const results = await searchNew(s, { - term: 'w' - }) - t.equal(results.length, 1) - t.equal(results[0].id, '0') - }) -}) - -t.test('long text', async t => { - const docs = [ - { id: '0', description: 'The pen is on the table. '.repeat(100) }, - { id: '1', description: 'The pen is on the table' }, - ] - const s = await createNew(docs) - - const results = await searchNew(s, { - term: 'table' - }) - t.equal(results.length, 2) - t.equal(results[0].id, '0') - t.equal(results[1].id, '1') - t.ok(results[0].score > results[1].score) -}) - -t.test('test', async t => { - const docs = [ - { id: '0', description: 'FLEXIBLE COMFORT.On your marks! The Nike Flex Plus is built for kiddos who want to move all day. With a flexible feel, supersoft cushioning and an easy slip-on design (no laces needed!), these running-inspired shoes are ready to zoom with little feet learning to crawl and walk.360 ComfortSupersoft foam cushioning feels plush with every move.Easy to WearThe slip-on design with 2 pull tabs gets little feet in easily. A stretchy strap with leather on the sides creates a secure feel.Super FlexibleFlexibility grooves under the forefoot help make every growing step feel natural.More BenefitsMesh fabric adds breathability where little feet need it.Reinforced toe tip brings extra durability for kids who drag their toes.Product DetailsNot intended for use as Personal Protective Equipment (PPE)Shown: Game Royal/Midnight Navy/White/Yellow OchreStyle: CW7430-405' }, - { id: '1', description: 'no matter the distance. Easy, adjustable cuffs help you slide these on and off after your warmup or run.BenefitsNike Dri-FIT technology moves sweat away from your skin for quicker evaporation, helping you stay dry and comfortable.Soft knit fabric is lightweight and breathable.Zippered pockets help keep your things secure.Bungee cords at the hem make it easier to change them over your running shoes.Nike Track Club silicone oval logo is inspired by the shape of a track.Product DetailsElastic waistband with a drawcord100% polyesterMachine washImportedShown: Midnight Navy/Summit White/Summit WhiteStyle: FB5503-410' }, - { id: '2', description: 'A NEW GENERATION OF MAX.Nodding to '90s style, the Nike Air Max Axis honors the past while looking to the future. Subtle design lines and branding pay homage to icons like the Air Max 97 and 98, while sleek no-sew skins, airy mesh and its unique heel design keep your look fresh. Of course, Max Air cushions your journey.BenefitsOriginally designed for performance running, the visible Max Air unit provides lasting comfort.The design lines and details nod to the iconic '90s running shoes you lovesuddenly they could see it. Since then, next-generation Air Max shoes have become a hit with athletes and collectors by offering striking color combinations and reliable, lightweight cushioning.'} - ] - const s = await createNew(docs) - - const results = await searchNew(s, { - term: 'running shoes' - }) - - t.equal(results.length, 3) - // The 3° document has the most matches because it: - // - contains running - // - contains shoes twice - // - contains running shoes in the same sentence - t.equal(results[0].id, '2') - // The 2° document is the second because it: - // - contains running - // - contains shoes but only one - // - contains running shoes in the same sentence - t.equal(results[1].id, '1') - // The 1° document is the last because it: - // - not contain running (it contains "running-inspired" but it's not the same in this case) - // - contains shoes - t.equal(results[2].id, '0') -}) - -t.test('test #2', async t => { - const texts = [ - // 0 - "The sun was setting behind the mountains, casting a golden hue over the landscape. Birds chirped as they flew across the sky, their silhouettes blending with the clouds. The air was cool and crisp, filled with the scent of pine trees.", - "She opened the old book, its pages yellowed with time. The words inside told a story of adventure, of brave heroes and distant lands. As she read, the room around her seemed to fade away.", - "The city buzzed with energy as people hurried along the streets. Tall buildings towered over them, casting long shadows. A street vendor called out, selling fresh fruit to passersby, while car horns blared in the distance.", - "On a quiet night, the stars twinkled brightly in the clear sky. A gentle breeze rustled the leaves, and the sound of crickets filled the air. It was a peaceful moment, one that seemed to stretch on forever.", - "The ocean waves crashed against the shore, their rhythm steady and unchanging. Seagulls circled overhead, calling out to one another. A lone figure stood at the water's edge, watching the horizon with a sense of calm.", - // 5 - "In the heart of the forest, the trees stood tall and proud. Sunlight filtered through the leaves, casting dappled shadows on the ground. A deer cautiously stepped out into a clearing, its ears twitching as it listened for danger.", - "The train pulled into the station with a loud screech. Passengers hurried to board, their footsteps echoing on the platform. Inside the train, the seats were worn but comfortable, and the soft hum of the engine filled the air.", - "The storm raged outside, lightning flashing across the sky. Rain pounded against the windows, and the wind howled through the trees. Inside, the fire crackled in the fireplace, offering warmth and light against the storm’s fury.", - "The classroom was filled with the sound of pencils scratching on paper. Students sat at their desks, focused on their assignments. The teacher moved quietly between the rows, offering guidance and encouragement.", - // 9 - "At the edge of the desert, the sand dunes stretched as far as the eye could see. The heat was intense, and the sun beat down relentlessly. In the distance, a caravan made its way slowly across the barren landscape." - ] - const docs = texts.map((text, i) => ({ id: i.toString(), description: text })) - const s = await createNew(docs) - - await t.test('"sun"', async t => { - const results = await searchNew(s, { - term: 'sun' - }) - - // only 3 documents contain the word "sun" - t.equal(results.length, 3) - // This contains the word "sun". - t.equal(results[0].id, '9') - // Also this, but the text is more length, so it has less score. - t.equal(results[1].id, '0') - // This contains the word "Sunlight", so it match as prefix and not as a word. - t.equal(results[2].id, '5') - }) - - await t.test("stormy night", async t => { - const results = await searchNew(s, { - term: 'storm night' - }) - - t.equal(results.length, 2) - // This mention the storm twice - t.equal(results[0].id, '7') - // this mention night only once - t.equal(results[1].id, '3') - // For this reason, the first document has more score - t.ok(results[0].score > results[1].score) - }) - - await t.test('trees casting sun', async t => { - const results = await searchNew(s, { - term: 'trees casting sun' - }) - - t.equal(results.length, 5) - - // This contains the word "sun" and "trees" and "casting" - // Also, "sun" & "casting" are in the same sentence. - t.equal(results[0].id, '0') - - // This contains "trees" and "sun" ("Sunlight") also "casting" but not in the same sentence. - // This score is high because "Sunlight" (so "sun") and "casting" are in the same sentence. - t.equal(results[1].id, '5') - - // This contains only "trees" - t.equal(results[2].id, '7') - - // This contains only "sun". - // This score is less (compared to the previous one) because the sentences are longer. - t.equal(results[3].id, '9') - - // This contains only "casting" - // Again, the score is less because the sentences are longer. - t.equal(results[4].id, '2') - }) - - await t.test('the sound of pencils scratching on paper', async t => { - const results = await searchNew(s, { - term: 'the sound of pencils scratching on paper' - }) - - console.log(results.length) - - t.equal(results.length, 9) - - // This contains a lot of word in the same sentence. - t.equal(results[0].id, '8') - // This contains 2 words in the same sentence. - t.equal(results[1].id, '3') - - // The remaining documents contain only "of" word. - t.equal(results[2].id, '6') - t.equal(results[3].id, '1') - t.equal(results[4].id, '4') - t.equal(results[5].id, '9') - t.equal(results[6].id, '5') - t.equal(results[7].id, '0') - - // This contains "of" in term of "offering", so it has less score. - t.equal(results[8].id, '7') - }) -}) - -function permutator(inputArr: T[]): T[][] { - const result: T[][] = [] - - const permute = (arr, m = []) => { - if (arr.length === 0) { - result.push(m) - } else { - for (let i = 0; i < arr.length; i++) { - const curr = arr.slice() - const next = curr.splice(i, 1) - permute(curr.slice(), m.concat(next)) - } - } - } - - permute(inputArr) - - return result -} diff --git a/packages/plugin-qps/tsconfig.json b/packages/plugin-qps/tsconfig.json deleted file mode 100644 index a60b3643..00000000 --- a/packages/plugin-qps/tsconfig.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "compilerOptions": { - "allowJs": true, - "target": "ES2020", - "module": "NodeNext", - "outDir": "dist", - "jsx": "react", - "noImplicitAny": false, - "lib": ["ESNext", "DOM", "DOM.Iterable"], - "esModuleInterop": true, - "declaration": true, - "forceConsistentCasingInFileNames": true, - "strict": true, - "skipLibCheck": true, - "resolveJsonModule": true, - "sourceMap": true, - "moduleResolution": "nodenext" - }, - "include": ["src/*.ts", "src/**/*.ts", "src/*.tsx", "src/**/*.tsx"] -} diff --git a/packages/plugin-qps/tsup.lib.js b/packages/plugin-qps/tsup.lib.js deleted file mode 100644 index a78a3f79..00000000 --- a/packages/plugin-qps/tsup.lib.js +++ /dev/null @@ -1,17 +0,0 @@ -import { defineConfig } from 'tsup' - -const entry = new URL('src/index.ts', import.meta.url).pathname -const outDir = new URL('dist', import.meta.url).pathname - -export default defineConfig({ - entry: [entry], - splitting: false, - sourcemap: true, - minify: true, - format: ['cjs', 'esm', 'iife'], - globalName: 'pluginEmbeddings', - dts: true, - clean: true, - bundle: true, - outDir -}) diff --git a/turbo.json b/turbo.json index f77a0e4f..2a074019 100644 --- a/turbo.json +++ b/turbo.json @@ -50,10 +50,6 @@ "dependsOn": ["@orama/orama#build"], "outputs": ["dist/**"] }, - "@orama/plugin-qps#build": { - "dependsOn": ["@orama/orama#build"], - "outputs": ["dist/**"] - }, "@orama/plugin-pt15#build": { "dependsOn": ["@orama/orama#build"], "outputs": ["dist/**"]