Skip to content

Commit

Permalink
Implement remove for pt15
Browse files Browse the repository at this point in the history
  • Loading branch information
allevo committed Oct 14, 2024
1 parent 3c79d0b commit 1edb08a
Show file tree
Hide file tree
Showing 9 changed files with 116 additions and 43 deletions.
4 changes: 1 addition & 3 deletions packages/orama/src/components/documents-store.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,7 @@ export function getAll<T extends AnyOrama, ResultDocument extends TypedDocument<
return store.docs
}

export function store(store: DocumentsStore, id: DocumentID, doc: AnyDocument): boolean {
const internalId = getInternalDocumentId(store.sharedInternalDocumentStore, id)

export function store(store: DocumentsStore, id: DocumentID, internalId: InternalDocumentID, doc: AnyDocument): boolean {
if (typeof store.docs[internalId] !== 'undefined') {
return false
}
Expand Down
7 changes: 4 additions & 3 deletions packages/orama/src/components/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -303,14 +303,13 @@ function removeScalar(
index: Index,
prop: string,
id: DocumentID,
internalId: InternalDocumentID,
value: SearchableValue,
schemaType: ScalarSearchableType,
language: string | undefined,
tokenizer: Tokenizer,
docsCount: number
): boolean {
const internalId = getInternalDocumentId(index.sharedInternalDocumentStore, id)

if (isVectorType(schemaType)) {
delete index.vectorIndexes[prop].vectors[id]
return true
Expand Down Expand Up @@ -354,6 +353,7 @@ export function remove(
index: Index,
prop: string,
id: DocumentID,
internalId: InternalDocumentID,
value: SearchableValue,
schemaType: SearchableType,
language: string | undefined,
Expand All @@ -366,6 +366,7 @@ export function remove(
index,
prop,
id,
internalId,
value,
schemaType as ScalarSearchableType,
language,
Expand All @@ -379,7 +380,7 @@ export function remove(
const elements = value as Array<string | number | boolean>
const elementsLength = elements.length
for (let i = 0; i < elementsLength; i++) {
removeScalar(implementation, index, prop, id, elements[i], innerSchemaType, language, tokenizer, docsCount)
removeScalar(implementation, index, prop, id, internalId, elements[i], innerSchemaType, language, tokenizer, docsCount)
}

return true
Expand Down
6 changes: 4 additions & 2 deletions packages/orama/src/methods/insert.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ async function innerInsertAsync<T extends AnyOrama>(
throw createError('DOCUMENT_ID_MUST_BE_STRING', typeof id)
}

if (!orama.documentsStore.store(docs, id, doc)) {
const internalId = getInternalDocumentId(orama.internalDocumentIDStore, id)
if (!orama.documentsStore.store(docs, id, internalId, doc)) {
throw createError('DOCUMENT_ALREADY_EXISTS', id)
}

Expand Down Expand Up @@ -100,7 +101,8 @@ function innerInsertSync<T extends AnyOrama>(
throw createError('DOCUMENT_ID_MUST_BE_STRING', typeof id)
}

if (!orama.documentsStore.store(docs, id, doc)) {
const internalId = getInternalDocumentId(orama.internalDocumentIDStore, id)
if (!orama.documentsStore.store(docs, id, internalId, doc)) {
throw createError('DOCUMENT_ALREADY_EXISTS', id)
}

Expand Down
14 changes: 9 additions & 5 deletions packages/orama/src/methods/remove.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { runMultipleHook, runSingleHook } from '../components/hooks.js'
import {
DocumentID,
getDocumentIdFromInternalId,
getInternalDocumentId
getInternalDocumentId,
} from '../components/internal-document-id-store.js'
import { trackRemoval } from '../components/sync-blocking-checker.js'
import { isAsyncFunction } from '../utils.js'
Expand Down Expand Up @@ -40,9 +40,10 @@ async function removeAsync<T extends AnyOrama>(
return false
}

const internalId = getInternalDocumentId(orama.internalDocumentIDStore, id)
const docId = getDocumentIdFromInternalId(
orama.internalDocumentIDStore,
getInternalDocumentId(orama.internalDocumentIDStore, id)
internalId
)
const docsCount = orama.documentsStore.count(docs)

Expand Down Expand Up @@ -79,6 +80,7 @@ async function removeAsync<T extends AnyOrama>(
orama.data.index,
prop,
id,
internalId,
value,
schemaType,
language,
Expand Down Expand Up @@ -115,7 +117,7 @@ async function removeAsync<T extends AnyOrama>(
await runSingleHook(orama.afterRemove, orama, docId)
}

orama.documentsStore.remove(orama.data.docs, id)
orama.documentsStore.remove(orama.data.docs, id, internalId)

trackRemoval(orama)
return result
Expand All @@ -130,9 +132,10 @@ function removeSync<T extends AnyOrama>(orama: T, id: DocumentID, language?: str
return false
}

const internalId = getInternalDocumentId(orama.internalDocumentIDStore, id)
const docId = getDocumentIdFromInternalId(
orama.internalDocumentIDStore,
getInternalDocumentId(orama.internalDocumentIDStore, id)
internalId
)
const docsCount = orama.documentsStore.count(docs)

Expand Down Expand Up @@ -160,6 +163,7 @@ function removeSync<T extends AnyOrama>(orama: T, id: DocumentID, language?: str
orama.data.index,
prop,
id,
internalId,
value,
schemaType,
language,
Expand Down Expand Up @@ -187,7 +191,7 @@ function removeSync<T extends AnyOrama>(orama: T, id: DocumentID, language?: str
runSingleHook(orama.afterRemove, orama, docId)
}

orama.documentsStore.remove(orama.data.docs, id)
orama.documentsStore.remove(orama.data.docs, id, internalId)

trackRemoval(orama)
return result
Expand Down
5 changes: 3 additions & 2 deletions packages/orama/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -951,6 +951,7 @@ export interface IIndex<I extends AnyIndexStore> {
index: T,
prop: string,
id: DocumentID,
internalId: InternalDocumentID,
value: SearchableValue,
schemaType: SearchableType,
language: string | undefined,
Expand Down Expand Up @@ -1010,8 +1011,8 @@ export interface IDocumentsStore<D extends AnyDocumentStore = AnyDocumentStore>
get(store: D, id: DocumentID): Optional<AnyDocument>
getMultiple(store: D, ids: DocumentID[]): Optional<AnyDocument>[]
getAll(store: D): SyncOrAsyncValue<Record<InternalDocumentID, AnyDocument>>
store(store: D, id: DocumentID, doc: AnyDocument): boolean
remove(store: D, id: DocumentID): SyncOrAsyncValue<boolean>
store(store: D, id: DocumentID, internalId: InternalDocumentID, doc: AnyDocument): boolean
remove(store: D, id: DocumentID, internalId: InternalDocumentID): SyncOrAsyncValue<boolean>
count(store: D): number

load<R = unknown>(sharedInternalDocumentStore: InternalDocumentIDStore, raw: R): D
Expand Down
40 changes: 20 additions & 20 deletions packages/orama/tests/remove.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,26 @@ t.test('should correctly remove documents with vector properties', async (t) =>
t.ok(await getByID(db, id2))
})

// Regression test for issue #766 (named in the test title): removes the only
// document that was inserted and then inserts new documents afterwards.
t.test(
'test case for #766: Zero division when computing scores after removing all documents from an index.',
async (t) => {
const db = create({
schema: {
name: 'string'
} as const
})

const id = insert(db, { name: 'test' })

// Remove the single document inserted above.
const success = remove(db, id as string)

// Insert again after the removal. No assertion is made on these two calls.
insert(db, { name: 'foo' })
insert(db, { name: 'bar' })

// The only explicit assertion: `remove` returned a truthy value.
t.ok(success)
}
)

function createSimpleDB() {
let i = 0
const db = create({
Expand Down Expand Up @@ -354,23 +374,3 @@ function createSimpleDB() {

return [db, id1, id2, id3, id4] as const
}

t.test(
'test case for #766: Zero division when computing scores after removing all documents from an index.',
async (t) => {
const db = create({
schema: {
name: 'string'
} as const
})

const id = insert(db, { name: 'test' })

const success = remove(db, id as string)

insert(db, { name: 'foo' })
insert(db, { name: 'bar' })

t.ok(success)
}
)
31 changes: 31 additions & 0 deletions packages/plugin-pt15/src/algorithm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -181,3 +181,34 @@ export function searchString(

return ret
}

/**
 * Removes one occurrence of `internalId` from the id lists stored under every
 * prefix of every token of `value`.
 *
 * The value is tokenized with `tokenizer.tokenize(value, language, prop)`. For
 * the token at index `i`, the bucket
 * `positionsStorage[MAX_POSITION - get_position(i, tokenCount) - 1]` is
 * selected, and each prefix of the token (from the full token down to its
 * first character) is looked up in that bucket. When a list exists and
 * contains `internalId`, its first occurrence is spliced out.
 *
 * Lists that become empty are left in place; their keys are not deleted.
 *
 * @param value - Text whose tokens should be un-indexed.
 * @param positionsStorage - Buckets indexed by position, each mapping a token prefix to a list of ids.
 * @param prop - Property name, forwarded to the tokenizer.
 * @param internalId - Id to remove from the matching lists.
 * @param tokenizer - Tokenizer used to split `value`.
 * @param language - Language forwarded to the tokenizer; may be `undefined`.
 */
export function removeString(
  value: string,
  positionsStorage: PositionsStorage,
  prop: string,
  internalId: InternalDocumentID,
  tokenizer: Tokenizer,
  language: string | undefined,
) {
  const words = tokenizer.tokenize(value, language, prop)
  const wordCount = words.length

  for (let wordIndex = 0; wordIndex < wordCount; wordIndex++) {
    const word = words[wordIndex]
    const bucket = positionsStorage[MAX_POSITION - get_position(wordIndex, wordCount) - 1]

    // Walk the prefixes from the whole word down to its first character.
    for (let prefixLength = word.length; prefixLength >= 1; prefixLength--) {
      const docIds = bucket[word.slice(0, prefixLength)]
      if (!docIds) {
        continue
      }

      const at = docIds.indexOf(internalId)
      if (at >= 0) {
        docIds.splice(at, 1)
      }
    }
  }
}
35 changes: 32 additions & 3 deletions packages/plugin-pt15/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import type { AnyOrama, SearchableType, IIndex, AnyIndexStore, SearchableValue, Tokenizer, OnlyStrings, FlattenSchemaProperty, TokenScore, WhereCondition, OramaPluginSync, AnySchema, ObjectComponents } from '@orama/orama'
import {
index as Index, internalDocumentIDStore } from '@orama/orama/components'
import { PT15IndexStore, insertString, recursiveCreate, PositionsStorage, searchString } from './algorithm.js';
import { PT15IndexStore, insertString, recursiveCreate, PositionsStorage, searchString, removeString } from './algorithm.js';

type InternalDocumentID = internalDocumentIDStore.InternalDocumentID;
type InternalDocumentIDStore = internalDocumentIDStore.InternalDocumentIDStore;
Expand Down Expand Up @@ -73,8 +73,37 @@ function createComponents(schema: AnySchema): Partial<ObjectComponents<any, any,
)
}
},
remove: function remove() {
throw new Error('not implemented yet')
// remove: <T extends I>(implementation: IIndex<T>, index: T, prop: string, id: DocumentID, internalId: InternalDocumentID, value: SearchableValue, schemaType: SearchableType, language: string | undefined, tokenizer: Tokenizer, docsCount: number) => SyncOrAsyncValue<boolean>;
/**
 * Un-indexes `value` for the document identified by `internalId`.
 *
 * Schema types other than `'string'` and `'string[]'` are delegated unchanged
 * to `Index.remove`, and its result is returned. For the two string types,
 * `removeString` is called once for a scalar value or once per element for an
 * array value, against `indexDatastorage.indexes[prop].node`, and `true` is
 * returned.
 */
remove: function remove(implementation: IIndex<PT15IndexStore>, indexDatastorage: PT15IndexStore, prop: string, id: DocumentID, internalId: InternalDocumentID, value: SearchableValue, schemaType: SearchableType, language: string | undefined, tokenizer: Tokenizer, docsCount: number) {
  const isStringSchema = schemaType === 'string' || schemaType === 'string[]'
  if (!isStringSchema) {
    return Index.remove(implementation as IIndex<Index.Index>, indexDatastorage as Index.Index, prop, id, internalId, value, schemaType, language, tokenizer, docsCount)
  }

  const storage = indexDatastorage.indexes[prop].node as PositionsStorage

  // Treat a scalar as a one-element list so both shapes share a single loop.
  const items = Array.isArray(value) ? value : [value]
  for (const item of items) {
    removeString(item as string, storage, prop, internalId, tokenizer, language)
  }

  return true
},
insertDocumentScoreParameters: () => {throw new Error()},
insertTokenScoreParameters: () => {throw new Error()},
Expand Down
17 changes: 12 additions & 5 deletions packages/plugin-pt15/test/index.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import t from 'tap'
import { create, insertMultiple, load, save, search } from '@orama/orama'
import { create, insertMultiple, load, remove, save, search } from '@orama/orama'
import {pluginPT15} from '../src/index.js'
import { get_position } from '../src/algorithm.js'

Expand Down Expand Up @@ -49,9 +49,9 @@ t.test('plugin-pt15', async t => {
})

await insertMultiple(db, [
{ name: 'The pen is on the table', age: 33, isCool: true, algo: ['algo1', 'algo2'], preferredNumbers: [20] },
{ name: 'The can is near the table', age: 32, isCool: true, algo: ['algo3'], preferredNumbers: [55] },
{ name: 'My table is cool', age: 22, isCool: false, algo: ['algo4'], preferredNumbers: [22] }
{ id: '1', name: 'The pen is on the table', age: 33, isCool: true, algo: ['algo1', 'algo2'], preferredNumbers: [20] },
{ id: '2', name: 'The can is near the table', age: 32, isCool: true, algo: ['algo3'], preferredNumbers: [55] },
{ id: '3', name: 'My table is cool', age: 22, isCool: false, algo: ['algo4'], preferredNumbers: [22] }
])

const result = await search(db, {
Expand All @@ -75,8 +75,15 @@ t.test('plugin-pt15', async t => {
})
await load(db2, restored)

const result2 = await search(db, {
const result2 = await search(db2, {
term: 't'
})
t.equal(result2.count, 3)

await remove(db2, '1')

const result3 = await search(db2, {
term: 't'
})
t.equal(result3.count, 2)
})

0 comments on commit 1edb08a

Please sign in to comment.