Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add "returning" search option to select only specified fields from a document #770

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions packages/orama/src/methods/search-fulltext.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ import type {
TokenScore,
TypedDocument
} from '../types.js'
import { getNanosecondsTime, removeVectorsFromHits, safeArrayPush, sortTokenScorePredicate } from '../utils.js'
import { filterAndReduceDocuments, getNanosecondsTime, removeVectorsFromHits, safeArrayPush, sortTokenScorePredicate } from '../utils.js'
import { createSearchContext, defaultBM25Params, fetchDocuments, fetchDocumentsWithDistinct } from './search.js'

export async function fullTextSearch<T extends AnyOrama, ResultDocument = TypedDocument<T>>(
Expand All @@ -34,7 +34,7 @@ export async function fullTextSearch<T extends AnyOrama, ResultDocument = TypedD
const vectorProperties = Object.keys(orama.data.index.vectorIndexes)

const shouldCalculateFacets = params.facets && Object.keys(params.facets).length > 0
const { limit = 10, offset = 0, term, properties, threshold = 1, distinctOn, includeVectors = false } = params
const { limit = 10, offset = 0, term, properties, returning, threshold = 1, distinctOn, includeVectors = false } = params
const isPreflight = params.preflight === true

const { index, docs } = orama.data
Expand Down Expand Up @@ -182,10 +182,10 @@ export async function fullTextSearch<T extends AnyOrama, ResultDocument = TypedD
}

if (typeof results !== 'undefined') {
searchResult.hits = results.filter(Boolean)
searchResult.hits = filterAndReduceDocuments(results, returning)

// Vectors can be very large, so we remove them from the result if not needed
if (!includeVectors) {
if (!includeVectors && typeof returning === 'undefined') {
removeVectorsFromHits(searchResult, vectorProperties)
}
}
Expand Down
9 changes: 5 additions & 4 deletions packages/orama/src/methods/search-hybrid.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import type {
HybridWeights
} from '../types.js'
import type { InternalDocumentID } from '../components/internal-document-id-store.js'
import { getNanosecondsTime, safeArrayPush, formatNanoseconds, removeVectorsFromHits } from '../utils.js'
import { getNanosecondsTime, safeArrayPush, formatNanoseconds, removeVectorsFromHits, filterAndReduceDocuments } from '../utils.js'
import { intersectFilteredIDs } from '../components/filters.js'
import { prioritizeTokenScores } from '../components/algorithms.js'
import { createError } from '../errors.js'
Expand All @@ -31,7 +31,7 @@ export async function hybridSearch<T extends AnyOrama, ResultDocument = TypedDoc
await runBeforeSearch(orama.beforeSearch, orama, params, language)
}

const { offset = 0, limit = 10, includeVectors = false } = params
const { offset = 0, limit = 10, includeVectors = false, returning } = params
const shouldCalculateFacets = params.facets && Object.keys(params.facets).length > 0

const [fullTextIDs, vectorIDs] = await Promise.all([
Expand Down Expand Up @@ -99,7 +99,8 @@ export async function hybridSearch<T extends AnyOrama, ResultDocument = TypedDoc
groups = await getGroups<T, ResultDocument>(orama, uniqueTokenScores, params.groupBy)
}

const results = (await fetchDocuments(orama, uniqueTokenScores, offset, limit)).filter(Boolean)
const documents = await fetchDocuments(orama, uniqueTokenScores, offset, limit)
const results = filterAndReduceDocuments(documents, returning)

if (orama.afterSearch) {
await runAfterSearch(orama.afterSearch, orama, params, language, results as any)
Expand All @@ -118,7 +119,7 @@ export async function hybridSearch<T extends AnyOrama, ResultDocument = TypedDoc
...(groups ? { groups } : {})
}

if (!includeVectors) {
if (!includeVectors && typeof returning === 'undefined') {
const vectorProperties = Object.keys(orama.data.index.vectorIndexes)
removeVectorsFromHits(returningResults, vectorProperties)
}
Expand Down
8 changes: 4 additions & 4 deletions packages/orama/src/methods/search-vector.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import type { AnyOrama, Results, SearchParamsVector, TypedDocument, Result } from '../types.js'
import type { InternalDocumentID } from '../components/internal-document-id-store.js'
import { createSearchContext } from './search.js'
import { getNanosecondsTime, formatNanoseconds } from '../utils.js'
import { getNanosecondsTime, formatNanoseconds, filterAndReduceDocuments } from '../utils.js'
import { getFacets } from '../components/facets.js'
import { createError } from '../errors.js'
import { findSimilarVectors } from '../components/cosine-similarity.js'
Expand All @@ -28,7 +28,7 @@ export async function searchVector<T extends AnyOrama, ResultDocument = TypedDoc
throw createError('INVALID_VECTOR_INPUT', Object.keys(vector).join(', '))
}

const { limit = 10, offset = 0, includeVectors = false } = params
const { limit = 10, offset = 0, includeVectors = false, returning } = params
const vectorIndex = orama.data.index.vectorIndexes[vector!.property]
const vectorSize = vectorIndex.size
const vectors = vectorIndex.vectors
Expand Down Expand Up @@ -102,7 +102,7 @@ export async function searchVector<T extends AnyOrama, ResultDocument = TypedDoc
const doc = orama.data.docs.docs[result[0]]

if (doc) {
if (!includeVectors) {
if (!includeVectors && typeof returning === 'undefined') {
doc[vector.property] = null
}

Expand Down Expand Up @@ -130,7 +130,7 @@ export async function searchVector<T extends AnyOrama, ResultDocument = TypedDoc

return {
count: results.length,
hits: docs.filter(Boolean),
hits: filterAndReduceDocuments(docs, returning),
elapsed: {
raw: Number(elapsedTime),
formatted: await formatNanoseconds(elapsedTime)
Expand Down
59 changes: 59 additions & 0 deletions packages/orama/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,8 @@ export type FacetsParams<T extends AnyOrama> = Partial<Record<LiteralUnion<T['sc

export type FacetDefinition = StringFacetDefinition | NumberFacetDefinition | BooleanFacetDefinition

/**
 * List of document property paths accepted by the "returning" search option.
 */
export type ReturningParams<T extends AnyOrama> = (LiteralUnion<T['schema']> | FlattenSchemaProperty<T>)[]

export type ReduceFunction<T, R> = (values: ScalarSearchableValue[], acc: T, value: R, index: number) => T
export type Reduce<T, R = AnyDocument> = {
reducer: ReduceFunction<T, R>
Expand Down Expand Up @@ -291,6 +293,23 @@ export interface SearchParamsFullText<T extends AnyOrama, ResultDocument = Typed
*/
properties?: '*' | FlattenSchemaProperty<T>[]

/**
* The properties of the document to be returned.
* Supports nested objects, allowing root to deepest field extraction while maintaining the original structure.
* If provided, only the fields listed in this array will be included in the result.
*
* NOTE: This functionality is recommended primarily for server-side use. While it reduces the payload of the response
* by including only the specified fields, it can slow down the search.
*
* @example
* const results = await search(db, {
* term: 'Personal Computer',
* returning: ['title', 'meta.rating'],
* })
*
*/
returning?: ReturningParams<T>

/**
* The number of matched documents to return.
*/
Expand Down Expand Up @@ -481,6 +500,8 @@ export interface SearchParamsFullText<T extends AnyOrama, ResultDocument = Typed
* Whether to include the vectors in the result.
* By default, Orama will not include the vectors, as they can be quite large.
* If set to "false" (default), vectors will be presented as "null".
*
* NOTE: This option is ignored when "returning" is provided; in that case vectors are not replaced with "null".
*/
includeVectors?: boolean
}
Expand Down Expand Up @@ -527,6 +548,23 @@ export interface SearchParamsHybrid<T extends AnyOrama, ResultDocument = TypedDo
*/
properties?: '*' | FlattenSchemaProperty<T>[]

/**
* The properties of the document to be returned.
* Supports nested objects, allowing root to deepest field extraction while maintaining the original structure.
* If provided, only the fields listed in this array will be included in the result.
*
* NOTE: This functionality is recommended primarily for server-side use. While it reduces the payload of the response
* by including only the specified fields, it can slow down the search.
*
* @example
* const results = await search(db, {
* term: 'Personal Computer',
* returning: ['title', 'meta.rating'],
* })
*
*/
returning?: ReturningParams<T>

/**
* The BM25 parameters to use.
*
Expand Down Expand Up @@ -567,6 +605,8 @@ export interface SearchParamsHybrid<T extends AnyOrama, ResultDocument = TypedDo
* Whether to include the vectors in the result.
* By default, Orama will not include the vectors, as they can be quite large.
* If set to "false" (default), vectors will be presented as "null".
*
* NOTE: This option is ignored when "returning" is provided; in that case vectors are not replaced with "null".
*/
includeVectors?: boolean

Expand Down Expand Up @@ -675,6 +715,23 @@ export interface SearchParamsVector<T extends AnyOrama, ResultDocument = TypedDo
property: string
}

/**
* The properties of the document to be returned.
* Supports nested objects, allowing root to deepest field extraction while maintaining the original structure.
* If provided, only the fields listed in this array will be included in the result.
*
* NOTE: This functionality is recommended primarily for server-side use. While it reduces the payload of the response
* by including only the specified fields, it can slow down the search.
*
* @example
* const results = await search(db, {
* term: 'Personal Computer',
* returning: ['title', 'meta.rating'],
* })
*
*/
returning?: ReturningParams<T>

/**
* The minimum similarity score between the vector and the document.
* By default, Orama will use 0.8.
Expand Down Expand Up @@ -715,6 +772,8 @@ export interface SearchParamsVector<T extends AnyOrama, ResultDocument = TypedDo
* Whether to include the vectors in the result.
* By default, Orama will not include the vectors, as they can be quite large.
* If set to "false" (default), vectors will be presented as "null".
*
* NOTE: This option is ignored when "returning" is provided; in that case vectors are not replaced with "null".
*/
includeVectors?: boolean
}
Expand Down
102 changes: 101 additions & 1 deletion packages/orama/src/utils.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import type { AnyDocument, GeosearchDistanceUnit, Results, SearchableValue, TokenScore } from './types.js'
import type { AnyDocument, AnyOrama, GeosearchDistanceUnit, Result, Results, ReturningParams, SearchableValue, TokenScore, TypedDocument } from './types.js'
import { createError } from './errors.js'

const baseId = Date.now().toString().slice(5)
Expand Down Expand Up @@ -337,3 +337,103 @@ export function removeVectorsFromHits(searchResult: Results<AnyDocument>, vector
}
}))
}

/**
 * Builds a new document containing only the requested fields of `doc`.
 * Fields may be nested paths written in dot notation; the nesting of the original
 * document is reproduced in the result.
 *
 * Contract:
 * - A field is picked when every segment of its path is an own property holding a value
 *   other than `undefined`. Falsy values such as `0`, `false`, `''` and `null` are kept.
 * - A path that cannot be fully resolved contributes nothing: no empty intermediate
 *   objects are left behind for it.
 * - Inherited members (e.g. `toString`) and members of primitives (e.g. `title.length`)
 *   are never picked.
 * - `doc` is never written to. Picked values are copied by reference, not cloned.
 *
 * @example
 * const doc = {
 *   firstname: 'John',
 *   lastname: 'Doe',
 *   age: 30,
 *   address: { street: 'Main St', city: 'New York' },
 *   details: {
 *     hair: 'Brown',
 *     sizes: {
 *       weight: 80,
 *       height: 180
 *     },
 *   },
 * };
 *
 * const fields = ['firstname', 'address', 'details.sizes.height'];
 *
 * console.log(pickDocumentProperties(doc, fields));
 * {
 *   firstname: 'John',
 *   address: { street: 'Main St', city: 'New York' },
 *   details: { sizes: { height: 180 }}
 * }
 *
 * @param doc The document to process.
 * @param returning The list of fields to extract, including nested fields (e.g., 'address.street').
 * @returns A new document with only the selected fields, preserving the original nested structure.
 * If none of the fields can be resolved, the resulting document is an empty object.
 */
export function pickDocumentProperties<T extends AnyOrama, ResultDocument = TypedDocument<T>>(
  doc: ResultDocument,
  returning: ReturningParams<T>
): ResultDocument {
  const hasOwn = Object.prototype.hasOwnProperty
  const isContainer = (value: unknown): value is Record<string, unknown> =>
    typeof value === 'object' && value !== null

  const result: Record<string, unknown> = {}

  for (const field of returning) {
    // 'address.street' -> ['address', 'street']
    const parts = String(field).split('.')
    const lastIndex = parts.length - 1

    // Pass 1: resolve the whole path on the source document before touching the result,
    // so that an unresolved path cannot leave empty intermediate objects behind.
    const containers: Record<string, unknown>[] = []
    let cursor: unknown = doc
    let found = true
    for (const part of parts) {
      if (!isContainer(cursor) || !hasOwn.call(cursor, part) || cursor[part] === undefined) {
        found = false
        break
      }
      containers.push(cursor)
      cursor = cursor[part]
    }
    if (!found) continue

    // Pass 2: recreate the nesting in the result and store the resolved value at the leaf.
    let target = result
    for (const [i, part] of parts.entries()) {
      if (i === lastIndex) {
        target[part] = cursor
        break
      }

      const existing = target[part]
      // An ancestor of this field was already picked as a whole (same reference as in the
      // source document): the nested field is already included, and descending would
      // write into the source document itself.
      if (existing === containers[i + 1]) break

      if (isContainer(existing)) {
        target = existing
      } else {
        const created: Record<string, unknown> = {}
        target[part] = created
        target = created
      }
    }
  }

  return result as ResultDocument
}

/**
 * Cleans an array of search hits by removing falsy items.
 * When a non-empty `returning` list is provided, each hit's document is additionally
 * reduced to the listed fields (nested paths supported, original structure preserved),
 * and hits whose reduced document has no keys are dropped.
 *
 * The input array and its hit objects are not modified: reduced hits are returned as
 * shallow copies carrying the new `document`.
 *
 * @param results The results of a fetch documents to process.
 * @param returning An optional list of fields to extract, including nested fields.
 * @returns Cleaned array of hits
 */
export function filterAndReduceDocuments<T extends AnyOrama, ResultDocument = TypedDocument<T>>(
  results: Result<ResultDocument>[],
  returning?: ReturningParams<T>
): Result<ResultDocument>[] {
  // No projection requested (missing or empty list): only drop falsy hits
  if (!returning?.length) {
    return results.filter(Boolean)
  }

  const projected: Result<ResultDocument>[] = []
  for (const hit of results) {
    // Removes falsy hits
    if (!hit) continue

    const document = pickDocumentProperties<T, ResultDocument>(hit.document, returning)
    // Drop hits for which none of the requested fields could be picked
    if (Object.keys(document as object).length === 0) continue

    // Copy instead of assigning to hit.document so the caller's objects stay untouched
    projected.push({ ...hit, document })
  }
  return projected
}
Loading