oramasearch · fasenderos · Sep 4, 2024
diff --git a/packages/orama/src/methods/search-fulltext.ts b/packages/orama/src/methods/search-fulltext.ts
@@ -15,7 +15,7 @@ import type {
   TokenScore,
   TypedDocument
 } from '../types.js'
-import { getNanosecondsTime, removeVectorsFromHits, safeArrayPush, sortTokenScorePredicate } from '../utils.js'
+import { filterAndReduceDocuments, getNanosecondsTime, removeVectorsFromHits, safeArrayPush, sortTokenScorePredicate } from '../utils.js'
 import { createSearchContext, defaultBM25Params, fetchDocuments, fetchDocumentsWithDistinct } from './search.js'
 
 export async function fullTextSearch<T extends AnyOrama, ResultDocument = TypedDocument<T>>(
@@ -34,7 +34,7 @@ export async function fullTextSearch<T extends AnyOrama, ResultDocument = TypedD
   const vectorProperties = Object.keys(orama.data.index.vectorIndexes)
 
   const shouldCalculateFacets = params.facets && Object.keys(params.facets).length > 0
-  const { limit = 10, offset = 0, term, properties, threshold = 1, distinctOn, includeVectors = false } = params
+  const { limit = 10, offset = 0, term, properties, returning, threshold = 1, distinctOn, includeVectors = false } = params
   const isPreflight = params.preflight === true
 
   const { index, docs } = orama.data
@@ -182,10 +182,10 @@ export async function fullTextSearch<T extends AnyOrama, ResultDocument = TypedD
   }
 
   if (typeof results !== 'undefined') {
-    searchResult.hits = results.filter(Boolean)
+    searchResult.hits = filterAndReduceDocuments(results, returning)
 
     // Vectors can be very large, so we remove them from the result if not needed
-    if (!includeVectors) {
+    if (!includeVectors && typeof returning === 'undefined') {
       removeVectorsFromHits(searchResult, vectorProperties)
     }
   }

diff --git a/packages/orama/src/methods/search-hybrid.ts b/packages/orama/src/methods/search-hybrid.ts
@@ -8,7 +8,7 @@ import type {
   HybridWeights
 } from '../types.js'
 import type { InternalDocumentID } from '../components/internal-document-id-store.js'
-import { getNanosecondsTime, safeArrayPush, formatNanoseconds, removeVectorsFromHits } from '../utils.js'
+import { getNanosecondsTime, safeArrayPush, formatNanoseconds, removeVectorsFromHits, filterAndReduceDocuments } from '../utils.js'
 import { intersectFilteredIDs } from '../components/filters.js'
 import { prioritizeTokenScores } from '../components/algorithms.js'
 import { createError } from '../errors.js'
@@ -31,7 +31,7 @@ export async function hybridSearch<T extends AnyOrama, ResultDocument = TypedDoc
     await runBeforeSearch(orama.beforeSearch, orama, params, language)
   }
 
-  const { offset = 0, limit = 10, includeVectors = false } = params
+  const { offset = 0, limit = 10, includeVectors = false, returning } = params
   const shouldCalculateFacets = params.facets && Object.keys(params.facets).length > 0
 
   const [fullTextIDs, vectorIDs] = await Promise.all([
@@ -99,7 +99,8 @@ export async function hybridSearch<T extends AnyOrama, ResultDocument = TypedDoc
     groups = await getGroups<T, ResultDocument>(orama, uniqueTokenScores, params.groupBy)
   }
 
-  const results = (await fetchDocuments(orama, uniqueTokenScores, offset, limit)).filter(Boolean)
+  const documents = await fetchDocuments(orama, uniqueTokenScores, offset, limit)
+  const results = filterAndReduceDocuments(documents, returning)
 
   if (orama.afterSearch) {
     await runAfterSearch(orama.afterSearch, orama, params, language, results as any)
@@ -118,7 +119,7 @@ export async function hybridSearch<T extends AnyOrama, ResultDocument = TypedDoc
     ...(groups ? { groups } : {})
   }
 
-  if (!includeVectors) {
+  if (!includeVectors && typeof returning === 'undefined') {
     const vectorProperties = Object.keys(orama.data.index.vectorIndexes)
     removeVectorsFromHits(returningResults, vectorProperties)
   }

diff --git a/packages/orama/src/methods/search-vector.ts b/packages/orama/src/methods/search-vector.ts
@@ -1,7 +1,7 @@
 import type { AnyOrama, Results, SearchParamsVector, TypedDocument, Result } from '../types.js'
 import type { InternalDocumentID } from '../components/internal-document-id-store.js'
 import { createSearchContext } from './search.js'
-import { getNanosecondsTime, formatNanoseconds } from '../utils.js'
+import { getNanosecondsTime, formatNanoseconds, filterAndReduceDocuments } from '../utils.js'
 import { getFacets } from '../components/facets.js'
 import { createError } from '../errors.js'
 import { findSimilarVectors } from '../components/cosine-similarity.js'
@@ -28,7 +28,7 @@ export async function searchVector<T extends AnyOrama, ResultDocument = TypedDoc
     throw createError('INVALID_VECTOR_INPUT', Object.keys(vector).join(', '))
   }
 
-  const { limit = 10, offset = 0, includeVectors = false } = params
+  const { limit = 10, offset = 0, includeVectors = false, returning } = params
   const vectorIndex = orama.data.index.vectorIndexes[vector!.property]
   const vectorSize = vectorIndex.size
   const vectors = vectorIndex.vectors
@@ -102,7 +102,7 @@ export async function searchVector<T extends AnyOrama, ResultDocument = TypedDoc
     const doc = orama.data.docs.docs[result[0]]
 
     if (doc) {
-      if (!includeVectors) {
+      if (!includeVectors && typeof returning === 'undefined') {
         doc[vector.property] = null
       }
 
@@ -130,7 +130,7 @@ export async function searchVector<T extends AnyOrama, ResultDocument = TypedDoc
 
   return {
     count: results.length,
-    hits: docs.filter(Boolean),
+    hits: filterAndReduceDocuments(docs, returning),
     elapsed: {
       raw: Number(elapsedTime),
       formatted: await formatNanoseconds(elapsedTime)

diff --git a/packages/orama/src/types.ts b/packages/orama/src/types.ts
@@ -161,6 +161,8 @@ export type FacetsParams<T extends AnyOrama> = Partial<Record<LiteralUnion<T['sc
 
 export type FacetDefinition = StringFacetDefinition | NumberFacetDefinition | BooleanFacetDefinition
 
+/** List of document properties to return; nested properties use dot notation (e.g. 'meta.rating'). */
+export type ReturningParams<T extends AnyOrama> = (LiteralUnion<T['schema']> | FlattenSchemaProperty<T>)[]
 export type ReduceFunction<T, R> = (values: ScalarSearchableValue[], acc: T, value: R, index: number) => T
 export type Reduce<T, R = AnyDocument> = {
   reducer: ReduceFunction<T, R>
@@ -291,6 +293,23 @@ export interface SearchParamsFullText<T extends AnyOrama, ResultDocument = Typed
    */
   properties?: '*' | FlattenSchemaProperty<T>[]
 
+  /**
+   * The properties of the document to be returned.
+   * Supports nested objects, allowing root to deepest field extraction while maintaining the original structure.
+   * If provided, only the fields listed in this array will be included in the result.
+   * 
+   * NOTE: This functionality is recommended primarily for server-side use. While it reduces the payload of the response
+   * by including only the specified fields, it can slow down the search.
+   * 
+   * @example
+   * const results = await search(db, {
+   *  term: 'Personal Computer',
+   *  returning: ['title', 'meta.rating'],
+   * })
+   *
+   */
+  returning?: ReturningParams<T>
+
   /**
    * The number of matched documents to return.
    */
@@ -481,6 +500,8 @@ export interface SearchParamsFullText<T extends AnyOrama, ResultDocument = Typed
    * Whether to include the vectors in the result.
    * By default, Orama will not include the vectors, as they can be quite large.
    * If set to "false" (default), vectors will be presented as "null".
+   * 
+   * NOTE: Ignored when the "returning" option is provided: vector properties are then not replaced with "null".
    */
   includeVectors?: boolean
 }
@@ -527,6 +548,23 @@ export interface SearchParamsHybrid<T extends AnyOrama, ResultDocument = TypedDo
    */
   properties?: '*' | FlattenSchemaProperty<T>[]
 
+  /**
+   * The properties of the document to be returned.
+   * Supports nested objects, allowing root to deepest field extraction while maintaining the original structure.
+   * If provided, only the fields listed in this array will be included in the result.
+   * 
+   * NOTE: This functionality is recommended primarily for server-side use. While it reduces the payload of the response
+   * by including only the specified fields, it can slow down the search.
+   * 
+   * @example
+   * const results = await search(db, {
+   *  term: 'Personal Computer',
+   *  returning: ['title', 'meta.rating'],
+   * })
+   *
+   */
+  returning?: ReturningParams<T>
+
   /**
    * The BM25 parameters to use.
    *
@@ -567,6 +605,8 @@ export interface SearchParamsHybrid<T extends AnyOrama, ResultDocument = TypedDo
    * Whether to include the vectors in the result.
    * By default, Orama will not include the vectors, as they can be quite large.
    * If set to "false" (default), vectors will be presented as "null".
+   * 
+   * NOTE: Ignored when the "returning" option is provided: vector properties are then not replaced with "null".
    */
   includeVectors?: boolean
 
@@ -675,6 +715,23 @@ export interface SearchParamsVector<T extends AnyOrama, ResultDocument = TypedDo
     property: string
   }
 
+  /**
+   * The properties of the document to be returned.
+   * Supports nested objects, allowing root to deepest field extraction while maintaining the original structure.
+   * If provided, only the fields listed in this array will be included in the result.
+   * 
+   * NOTE: This functionality is recommended primarily for server-side use. While it reduces the payload of the response
+   * by including only the specified fields, it can slow down the search.
+   * 
+   * @example
+   * const results = await search(db, {
+   *  term: 'Personal Computer',
+   *  returning: ['title', 'meta.rating'],
+   * })
+   *
+   */
+  returning?: ReturningParams<T>
+
   /**
    * The minimum similarity score between the vector and the document.
    * By default, Orama will use 0.8.
@@ -715,6 +772,8 @@ export interface SearchParamsVector<T extends AnyOrama, ResultDocument = TypedDo
    * Whether to include the vectors in the result.
    * By default, Orama will not include the vectors, as they can be quite large.
    * If set to "false" (default), vectors will be presented as "null".
+   * 
+   * NOTE: Ignored when the "returning" option is provided: vector properties are then not replaced with "null".
    */
   includeVectors?: boolean
 }

diff --git a/packages/orama/src/utils.ts b/packages/orama/src/utils.ts
@@ -1,4 +1,4 @@
-import type { AnyDocument, GeosearchDistanceUnit, Results, SearchableValue, TokenScore } from './types.js'
+import type { AnyDocument, AnyOrama, GeosearchDistanceUnit, Result, Results, ReturningParams, SearchableValue, TokenScore, TypedDocument } from './types.js'
 import { createError } from './errors.js'
 
 const baseId = Date.now().toString().slice(5)
@@ -337,3 +337,103 @@ export function removeVectorsFromHits(searchResult: Results<AnyDocument>, vector
     }
   }))
 }
+
+/**
+ * Selects and returns only the specified fields from a document.
+ * Supports nested objects, allowing root to deepest field extraction while maintaining the original structure.
+ *
+ * A field is picked when its whole path resolves through own properties to a value other than `undefined`,
+ * so falsy values (`0`, `false`, `''`, `null`) are kept. A path that cannot be resolved adds nothing,
+ * not even an empty parent object. Picked values are not cloned: objects are shared with the source document.
+ *
+ * @example
+ * const doc = {
+ *    firstname: 'John',
+ *    age: 0,
+ *    address: { street: 'Main St', city: 'New York' },
+ *    details: { hair: 'Brown', sizes: { weight: 80, height: 180 } },
+ * };
+ *
+ * const fields = ['firstname', 'age', 'address', 'details.sizes.height', 'details.eyes'];
+ *
+ * console.log(pickDocumentProperties(doc, fields));
+ * {
+ *    firstname: 'John',
+ *    age: 0,
+ *    address: { street: 'Main St', city: 'New York' },
+ *    details: { sizes: { height: 180 }}
+ * }
+ *
+ * @param doc The document to process.
+ * @param returning The list of fields to extract, including nested fields (e.g., 'address.street').
+ * @returns A new document with only the selected fields, preserving the original nested structure.
+ *          If none of the fields can be resolved, the result is an empty object.
+ */
+export function pickDocumentProperties<T extends AnyOrama, ResultDocument = TypedDocument<T>>(
+  doc: ResultDocument,
+  returning: ReturningParams<T>
+): ResultDocument {
+  type Node = Record<string, unknown>
+  const isNode = (value: unknown): value is Node => typeof value === 'object' && value !== null
+  // Reads own properties only, so inherited members and `__proto__` are never picked or written through
+  const readOwn = (node: unknown, key: string): unknown =>
+    isNode(node) && key !== '__proto__' && Object.prototype.hasOwnProperty.call(node, key) ? node[key] : undefined
+  const result = {} as ResultDocument
+  for (const field of returning) {
+    // Splits field into its parts (e.g., 'address.street' -> ['address', 'street'])
+    const parts = (field as string).split('.')
+    // Resolve the whole path before writing, so an unresolved path leaves no empty parent object behind
+    if (parts.reduce<unknown>(readOwn, doc) === undefined) continue
+    let source: unknown = doc
+    let target: unknown = result
+    for (let i = 0; isNode(target) && i < parts.length; i++) {
+      const part = parts[i]
+      const existing = readOwn(target, part)
+      source = readOwn(source, part)
+      if (i === parts.length - 1) {
+        target[part] = source
+      } else if (existing === source) {
+        // The parent was already picked whole; descending would write into the source document
+        target = undefined
+      } else {
+        if (!isNode(existing)) target[part] = {}
+        target = target[part]
+      }
+    }
+  }
+  return result
+}
+
+/**
+ * Cleans an array of fetched documents by removing falsy items.
+ * An optional array of fields can be provided in order to select and return only the
+ * specified fields from each document, supporting nested objects and maintaining the original structure.
+ * Hits for which the picked document is empty are removed as well.
+ * The given array and its items are never modified; picked hits are shallow copies.
+ *
+ * @param results The results of a fetch documents to process.
+ * @param returning An optional list of fields to extract, including nested fields.
+ *                  When it is undefined or empty, the documents are returned as they are.
+ * @returns A new array with the cleaned results
+ */
+export function filterAndReduceDocuments<T extends AnyOrama, ResultDocument = TypedDocument<T>>(
+  results: Result<ResultDocument>[],
+  returning?: ReturningParams<T>
+): Result<ResultDocument>[] {
+  // Removes falsy documents
+  const hits = results.filter(Boolean)
+  // Without fields to pick, the documents are returned as they are
+  if (!returning?.length) {
+    return hits
+  }
+  const reduced: Result<ResultDocument>[] = []
+  for (const hit of hits) {
+    const document = pickDocumentProperties(hit.document, returning)
+    // Remove empty object
+    if (Object.keys(document as any).length > 0) {
+      // Copy the hit instead of reassigning its document, so the caller's objects are not mutated
+      reduced.push({ ...hit, document })
+    }
+  }
+  return reduced
+}