Skip to content

feat: implements vector search in datastore #1337

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 34 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
88e1349
feat: Support for field update operators in the Datastore API and res…
gcf-owl-bot[bot] Oct 7, 2024
1a02e42
🦉 Updates from OwlBot post-processor
gcf-owl-bot[bot] Oct 7, 2024
cdce374
feat: Add FindNearest API to the stable branch
gcf-owl-bot[bot] Oct 11, 2024
b6c5f10
🦉 Updates from OwlBot post-processor
gcf-owl-bot[bot] Oct 11, 2024
119f124
create vector search types
kevkim-codes Oct 21, 2024
d9cdd47
add new protos
kevkim-codes Oct 21, 2024
371ab04
added vector search functionality to query and proto conversion
kevkim-codes Oct 23, 2024
7caad07
revert prettier file
kevkim-codes Oct 23, 2024
da44890
add unit tests
kevkim-codes Oct 23, 2024
44aabf1
remove unit test
kevkim-codes Oct 23, 2024
ca0e797
Merge branch 'main' into vector-search
kevkim-codes Oct 23, 2024
5bf1c89
basic query test
kevkim-codes Oct 24, 2024
e260a43
Merge branch 'vector-search' of github.com:kevkim-codes/nodejs-datast…
kevkim-codes Oct 24, 2024
e5e9753
Merge branch 'main' into vector-search
kevkim-codes Oct 24, 2024
48bf71b
fix linting issue
kevkim-codes Oct 24, 2024
4b52449
Merge branch 'vector-search' of github.com:kevkim-codes/nodejs-datast…
kevkim-codes Oct 24, 2024
e7efaca
remove unused import
kevkim-codes Oct 24, 2024
f6deba1
fix prettier issues
kevkim-codes Oct 24, 2024
071aba9
fix eslint issue
kevkim-codes Oct 24, 2024
276c791
add proto query conversion unit test
kevkim-codes Oct 28, 2024
a882893
Merge branch 'main' into vector-search
kevkim-codes Oct 28, 2024
55c5f32
fix lint issues
kevkim-codes Oct 28, 2024
8e91394
Merge branch 'vector-search' of github.com:kevkim-codes/nodejs-datast…
kevkim-codes Oct 28, 2024
bfd52f5
fix lint issues
kevkim-codes Oct 28, 2024
4d57dda
add commit
kevkim-codes Nov 4, 2024
0efa611
Merge branch 'main' into vector-search
kevkim-codes Nov 4, 2024
f80efd2
Merge branch 'main' into vector-search
kevkim-codes Nov 8, 2024
84e7ef3
update tests
kevkim-codes Nov 8, 2024
e9c1372
Merge branch 'vector-search' of github.com:kevkim-codes/nodejs-datast…
kevkim-codes Nov 8, 2024
c9cef05
add systems test query
kevkim-codes Nov 13, 2024
71b5832
add vector query test
kevkim-codes Nov 13, 2024
cb35443
git push origin proto conversion
kevkim-codes Nov 19, 2024
2a4cb45
change test entity
kevkim-codes Nov 19, 2024
8619969
Merge branch 'main' of github.com:kevkim-codes/nodejs-datastore into …
kevkim-codes Nov 19, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion protos/protos.d.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 25 additions & 0 deletions src/entity.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import {protobuf as Protobuf} from 'google-gax';
import * as path from 'path';
import {google} from '../protos/protos';
import {and, PropertyFilter} from './filter';
import {Vector} from './vector';

// eslint-disable-next-line @typescript-eslint/no-namespace
export namespace entity {
Expand Down Expand Up @@ -1268,6 +1269,30 @@ export namespace entity {
queryProto.filter = and(allFilters).toProto();
}

// Attach vector-search (find nearest) settings to the query proto. This only
// runs when Query.findNearest() has both set the `vectorSearch` flag and
// stored `vectorOptions` on the query.
if (query.vectorSearch && query.vectorOptions) {
// Unwraps the query vector: a Vector instance yields its underlying `value`
// array, anything else (a plain number array or undefined) is returned as-is.
// NOTE(review): both branches only *cast* to IValue; no Value message is
// built here. Confirm the transport layer accepts a bare number[] in this
// position.
function queryVectorToArray(
queryVector: Vector | Array<number> | undefined
): google.datastore.v1.IValue | undefined {
if (queryVector instanceof Vector) {
return queryVector.value as google.datastore.v1.IValue;
} else {
return queryVector as google.datastore.v1.IValue;
}
}

// NOTE(review): `limit` and `distanceThreshold` are always wrapped as
// `{value: ...}`, even when the corresponding option is undefined (both are
// optional on VectorQueryOptions). Confirm that a wrapper holding an
// undefined value is treated as "not set" downstream.
const vectorProto: google.datastore.v1.FindNearest = {
vectorProperty: {name: query.vectorOptions.vectorProperty},
queryVector: queryVectorToArray(query.vectorOptions.queryVector),
// The local DistanceMeasure enum value is cast to the proto enum type.
distanceMeasure: query.vectorOptions
.distanceMeasure as google.datastore.v1.FindNearest.DistanceMeasure,
limit: {value: query.vectorOptions.limit},
distanceResultProperty: query.vectorOptions.distanceResultProperty,
distanceThreshold: {value: query.vectorOptions.distanceThreshold},
};

queryProto.findNearest = vectorProto;
}

return queryProto;
}

Expand Down
4 changes: 3 additions & 1 deletion src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ import {
import * as is from 'is';
import {Transform, pipeline} from 'stream';

import {entity, Entities, Entity, EntityProto, ValueProto} from './entity';
import {entity, Entities, Entity, ValueProto} from './entity';
import {AggregateField} from './aggregate';
import Key = entity.Key;
export {Entity, Key, AggregateField};
Expand Down Expand Up @@ -489,6 +489,8 @@ class Datastore extends DatastoreRequest {

options.projectId = options.projectId || process.env.DATASTORE_PROJECT_ID;

// prod: datastore.googleapis.com
// nightly: nightly-datastore.sandbox.googleapis.com
this.defaultBaseUrl_ = 'datastore.googleapis.com';
this.determineBaseUrl_(options.apiEndpoint);

Expand Down
38 changes: 38 additions & 0 deletions src/query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import {CallOptions} from 'google-gax';
import {RunQueryStreamOptions} from '../src/request';
import * as gaxInstance from 'google-gax';
import {google} from '../protos/protos';
import {VectorQueryOptions} from './vector';

export type Operator =
| '='
Expand Down Expand Up @@ -76,6 +77,7 @@ export interface Filter {
class Query {
scope?: Datastore | Transaction;
namespace?: string | null;
vectorOptions?: VectorQueryOptions;
kinds: string[];
filters: Filter[];
entityFilters: EntityFilter[];
Expand All @@ -86,6 +88,7 @@ class Query {
endVal: string | Buffer | null;
limitVal: number;
offsetVal: number;
vectorSearch = false;

constructor(scope?: Datastore | Transaction, kinds?: string[] | null);
constructor(
Expand Down Expand Up @@ -256,6 +259,40 @@ class Query {
return this;
}

/**
 * Configures this query to perform a vector distance (similarity) search.
 *
 * The supplied options are stored on the query and the query is flagged as a
 * vector search; the same query instance is returned so calls can be chained.
 * When executed, the query searches the property named by `vectorProperty`
 * for the entries closest to `queryVector`.
 *
 * @example
 * ```
 * // Ask for the 10 entities whose 'embedding' property is closest to
 * // [41, 42] by Euclidean distance.
 * const vectorQuery = datastore.createQuery('Kind').findNearest({
 *   vectorProperty: 'embedding',
 *   queryVector: [41, 42],
 *   limit: 10,
 *   distanceMeasure: DistanceMeasure.EUCLIDEAN,
 *   distanceResultProperty: 'distance',
 * });
 *
 * const [entities] = await datastore.runQuery(vectorQuery);
 * ```
 *
 * @param {VectorQueryOptions} options - Options controlling the vector query.
 *     `limit` is the upper bound of results to return and must be a positive
 *     number (the documented maximum of 1000 is not checked here).
 *     `distanceMeasure` selects the type of distance that is calculated.
 * @returns {Query} This query, for chaining.
 * @throws {Error} If `limit` is provided but is not greater than zero.
 * @throws {Error} If `queryVector` is provided but is empty.
 */
findNearest(options: VectorQueryOptions): Query {
  // Compare against null/undefined explicitly: a truthiness test would let
  // `limit: 0` slip through because 0 is falsy. `!(x > 0)` also rejects NaN.
  if (options.limit != null && !(options.limit > 0)) {
    throw new Error('limit should be a positive limit number');
  }

  // Works for both plain arrays and Vector instances (Vector exposes `length`).
  if (options.queryVector && options.queryVector.length === 0) {
    throw new Error('vector size must be larger than 0');
  }

  this.vectorOptions = options;
  this.vectorSearch = true;
  return this;
}

/**
* Filter a query by ancestors.
*
Expand Down Expand Up @@ -584,6 +621,7 @@ export interface QueryProto {
limit?: {};
offset?: number;
filter?: {};
findNearest?: {};
}

/**
Expand Down
121 changes: 121 additions & 0 deletions src/vector.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
/*!
* Copyright 2024 Google LLC.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

const VECTOR_VALUE = 31;

/**
 * Distance functions that can be selected for a vector search through
 * `VectorQueryOptions.distanceMeasure`.
 *
 * The numeric value is handed to the query proto, where it is cast to
 * `google.datastore.v1.FindNearest.DistanceMeasure`.
 * NOTE(review): the values presumably mirror that proto enum; confirm against
 * the generated protos before renumbering.
 */
export enum DistanceMeasure {
// No distance measure chosen.
DISTANCE_MEASURE_UNSPECIFIED = 0,
// Euclidean distance between the vectors.
EUCLIDEAN = 1,
// Cosine distance between the vectors.
COSINE = 2,
// Dot product of the vectors.
DOT_PRODUCT = 3,
}

/**
 * Plain-object form of a vector, as produced by `Vector._toDict()`.
 */
interface VectorDict {
// One `{double_value}` entry per vector component, in order.
array_value: {values: {double_value: number}[]};
// Numeric "meaning" tag; `Vector._toDict()` sets it to VECTOR_VALUE.
meaning: number;
// `Vector._toDict()` always sets this to true.
exclude_from_indexes: boolean;
}

/**
 * A list of numeric components, intended to be passed as the `queryVector`
 * of `query.findNearest`.
 *
 * Every component is normalised on construction by round-tripping it through
 * `toString()` and `parseFloat`. `_toDict()` exposes the components as an
 * `array_value` of `double_value` entries tagged with the vector meaning.
 */
export class Vector {
  value: number[];

  /**
   * @param value Components of the vector; each one is re-parsed as a float.
   */
  constructor(value: number[]) {
    const toFloat = (component: number): number =>
      Number.parseFloat(component.toString());
    this.value = value.map(component => toFloat(component));
  }

  /** Returns the component stored at `index`. */
  get(index: number): number {
    const {value} = this;
    return value[index];
  }

  /**
   * Returns a new Vector holding the components in `[start, end)`, using the
   * same argument semantics as `Array.prototype.slice`.
   */
  slice(start?: number, end?: number): Vector {
    const portion = this.value.slice(start, end);
    return new Vector(portion);
  }

  /** Number of components in the vector. */
  get length(): number {
    const {length} = this.value;
    return length;
  }

  /**
   * Component-wise strict equality against another Vector.
   *
   * @throws {Error} If `other` is not a Vector instance.
   */
  equals(other: Vector): boolean {
    if (other instanceof Vector) {
      const mine = this.value;
      const theirs = other.value;
      if (mine.length !== theirs.length) {
        return false;
      }
      return mine.every((component, i) => component === theirs[i]);
    }
    throw new Error('Cannot compare Vector to a non-Vector object.');
  }

  /** Human-readable form, e.g. `Vector<1, 2, 3>`. */
  toString(): string {
    const components = this.value.join(', ');
    return 'Vector<' + components + '>';
  }

  /**
   * Builds the plain-object representation of this vector: the components as
   * `double_value` entries, the vector meaning tag, and indexing excluded.
   */
  _toDict(): VectorDict {
    const values = this.value.map(component => ({double_value: component}));
    return {
      array_value: {values},
      meaning: VECTOR_VALUE,
      exclude_from_indexes: true,
    };
  }
}

/**
 * Specifies the behavior of a vector search query generated by a call to
 * {@link Query.findNearest}.
 */
export interface VectorQueryOptions {
/**
 * Name of the vector property to search on. It is forwarded as the
 * `vectorProperty.name` of the query's find-nearest settings.
 */
vectorProperty?: string;

/**
 * The vector to measure distance from, compared against the
 * `vectorProperty` values. May be a `Vector` or a plain array of numbers;
 * `Query.findNearest` throws if it is provided but empty.
 */
queryVector?: Vector | Array<number>;

/**
 * Specifies what type of distance is calculated when performing the query.
 */
distanceMeasure: DistanceMeasure;

/**
 * Specifies the upper bound of results to return; must be a positive integer
 * with a maximum value of 1000.
 */
limit?: number;

/**
 * Name of a property that will be set on each returned result and will
 * contain the computed distance for that result.
 * NOTE(review): the earlier wording described this as optional, but the
 * field is declared as required here; confirm which is intended.
 */
distanceResultProperty: string;

/**
 * Specifies a threshold beyond which less similar results are not returned.
 * The behavior of the specified `distanceMeasure` affects the meaning of the
 * distance threshold.
 *
 * - For `distanceMeasure: "EUCLIDEAN"`, the meaning of `distanceThreshold` is:
 *   SELECT docs WHERE euclidean_distance <= distanceThreshold
 * - For `distanceMeasure: "COSINE"`, the meaning of `distanceThreshold` is:
 *   SELECT docs WHERE cosine_distance <= distanceThreshold
 * - For `distanceMeasure: "DOT_PRODUCT"`, the meaning of `distanceThreshold` is:
 *   SELECT docs WHERE dot_product_distance >= distanceThreshold
 */
distanceThreshold?: number;
}
31 changes: 31 additions & 0 deletions system-test/datastore.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ import {Entities, entity, Entity} from '../src/entity';
import {Query, RunQueryInfo, ExecutionStats} from '../src/query';
import KEY_SYMBOL = entity.KEY_SYMBOL;
import {transactionExpiredError} from '../src/request';
import {DistanceMeasure, Vector, VectorQueryOptions} from '../src/vector';
import {startServer} from '../mock-server/datastore-server';

const async = require('async');

Expand Down Expand Up @@ -3296,5 +3298,34 @@ async.each(
});
});
});

// Exercises Query.findNearest end-to-end against the local mock server
// listening on localhost:50051.
describe('vector search query', () => {
  // Deliberately a plain `it`: an exclusive `it.only` restricts the run to
  // this single test and silently disables the rest of the system tests.
  it('should complete a request successfully with vector search options', async () => {
    // NOTE(review): the callback handed to startServer is not awaited by this
    // test, so a failing assertion inside it may surface after the test has
    // already been reported. Confirm startServer's contract and await it if
    // possible.
    startServer(async () => {
      const customDatastore = new Datastore({
        namespace: `${Date.now()}`,
        apiEndpoint: 'localhost:50051',
      });

      const vectorOptions: VectorQueryOptions = {
        vectorProperty: 'embedding',
        queryVector: [1.0, 2.0, 3.0],
        limit: 2,
        distanceMeasure: DistanceMeasure.EUCLIDEAN,
        distanceResultProperty: 'distance',
        distanceThreshold: 0.5,
      };

      const query = customDatastore
        .createQuery('Kind')
        .findNearest(vectorOptions);

      const [entities] = await customDatastore.runQuery(query);

      assert.deepEqual(entities, new Vector([1.0, 2.0, 3.0]));
    });
  });
});
}
);
30 changes: 27 additions & 3 deletions test/entity.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,12 @@ import * as sinon from 'sinon';
import {Datastore} from '../src';
import {Entity, entity} from '../src/entity';
import {IntegerTypeCastOptions} from '../src/query';
import {PropertyFilter, EntityFilter, and} from '../src/filter';
import {PropertyFilter, and} from '../src/filter';
import {
entityObject,
expectedEntityProto,
} from './fixtures/entityObjectAndProto';
import {DistanceMeasure} from '../src/vector';

export function outOfBoundsError(opts: {
propertyName?: string;
Expand Down Expand Up @@ -1502,12 +1503,26 @@ describe('entity', () => {
op: 'AND',
},
},
findNearest: {
distanceMeasure: 1,
distanceResultField: 'vector_distance',
limit: 3,
queryVector: [1, 2, 3],
vectorField: 'embedding_field',
},
};

it('should support all configurations of a query', () => {
const ancestorKey = new entity.Key({
path: ['Kind2', 'somename'],
});
const vectorOptions = {
vectorProperty: 'embedding_property',
queryVector: [1.0, 2.0, 3.0],
limit: 3,
distanceMeasure: DistanceMeasure.EUCLIDEAN,
distanceResultProperty: 'vector_distance',
};

const ds = new Datastore({projectId: 'project-id'});

Expand All @@ -1521,7 +1536,8 @@ describe('entity', () => {
.select('name')
.limit(1)
.offset(1)
.hasAncestor(ancestorKey);
.hasAncestor(ancestorKey)
.findNearest(vectorOptions);

assert.deepStrictEqual(testEntity.queryToQueryProto(query), queryProto);
});
Expand Down Expand Up @@ -1589,6 +1605,13 @@ describe('entity', () => {
const ancestorKey = new entity.Key({
path: ['Kind2', 'somename'],
});
const vectorOptions = {
vectorProperty: 'embedding_property',
queryVector: [1.0, 2.0, 3.0],
limit: 3,
distanceMeasure: DistanceMeasure.EUCLIDEAN,
distanceResultProperty: 'vector_distance',
};

const ds = new Datastore({projectId: 'project-id'});

Expand All @@ -1602,7 +1625,8 @@ describe('entity', () => {
.select('name')
.limit(1)
.offset(1)
.hasAncestor(ancestorKey);
.hasAncestor(ancestorKey)
.findNearest(vectorOptions);
assert.deepStrictEqual(testEntity.queryToQueryProto(query), queryProto);
});

Expand Down
Loading
Loading