From 80294c0859b2d7a32691430aa968d2166c00e9cb Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Tue, 6 Aug 2024 11:26:26 +0100 Subject: [PATCH 1/2] Fix updating vector index of legacy vectorizer, comment back in missing test for it --- src/collections/config/classes.ts | 2 +- src/collections/config/integration.test.ts | 124 ++++++++++----------- 2 files changed, 57 insertions(+), 69 deletions(-) diff --git a/src/collections/config/classes.ts b/src/collections/config/classes.ts index 0966a84d..6440327a 100644 --- a/src/collections/config/classes.ts +++ b/src/collections/config/classes.ts @@ -38,7 +38,7 @@ export class MergeWithExisting { if (update.vectorizers !== undefined) { if (Array.isArray(update.vectorizers)) { current.vectorConfig = MergeWithExisting.vectors(current.vectorConfig, update.vectorizers); - } else if (supportsNamedVectors) { + } else if (supportsNamedVectors && current.vectorConfig !== undefined) { const updateVectorizers = { ...update.vectorizers, name: 'default', diff --git a/src/collections/config/integration.test.ts b/src/collections/config/integration.test.ts index 546be6df..b5de18a4 100644 --- a/src/collections/config/integration.test.ts +++ b/src/collections/config/integration.test.ts @@ -1,6 +1,6 @@ /* eslint-disable @typescript-eslint/no-non-null-assertion */ import { WeaviateUnsupportedFeatureError } from '../../errors.js'; -import weaviate, { WeaviateClient } from '../../index.js'; +import weaviate, { WeaviateClient, weaviateV2 } from '../../index.js'; import { PropertyConfig, VectorIndexConfigDynamic, VectorIndexConfigHNSW } from './types/index.js'; const fail = (msg: string) => { @@ -526,71 +526,59 @@ describe('Testing of the collection.config namespace', () => { expect(config.multiTenancy.enabled).toEqual(true); }); - // it('should be able update the config of a collection with legacy vectors', async () => { - // const collectionName = 'TestCollectionConfigUpdateLegacyVectors'; - // const collection = await client.collections.create({ - // name: collectionName, - // properties: [ - // { - // name: 'testProp', - // dataType: 'text', - // }, - // ], - // vectorizer: { - // name: 'none', - // config: {}, - // }, - // }); - // const config = await collection.config - // .update({ - // vectorizers: weaviate.reconfigure.vectorIndex.hnsw({ - // quantizer: weaviate.reconfigure.vectorIndex.quantizer.pq(), - // ef: 4, - // }), - // }) - // .then(() => collection.config.get()); - - // expect(config.name).toEqual(collectionName); - // expect(config.properties).toEqual([ - // { - // name: 'testProp', - // dataType: 'text', - // description: undefined, - // indexSearchable: true, - // indexFilterable: true, - // indexInverted: false, - // vectorizerConfig: undefined, - // nestedProperties: undefined, - // tokenization: 'word', - // }, - // ]); - // expect(config.generative).toBeUndefined(); - // expect(config.reranker).toBeUndefined(); - // expect(config.vectorizers.default.indexConfig).toEqual({ - // skip: false, - // cleanupIntervalSeconds: 300, - // maxConnections: 64, - // efConstruction: 128, - // ef: 4, - // dynamicEfMin: 100, - // dynamicEfMax: 500, - // dynamicEfFactor: 8, - // vectorCacheMaxObjects: 1000000000000, - // flatSearchCutoff: 40000, - // distance: 'cosine', - // quantizer: { - // bitCompression: false, - // segments: 0, - // centroids: 256, - // trainingLimit: 100000, - // encoder: { - // type: 'kmeans', - // distribution: 'log-normal', - // }, - // type: 'pq', - // }, - // }); - // expect(config.vectorizers.default.indexType).toEqual('hnsw'); - // expect(config.vectorizers.default.vectorizer.name).toEqual('none'); - // }); + it('should be able update the config of a collection with legacy vectors', async () => { + const clientV2 = weaviateV2.client({ + host: 'http://localhost:8080', + }); + const collectionName = 'TestCollectionConfigUpdateLegacyVectors'; + await clientV2.schema + .classCreator() + .withClass({ + class: collectionName, + vectorizer: 'none', + }) + .do(); + const collection = client.collections.get(collectionName); + const config = await collection.config + .update({ + vectorizers: weaviate.reconfigure.vectorizer.update({ + vectorIndexConfig: weaviate.reconfigure.vectorIndex.hnsw({ + quantizer: weaviate.reconfigure.vectorIndex.quantizer.pq(), + ef: 4, + }), + }), + }) + .then(() => collection.config.get()); + + expect(config.name).toEqual(collectionName); + expect(config.generative).toBeUndefined(); + expect(config.reranker).toBeUndefined(); + expect(config.vectorizers.default.indexConfig).toEqual({ + skip: false, + cleanupIntervalSeconds: 300, + maxConnections: (await client.getWeaviateVersion().then((ver) => ver.isLowerThan(1, 26, 0))) ? 64 : 32, + efConstruction: 128, + ef: 4, + dynamicEfMin: 100, + dynamicEfMax: 500, + dynamicEfFactor: 8, + vectorCacheMaxObjects: 1000000000000, + flatSearchCutoff: 40000, + distance: 'cosine', + type: 'hnsw', + quantizer: { + bitCompression: false, + segments: 0, + centroids: 256, + trainingLimit: 100000, + encoder: { + type: 'kmeans', + distribution: 'log-normal', + }, + type: 'pq', + }, + }); + expect(config.vectorizers.default.indexType).toEqual('hnsw'); + expect(config.vectorizers.default.vectorizer.name).toEqual('none'); + }); }); From a356e68ea9be6d1d5aa4c9b54f676297ee0a67d3 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Tue, 6 Aug 2024 11:26:45 +0100 Subject: [PATCH 2/2] Improve quantizer typings --- src/collections/config/index.ts | 9 +++++-- src/collections/config/types/vectorIndex.ts | 2 ++ src/collections/configure/parsing.ts | 18 ------------- src/collections/configure/vectorIndex.ts | 29 +++++++++------------ 4 files changed, 21 insertions(+), 37 deletions(-) diff --git a/src/collections/config/index.ts b/src/collections/config/index.ts index e7065224..320545c7 100644 --- a/src/collections/config/index.ts +++ b/src/collections/config/index.ts @@ -16,6 +16,8 @@ import { CollectionConfig, CollectionConfigUpdate, PQConfig, + QuantizerConfig, + SQConfig, VectorIndexConfig, VectorIndexConfigDynamic, VectorIndexConfigFlat, @@ -163,12 +165,15 @@ export class VectorIndex { } export class Quantizer { - static isPQ(config?: PQConfig | BQConfig): config is PQConfig { + static isPQ(config?: QuantizerConfig): config is PQConfig { return config?.type === 'pq'; } - static isBQ(config?: PQConfig | BQConfig): config is BQConfig { + static isBQ(config?: QuantizerConfig): config is BQConfig { return config?.type === 'bq'; } + static isSQ(config?: QuantizerConfig): config is SQConfig { + return config?.type === 'sq'; + } } export const configGuards = { diff --git a/src/collections/config/types/vectorIndex.ts b/src/collections/config/types/vectorIndex.ts index 85e77e42..8c845216 100644 --- a/src/collections/config/types/vectorIndex.ts +++ b/src/collections/config/types/vectorIndex.ts @@ -73,3 +73,5 @@ export type PQEncoderDistribution = 'log-normal' | 'normal'; export type VectorIndexType = 'hnsw' | 'flat' | 'dynamic' | string; export type VectorIndexConfig = VectorIndexConfigHNSW | VectorIndexConfigFlat | VectorIndexConfigDynamic; + +export type QuantizerConfig = PQConfig | BQConfig | SQConfig; diff --git a/src/collections/configure/parsing.ts b/src/collections/configure/parsing.ts index 3a1bae20..09319424 100644 --- a/src/collections/configure/parsing.ts +++ b/src/collections/configure/parsing.ts @@ -39,21 +39,3 @@ export class QuantizerGuards { export function parseWithDefault(value: D | undefined, defaultValue: D): D { return value !== undefined ? value : defaultValue; } - -export const parseQuantizer = (config?: T): T | undefined => { - if (config === undefined) { - return undefined; - } - if (QuantizerGuards.isPQCreate(config)) { - return { - ...config, - type: 'pq', - } as T; - } else if (QuantizerGuards.isBQCreate(config)) { - return { - ...config, - type: 'bq', - } as T; - } - return config; -}; diff --git a/src/collections/configure/vectorIndex.ts b/src/collections/configure/vectorIndex.ts index 7783a841..9f055677 100644 --- a/src/collections/configure/vectorIndex.ts +++ b/src/collections/configure/vectorIndex.ts @@ -16,8 +16,6 @@ import { VectorIndexConfigHNSWUpdate, } from './types/index.js'; -import { parseQuantizer } from './parsing.js'; - const isModuleConfig = (config: ModuleConfig | C): config is ModuleConfig => { return config && typeof config === 'object' && 'name' in config && 'config' in config; }; @@ -40,7 +38,7 @@ const configure = { config: { distance, vectorCacheMaxObjects, - quantizer: parseQuantizer(quantizer), + quantizer: quantizer, }, }; }, @@ -62,7 +60,7 @@ const configure = { ? { ...rest, distance: distanceMetric, - quantizer: parseQuantizer(rest.quantizer), + quantizer: rest.quantizer, } : undefined, }; @@ -177,10 +175,7 @@ const reconfigure = { }): ModuleConfig<'flat', VectorIndexConfigFlatUpdate> => { return { name: 'flat', - config: { - vectorCacheMaxObjects: options.vectorCacheMaxObjects, - quantizer: parseQuantizer(options.quantizer), - }, + config: options, }; }, /** @@ -221,8 +216,8 @@ const reconfigure = { * NOTE: If the vector index already has a quantizer configured, you cannot change its quantizer type; only its values. * So if you want to change the quantizer type, you must recreate the collection. * - * @param {boolean} [options.cache] Whether to cache the quantizer. Default is false. - * @param {number} [options.rescoreLimit] The rescore limit. Default is 1000. + * @param {boolean} [options.cache] Whether to cache the quantizer. + * @param {number} [options.rescoreLimit] The new rescore limit. * @returns {BQConfigCreate} The configuration object. */ bq: (options?: { cache?: boolean; rescoreLimit?: number }): BQConfigUpdate => { @@ -237,11 +232,11 @@ const reconfigure = { * NOTE: If the vector index already has a quantizer configured, you cannot change its quantizer type; only its values. * So if you want to change the quantizer type, you must recreate the collection. * - * @param {number} [options.centroids] The number of centroids. Default is 256. - * @param {PQEncoderDistribution} [options.pqEncoderDistribution] The encoder distribution. Default is 'log-normal'. - * @param {PQEncoderType} [options.pqEncoderType] The encoder type. Default is 'kmeans'. - * @param {number} [options.segments] The number of segments. Default is 0. - * @param {number} [options.trainingLimit] The training limit. Default is 100000. + * @param {number} [options.centroids] The new number of centroids. + * @param {PQEncoderDistribution} [options.pqEncoderDistribution] The new encoder distribution. + * @param {PQEncoderType} [options.pqEncoderType] The new encoder type. + * @param {number} [options.segments] The new number of segments. + * @param {number} [options.trainingLimit] The new training limit. * @returns {PQConfigUpdate} The configuration object. */ pq: (options?: { @@ -270,8 +265,8 @@ const reconfigure = { * NOTE: If the vector index already has a quantizer configured, you cannot change its quantizer type; only its values. * So if you want to change the quantizer type, you must recreate the collection. * - * @param {number} [options.rescoreLimit] The rescore limit. Default is 1000. - * @param {number} [options.trainingLimit] The training limit. Default is 100000. + * @param {number} [options.rescoreLimit] The rescore limit. + * @param {number} [options.trainingLimit] The training limit. * @returns {SQConfigUpdate} The configuration object. */ sq: (options?: { rescoreLimit?: number; trainingLimit?: number }): SQConfigUpdate => {