From 2bdeb79f1d9a64588bb62659d770dce25fe1a4a3 Mon Sep 17 00:00:00 2001 From: Dirk Kulawiak Date: Thu, 9 May 2024 10:27:09 -0700 Subject: [PATCH 1/5] Add support for mistral --- .../classes/config_named_vectors.py | 39 +++++++++++++++++++ .../collections/classes/config_vectorizers.py | 26 +++++++++++++ weaviate/connect/integrations.py | 31 +++++++++++++-- 3 files changed, 92 insertions(+), 4 deletions(-) diff --git a/weaviate/collections/classes/config_named_vectors.py b/weaviate/collections/classes/config_named_vectors.py index 13b3e94e1..383c9fabe 100644 --- a/weaviate/collections/classes/config_named_vectors.py +++ b/weaviate/collections/classes/config_named_vectors.py @@ -29,6 +29,7 @@ _Text2VecGPT4AllConfigCreate, _Text2VecHuggingFaceConfigCreate, _Text2VecJinaConfigCreate, + _Text2VecMistralConfig, _Text2VecOctoConfig, _Text2VecOllamaConfig, _Text2VecOpenAIConfigCreate, @@ -225,6 +226,44 @@ def text2vec_contextionary( vector_index_config=vector_index_config, ) + @staticmethod + def text2vec_mistral( + name: str, + *, + source_properties: Optional[List[str]] = None, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + vectorize_collection_name: bool = True, + model: Optional[str] = None, + ) -> _NamedVectorConfigCreate: + """Create a named vector using the `text2vec-mistral` model. + + See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-mistral) + for detailed usage. + + Arguments: + `name` + The name of the named vector. + `source_properties` + Which properties should be included when vectorizing. By default all text properties are included. + `vector_index_config` + The configuration for Weaviate's vector index. Use wvc.config.Configure.VectorIndex to create a vector index configuration. None by default + `vectorize_collection_name` + Whether to vectorize the collection name. Defaults to `True`. + `model` + The model to use. Defaults to `None`, which uses the server-defined default. + `vectorize_collection_name` + Whether to vectorize the collection name. Defaults to `True`. + """ + return _NamedVectorConfigCreate( + name=name, + source_properties=source_properties, + vectorizer=_Text2VecMistralConfig( + model=model, + vectorizeClassName=vectorize_collection_name, + ), + vector_index_config=vector_index_config, + ) + @staticmethod def text2vec_octoai( name: str, diff --git a/weaviate/collections/classes/config_vectorizers.py b/weaviate/collections/classes/config_vectorizers.py index 23667226f..51be1a1b5 100644 --- a/weaviate/collections/classes/config_vectorizers.py +++ b/weaviate/collections/classes/config_vectorizers.py @@ -92,6 +92,7 @@ class Vectorizers(str, Enum): TEXT2VEC_CONTEXTIONARY = "text2vec-contextionary" TEXT2VEC_GPT4ALL = "text2vec-gpt4all" TEXT2VEC_HUGGINGFACE = "text2vec-huggingface" + TEXT2VEC_MISTRAL = "text2vec-mistral" TEXT2VEC_OCTOAI = "text2vec-octoai" TEXT2VEC_OLLAMA = "text2vec-ollama" TEXT2VEC_OPENAI = "text2vec-openai" @@ -230,6 +231,12 @@ class _Text2VecHuggingFaceConfigCreate(_Text2VecHuggingFaceConfig, _VectorizerCo pass +class _Text2VecMistralConfig(_VectorizerConfigCreate): + vectorizer: Vectorizers = Field(default=Vectorizers.TEXT2VEC_MISTRAL, frozen=True, exclude=True) + model: Optional[str] + vectorizeClassName: bool + + OpenAIType = Literal["text", "code"] @@ -805,6 +812,25 @@ def text2vec_huggingface( vectorizeClassName=vectorize_collection_name, ) + @staticmethod + def text2vec_mistral( + *, + model: Optional[str] = None, + vectorize_collection_name: bool = True, + ) -> _VectorizerConfigCreate: + """Create a `_Text2VecMistralConfig` object for use when vectorizing using the `text2vec-mistral` model. + + See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-mistral) + for detailed usage. + + Arguments: + `model` + The model to use. Defaults to `None`, which uses the server-defined default. + `vectorize_collection_name` + Whether to vectorize the collection name. Defaults to `True`. + """ + return _Text2VecMistralConfig(model=model, vectorizeClassName=vectorize_collection_name) + @staticmethod def text2vec_octoai( *, diff --git a/weaviate/connect/integrations.py b/weaviate/connect/integrations.py index 2bdd08650..a0db5363f 100644 --- a/weaviate/connect/integrations.py +++ b/weaviate/connect/integrations.py @@ -73,6 +73,16 @@ class _IntegrationConfigJina(_IntegrationConfig): base_url: Optional[str] = Field(serialization_alias="X-Jinaai-Baseurl") +class _IntegrationConfigMistral(_IntegrationConfig): + api_key: str = Field(serialization_alias="X-Mistral-Api-Key") + request_per_minute_embeddings: Optional[int] = Field( + serialization_alias="X-Mistral-Ratelimit-RequestPM-Embedding" + ) + tokens_per_minute_embeddings: Optional[int] = Field( + serialization_alias="X-Mistral-Ratelimit-TokenPM-Embedding" + ) + + class _IntegrationConfigOcto(_IntegrationConfig): api_key: str = Field(serialization_alias="X-OctoAI-Api-Key") requests_per_minute_embeddings: Optional[int] = Field( @@ -115,7 +125,7 @@ def openai( requests_per_minute_embeddings: Optional[int] = None, tokens_per_minute_embeddings: Optional[int] = None, organization: Optional[str] = None, - base_url: Optional[str] = None + base_url: Optional[str] = None, ) -> _IntegrationConfig: return _IntegrationConfigOpenAi( api_key=api_key, @@ -147,7 +157,7 @@ def voyageai( api_key: str, requests_per_minute_embeddings: Optional[int] = None, tokens_per_minute_embeddings: Optional[int] = None, - base_url: Optional[str] = None + base_url: Optional[str] = None, ) -> _IntegrationConfig: return _IntegrationConfigVoyage( api_key=api_key, @@ -161,7 +171,7 @@ def jinaai( *, api_key: str, requests_per_minute_embeddings: Optional[int] = None, - base_url: Optional[str] = None + base_url: Optional[str] = None, ) -> _IntegrationConfig: return _IntegrationConfigJina( api_key=api_key, @@ -174,10 +184,23 @@ def octoai( *, api_key: str, requests_per_minute_embeddings: Optional[int] = None, - base_url: Optional[str] = None + base_url: Optional[str] = None, ) -> _IntegrationConfig: return _IntegrationConfigOcto( api_key=api_key, requests_per_minute_embeddings=requests_per_minute_embeddings, base_url=base_url, ) + + @staticmethod + def mistral( + *, + api_key: str, + request_per_minute_embeddings: Optional[int] = None, + tokens_per_minute_embeddings: Optional[int] = None, + ) -> _IntegrationConfig: + return _IntegrationConfigMistral( + api_key=api_key, + request_per_minute_embeddings=request_per_minute_embeddings, + tokens_per_minute_embeddings=tokens_per_minute_embeddings, + ) From 7affa78b486f60d947a06942db758ca1b10febf7 Mon Sep 17 00:00:00 2001 From: Dirk Kulawiak Date: Thu, 9 May 2024 10:44:34 -0700 Subject: [PATCH 2/5] Linter --- weaviate/connect/integrations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/weaviate/connect/integrations.py b/weaviate/connect/integrations.py index a0db5363f..40550aa29 100644 --- a/weaviate/connect/integrations.py +++ b/weaviate/connect/integrations.py @@ -97,7 +97,7 @@ def cohere( *, api_key: str, base_url: Optional[str] = None, - requests_per_minute_embeddings: Optional[int] = None + requests_per_minute_embeddings: Optional[int] = None, ) -> _IntegrationConfig: return _IntegrationConfigCohere( api_key=api_key, From e0c4df9f3e4fb7e5a3b4cd3084476545089eafad Mon Sep 17 00:00:00 2001 From: Dirk Kulawiak Date: Mon, 19 Aug 2024 17:21:59 +0200 Subject: [PATCH 3/5] Add tests --- test/collection/test_config.py | 26 +++++++++++++++++++ test/collection/test_vectorizer.py | 5 ++++ .../collections/classes/config_vectorizers.py | 2 +- 3 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 test/collection/test_vectorizer.py diff --git a/test/collection/test_config.py b/test/collection/test_config.py index 7ba07248e..5bc653849 100644 --- a/test/collection/test_config.py +++ b/test/collection/test_config.py @@ -250,6 +250,18 @@ def test_basic_config(): } }, ), + ( + Configure.Vectorizer.text2vec_mistral( + vectorize_collection_name=False, + model="cool-model", + ), + { + "text2vec-mistral": { + "vectorizeClassName": False, + "model": "cool-model", + } + }, + ), ( Configure.Vectorizer.text2vec_palm( project_id="project", @@ -1185,6 +1197,20 @@ def test_vector_config_flat_pq() -> None: } }, ), + ( + [Configure.NamedVectors.text2vec_mistral(name="test", source_properties=["prop"])], + { + "test": { + "vectorizer": { + "text2vec-mistral": { + "vectorizeClassName": True, + "properties": ["prop"], + } + }, + "vectorIndexType": "hnsw", + } + }, + ), ( [ Configure.NamedVectors.text2vec_palm( diff --git a/test/collection/test_vectorizer.py b/test/collection/test_vectorizer.py new file mode 100644 index 000000000..7b5d0fab9 --- /dev/null +++ b/test/collection/test_vectorizer.py @@ -0,0 +1,5 @@ +from weaviate.collections.classes.config import Configure + + +def test_multi2vec_clip() -> None: + Configure.Vectorizer.multi2vec_clip(image_fields=["test"]) diff --git a/weaviate/collections/classes/config_vectorizers.py b/weaviate/collections/classes/config_vectorizers.py index 51be1a1b5..fc07cff37 100644 --- a/weaviate/collections/classes/config_vectorizers.py +++ b/weaviate/collections/classes/config_vectorizers.py @@ -232,7 +232,7 @@ class _Text2VecHuggingFaceConfigCreate(_Text2VecHuggingFaceConfig, _VectorizerCo class _Text2VecMistralConfig(_VectorizerConfigCreate): - vectorizer: Vectorizers = Field(default=Vectorizers.TEXT2VEC_MISTRAL, frozen=True, exclude=True) + vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(default=Vectorizers.TEXT2VEC_MISTRAL, frozen=True, exclude=True) model: Optional[str] vectorizeClassName: bool From c6def217469307d38f81baf5921e56050aed293a Mon Sep 17 00:00:00 2001 From: Dirk Kulawiak Date: Mon, 19 Aug 2024 17:27:55 +0200 Subject: [PATCH 4/5] formatter --- weaviate/collections/classes/config_vectorizers.py | 4 +++- weaviate/connect/integrations.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/weaviate/collections/classes/config_vectorizers.py b/weaviate/collections/classes/config_vectorizers.py index fc07cff37..add9c9d3f 100644 --- a/weaviate/collections/classes/config_vectorizers.py +++ b/weaviate/collections/classes/config_vectorizers.py @@ -232,7 +232,9 @@ class _Text2VecHuggingFaceConfigCreate(_Text2VecHuggingFaceConfig, _VectorizerCo class _Text2VecMistralConfig(_VectorizerConfigCreate): - vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(default=Vectorizers.TEXT2VEC_MISTRAL, frozen=True, exclude=True) + vectorizer: Union[Vectorizers, _EnumLikeStr] = Field( + default=Vectorizers.TEXT2VEC_MISTRAL, frozen=True, exclude=True + ) model: Optional[str] vectorizeClassName: bool diff --git a/weaviate/connect/integrations.py b/weaviate/connect/integrations.py index 40550aa29..3093e00eb 100644 --- a/weaviate/connect/integrations.py +++ b/weaviate/connect/integrations.py @@ -110,7 +110,7 @@ def huggingface( *, api_key: str, requests_per_minute_embeddings: Optional[int] = None, - base_url: Optional[str] = None + base_url: Optional[str] = None, ) -> _IntegrationConfig: return _IntegrationConfigHuggingface( api_key=api_key, From a06d6bdd1db27140bff36c33d83edbdc3a0c52f6 Mon Sep 17 00:00:00 2001 From: Dirk Kulawiak Date: Thu, 29 Aug 2024 11:09:02 +0200 Subject: [PATCH 5/5] Fix docstring --- weaviate/collections/classes/config_named_vectors.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/weaviate/collections/classes/config_named_vectors.py b/weaviate/collections/classes/config_named_vectors.py index 383c9fabe..2dd923bfe 100644 --- a/weaviate/collections/classes/config_named_vectors.py +++ b/weaviate/collections/classes/config_named_vectors.py @@ -247,8 +247,6 @@ def text2vec_mistral( Which properties should be included when vectorizing. By default all text properties are included. `vector_index_config` The configuration for Weaviate's vector index. Use wvc.config.Configure.VectorIndex to create a vector index configuration. None by default - `vectorize_collection_name` - Whether to vectorize the collection name. Defaults to `True`. `model` The model to use. Defaults to `None`, which uses the server-defined default. `vectorize_collection_name`