From 2ef9c38b8b82be5289bd4cef3ec73a1553e07e2d Mon Sep 17 00:00:00 2001 From: Hossam Hagag <90828745+hh-space-invader@users.noreply.github.com> Date: Wed, 4 Dec 2024 09:25:35 +0200 Subject: [PATCH] Tsk 409 support gte models (#415) * new: Added support for gte base model * tests: Added test canonical vectors for gte model --- fastembed/text/pooled_normalized_embedding.py | 9 +++++++++ tests/test_text_onnx_embeddings.py | 1 + 2 files changed, 10 insertions(+) diff --git a/fastembed/text/pooled_normalized_embedding.py b/fastembed/text/pooled_normalized_embedding.py index 70660420..47ec91a1 100644 --- a/fastembed/text/pooled_normalized_embedding.py +++ b/fastembed/text/pooled_normalized_embedding.py @@ -75,6 +75,15 @@ "sources": {"hf": "jinaai/jina-embeddings-v2-base-es"}, "model_file": "onnx/model.onnx", }, + { + "model": "thenlper/gte-base", + "dim": 768, + "description": "General text embeddings, Unimodal (text), supports English only input text, 512 input tokens truncation, Prefixes for queries/documents: not necessary, 2024 year.", + "license": "mit", + "size_in_GB": 0.44, + "sources": {"hf": "thenlper/gte-base"}, + "model_file": "onnx/model.onnx", + }, ] diff --git a/tests/test_text_onnx_embeddings.py b/tests/test_text_onnx_embeddings.py index f576330c..a40794be 100644 --- a/tests/test_text_onnx_embeddings.py +++ b/tests/test_text_onnx_embeddings.py @@ -64,6 +64,7 @@ ), "snowflake/snowflake-arctic-embed-l": np.array([0.0189, -0.0673, 0.0183, 0.0124, 0.0146]), "Qdrant/clip-ViT-B-32-text": np.array([0.0083, 0.0103, -0.0138, 0.0199, -0.0069]), + "thenlper/gte-base": np.array([0.0038, 0.0355, 0.0181, 0.0092, 0.0654]), }