From 9e531a392bd7fb2e49ec553b89da1873af3dfef7 Mon Sep 17 00:00:00 2001
From: Prathamesh
Date: Tue, 25 Jun 2024 00:55:40 +0530
Subject: [PATCH 1/2] Added reranking via model_gateway class

---
 backend/Dockerfile                             |  9 +--
 .../modules/model_gateway/model_gateway.py     | 43 +++++++++++-
 .../reranker_svc.py                            |  8 +--
 backend/modules/query_controllers/__init__.py  |  2 +-
 .../query_controllers/example/controller.py    | 46 ++++---------
 .../query_controllers/example/payload.py       | 22 +++----
 .../query_controllers/example/types.py         | 23 +------
 .../multimodal/controller.py                   | 57 ++++------------
 .../query_controllers/multimodal/payload.py    | 60 +++++------------
 .../query_controllers/multimodal/types.py      | 23 +------
 backend/modules/rerankers/__init__.py          |  0
 backend/server/routers/internal.py             |  2 +
 backend/types.py                               |  2 +
 compose.env                                    |  5 +-
 docker-compose.yaml                            |  6 +-
 models_config.sample.yaml                      | 66 ++++++++++---------
 models_config.truefoundry.yaml                 | 29 +++++---
 17 files changed, 167 insertions(+), 236 deletions(-)
 rename backend/modules/{rerankers => model_gateway}/reranker_svc.py (93%)
 delete mode 100644 backend/modules/rerankers/__init__.py

diff --git a/backend/Dockerfile b/backend/Dockerfile
index 0d95e9d8..605387c6 100644
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -33,12 +33,9 @@ RUN if [ "${ADD_VECTORDB}" = "1" ]; then python3 -m pip install --use-pep517 --n
 ARG ADD_PRISMA=0
 RUN if [ "${ADD_PRISMA}" = "1" ]; then prisma version; fi
 
-# TODO (chiragjn): These should be removed from here and directly added as environment variables
-# Temporary addition until templates have been updated using build args as environment variables
-ARG ADD_RERANKER_SVC_URL=""
-ENV RERANKER_SVC_URL=${ADD_RERANKER_SVC_URL}
-ARG ADD_EMBEDDING_SVC_URL=""
-ENV EMBEDDING_SVC_URL=${ADD_EMBEDDING_SVC_URL}
+# TODO: Remove this when templates inject env vars
+ARG MODELS_CONFIG_PATH
+ENV MODELS_CONFIG_PATH=${MODELS_CONFIG_PATH}
 
 # Copy the project files
 COPY . /app
diff --git a/backend/modules/model_gateway/model_gateway.py b/backend/modules/model_gateway/model_gateway.py
index 65ec9717..98489cfc 100644
--- a/backend/modules/model_gateway/model_gateway.py
+++ b/backend/modules/model_gateway/model_gateway.py
@@ -11,6 +11,8 @@
 from backend.settings import settings
 from backend.types import ModelConfig, ModelProviderConfig, ModelType
 
+from .reranker_svc import InfinityRerankerSvc
+
 
 class ModelGateway:
     provider_configs: List[ModelProviderConfig]
@@ -20,8 +22,9 @@ def __init__(self):
         logger.info(f"Loading models config from {settings.MODELS_CONFIG_PATH}")
         with open(settings.MODELS_CONFIG_PATH) as f:
             data = yaml.safe_load(f)
-        print(data)
+        logger.info(f"Loaded models config: {data}")
         _providers = data.get("model_providers") or []
+
         # parse the json data into a list of ModelProviderConfig objects
         self.provider_configs = [
             ModelProviderConfig.parse_obj(item) for item in _providers
@@ -32,6 +35,9 @@ def __init__(self):
         # load embedding models
         self.embedding_models: List[ModelConfig] = []
 
+        # load reranker models
+        self.reranker_models: List[ModelConfig] = []
+
         for provider_config in self.provider_configs:
             if provider_config.api_key_env_var and not os.environ.get(
                 provider_config.api_key_env_var
@@ -65,12 +71,27 @@ def __init__(self):
                     )
                 )
 
+            for model_id in provider_config.reranking_model_ids:
+                model_name = f"{provider_config.provider_name}/{model_id}"
+                self.model_name_to_provider_config[model_name] = provider_config
+
+                # Register the model as a reranker model
+                self.reranker_models.append(
+                    ModelConfig(
+                        name=f"{provider_config.provider_name}/{model_id}",
+                        type=ModelType.reranking,
+                    )
+                )
+
     def get_embedding_models(self) -> List[ModelConfig]:
         return self.embedding_models
 
     def get_llm_models(self) -> List[ModelConfig]:
         return self.llm_models
 
+    def get_reranker_models(self) -> List[ModelConfig]:
+        return self.reranker_models
+
     def get_embedder_from_model_config(self, model_name: str) -> Embeddings:
         if model_name not in self.model_name_to_provider_config:
             raise ValueError(f"Model {model_name} not registered in the model gateway.")
@@ -116,5 +137,25 @@ def get_llm_from_model_config(
             base_url=model_provider_config.base_url,
         )
 
+    def get_reranker_from_model_config(self, model_name: str, top_k: int = 3):
+        if model_name not in self.model_name_to_provider_config:
+            raise ValueError(f"Model {model_name} not registered in the model gateway.")
+
+        model_provider_config: ModelProviderConfig = self.model_name_to_provider_config[
+            model_name
+        ]
+        if not model_provider_config.api_key_env_var:
+            api_key = "EMPTY"
+        else:
+            api_key = os.environ.get(model_provider_config.api_key_env_var, "")
+        model_id = "/".join(model_name.split("/")[1:])
+
+        return InfinityRerankerSvc(
+            model=model_id,
+            api_key=api_key,
+            base_url=model_provider_config.base_url,
+            top_k=top_k,
+        )
+
 
 model_gateway = ModelGateway()
diff --git a/backend/modules/rerankers/reranker_svc.py b/backend/modules/model_gateway/reranker_svc.py
similarity index 93%
rename from backend/modules/rerankers/reranker_svc.py
rename to backend/modules/model_gateway/reranker_svc.py
index 4b00f0ce..f3488b66 100644
--- a/backend/modules/rerankers/reranker_svc.py
+++ b/backend/modules/model_gateway/reranker_svc.py
@@ -6,7 +6,6 @@
 from langchain.retrievers.document_compressors.base import BaseDocumentCompressor
 
 from backend.logger import logger
-from backend.settings import settings
 
 
 # Reranking Service using Infinity API
@@ -17,8 +16,9 @@ class InfinityRerankerSvc(BaseDocumentCompressor):
     """
 
     model: str
-    top_k: int = 3
-    url = settings.RERANKER_SVC_URL
+    top_k: int
+    base_url: str
+    api_key: str
 
     def compress_documents(
         self,
@@ -37,7 +37,7 @@
         }
 
         reranked_docs = requests.post(
-            self.url.rstrip("/") + "/rerank", json=payload
+            self.base_url.rstrip("/") + "/rerank", json=payload
         ).json()
 
         """
diff --git a/backend/modules/query_controllers/__init__.py b/backend/modules/query_controllers/__init__.py
index ace80f93..145cfacc 100644
--- a/backend/modules/query_controllers/__init__.py
+++ b/backend/modules/query_controllers/__init__.py
@@ -4,5 +4,5 @@
 )
 from backend.modules.query_controllers.query_controller import register_query_controller
 
-register_query_controller("default", BasicRAGQueryController)
+register_query_controller("basic-rag", BasicRAGQueryController)
 register_query_controller("multimodal", MultiModalRAGQueryController)
diff --git a/backend/modules/query_controllers/example/controller.py b/backend/modules/query_controllers/example/controller.py
index 5ef2b1df..03f58b70 100644
--- a/backend/modules/query_controllers/example/controller.py
+++ b/backend/modules/query_controllers/example/controller.py
@@ -22,28 +22,18 @@
     GENERATION_TIMEOUT_SEC,
     ExampleQueryInput,
 )
-from backend.modules.rerankers.reranker_svc import InfinityRerankerSvc
+
+# from backend.modules.rerankers.reranker_svc import InfinityRerankerSvc
 from backend.modules.vector_db.client import VECTOR_STORE_CLIENT
 from backend.server.decorators import post, query_controller
-from backend.settings import settings
 from backend.types import Collection, ModelConfig
 
 EXAMPLES = {
     "vector-store-similarity": QUERY_WITH_VECTOR_STORE_RETRIEVER_PAYLOAD,
+    "contextual-compression-similarity": QUERY_WITH_CONTEXTUAL_COMPRESSION_RETRIEVER_PAYLOAD,
+    "contextual-compression-multi-query-similarity": QUERY_WITH_CONTEXTUAL_COMPRESSION_MULTI_QUERY_RETRIEVER_SIMILARITY_PAYLOAD,
 }
 
-if settings.RERANKER_SVC_URL:
-    EXAMPLES.update(
-        {
-            "contextual-compression-similarity": QUERY_WITH_CONTEXTUAL_COMPRESSION_RETRIEVER_PAYLOAD,
-        }
-    )
-    EXAMPLES.update(
-        {
-            "contextual-compression-multi-query-similarity": QUERY_WITH_CONTEXTUAL_COMPRESSION_MULTI_QUERY_RETRIEVER_SIMILARITY_PAYLOAD,
-        }
-    )
-
 
 @query_controller("/basic-rag")
 class BasicRAGQueryController:
@@ -111,26 +101,18 @@ def _get_contextual_compression_retriever(self, vector_store, retriever_config):
         Get the contextual compression retriever
         """
         try:
-            if settings.RERANKER_SVC_URL:
-                retriever = self._get_vector_store_retriever(
-                    vector_store, retriever_config
-                )
-                logger.info("Using MxBaiRerankerSmall th' service...")
-                compressor = InfinityRerankerSvc(
-                    top_k=retriever_config.top_k,
-                    model=retriever_config.compressor_model_name,
-                )
+            retriever = self._get_vector_store_retriever(vector_store, retriever_config)
+            logger.info("Using the configured reranker model via the model gateway...")
 
-                compression_retriever = ContextualCompressionRetriever(
-                    base_compressor=compressor, base_retriever=retriever
-                )
+            compressor = model_gateway.get_reranker_from_model_config(
+                model_name=retriever_config.compressor_model_name,
+                top_k=retriever_config.top_k,
+            )
+            compression_retriever = ContextualCompressionRetriever(
+                base_compressor=compressor, base_retriever=retriever
+            )
 
-                return compression_retriever
-            else:
-                raise HTTPException(
-                    status_code=500,
-                    detail="Reranker service is not available",
-                )
+            return compression_retriever
         except Exception as e:
             logger.error(f"Error in getting contextual compression retriever: {e}")
             raise HTTPException(
diff --git a/backend/modules/query_controllers/example/payload.py b/backend/modules/query_controllers/example/payload.py
index 171bff15..df88f52c 100644
--- a/backend/modules/query_controllers/example/payload.py
+++ b/backend/modules/query_controllers/example/payload.py
@@ -84,8 +84,7 @@
     "prompt_template": PROMPT,
     "retriever_name": "contextual-compression",
     "retriever_config": {
-        "compressor_model_provider": "mixedbread-ai",
-        "compressor_model_name": "mixedbread-ai/mxbai-rerank-xsmall-v1",
+        "compressor_model_name": "local-infinity/mixedbread-ai/mxbai-rerank-xsmall-v1",
         "top_k": 5,
         "search_type": "similarity",
         "search_kwargs": {"k": 10},
@@ -113,8 +112,7 @@
     "prompt_template": PROMPT,
     "retriever_name": "contextual-compression",
     "retriever_config": {
-        "compressor_model_provider": "mixedbread-ai",
-        "compressor_model_name": "mixedbread-ai/mxbai-rerank-xsmall-v1",
+        "compressor_model_name": "local-infinity/mixedbread-ai/mxbai-rerank-xsmall-v1",
        "top_k": 5,
         "search_type": "mmr",
         "search_kwargs": {
@@ -130,7 +128,7 @@
     "description": """
         Requires k and fetch_k in search kwargs for mmr.
         search_type can either be similarity or mmr or similarity_score_threshold.
-        Currently only support for mixedbread-ai/mxbai-rerank-xsmall-v1 reranker is added.""",
+        Currently only the local-infinity/mixedbread-ai/mxbai-rerank-xsmall-v1 reranker is supported.""",
     "value": QUERY_WITH_CONTEXTUAL_COMPRESSION_RETRIEVER_SEARCH_TYPE_MMR,
 }
 
@@ -147,8 +145,7 @@
     "prompt_template": PROMPT,
     "retriever_name": "contextual-compression",
     "retriever_config": {
-        "compressor_model_provider": "mixedbread-ai",
-        "compressor_model_name": "mixedbread-ai/mxbai-rerank-xsmall-v1",
+        "compressor_model_name": "local-infinity/mixedbread-ai/mxbai-rerank-xsmall-v1",
         "top_k": 5,
         "search_type": "similarity_score_threshold",
         "search_kwargs": {"score_threshold": 0.7},
@@ -161,7 +158,7 @@
     "description": """
         Requires score_threshold float (0~1) in search kwargs for similarity search.
         search_type can either be similarity or mmr or similarity_score_threshold.
-        Currently only support for mixedbread-ai/mxbai-rerank-xsmall-v1 reranker is added""",
+        Currently only the local-infinity/mixedbread-ai/mxbai-rerank-xsmall-v1 reranker is supported.""",
     "value": QUERY_WITH_CONTEXTUAL_COMPRESSION_RETRIEVER_SEARCH_TYPE_SIMILARITY_WITH_SCORE,
 }
 
@@ -273,8 +270,7 @@
     "prompt_template": PROMPT,
     "retriever_name": "contextual-compression-multi-query",
     "retriever_config": {
-        "compressor_model_provider": "mixedbread-ai",
-        "compressor_model_name": "mixedbread-ai/mxbai-rerank-xsmall-v1",
+        "compressor_model_name": "local-infinity/mixedbread-ai/mxbai-rerank-xsmall-v1",
         "top_k": 5,
         "search_type": "mmr",
         "search_kwargs": {
@@ -309,8 +305,7 @@
     "prompt_template": PROMPT,
     "retriever_name": "contextual-compression-multi-query",
     "retriever_config": {
-        "compressor_model_provider": "mixedbread-ai",
-        "compressor_model_name": "mixedbread-ai/mxbai-rerank-xsmall-v1",
+        "compressor_model_name": "local-infinity/mixedbread-ai/mxbai-rerank-xsmall-v1",
         "top_k": 5,
         "search_type": "similarity",
         "search_kwargs": {"k": 10},
@@ -343,8 +338,7 @@
     "prompt_template": PROMPT,
     "retriever_name": "contextual-compression-multi-query",
     "retriever_config": {
-        "compressor_model_provider": "mixedbread-ai",
-        "compressor_model_name": "mixedbread-ai/mxbai-rerank-xsmall-v1",
+        "compressor_model_name": "local-infinity/mixedbread-ai/mxbai-rerank-xsmall-v1",
         "top_k": 5,
         "search_type": "similarity_score_threshold",
         "search_kwargs": {"score_threshold": 0.7},
diff --git a/backend/modules/query_controllers/example/types.py b/backend/modules/query_controllers/example/types.py
index c677f5ea..0dbe2741 100644
--- a/backend/modules/query_controllers/example/types.py
+++ b/backend/modules/query_controllers/example/types.py
@@ -73,10 +73,6 @@ class MultiQueryRetrieverConfig(VectorStoreRetrieverConfig):
 
 
 class ContextualCompressionRetrieverConfig(VectorStoreRetrieverConfig):
-    compressor_model_provider: str = Field(
-        title="provider of the compressor model",
-    )
-
     compressor_model_name: str = Field(
         title="model name of the compressor",
     )
@@ -85,14 +81,7 @@ class ContextualCompressionRetrieverConfig(VectorStoreRetrieverConfig):
         title="Top K docs to collect post compression",
     )
 
-    allowed_compressor_model_providers: ClassVar[Collection[str]] = ("mixedbread-ai",)
-
-    @validator("compressor_model_provider")
-    def validate_retriever_type(cls, value) -> Dict:
-        assert (
-            value in cls.allowed_compressor_model_providers
-        ), f"Compressor model of {value} not allowed. Valid values are: {cls.allowed_compressor_model_providers}"
-        return value
+    allowed_compressor_model_providers: ClassVar[Collection[str]]
 
 
 class ContextualCompressionMultiQueryRetrieverConfig(
@@ -101,10 +90,6 @@ class ContextualCompressionMultiQueryRetrieverConfig(
     pass
 
 
-class LordOfRetrievers(ContextualCompressionRetrieverConfig, MultiQueryRetrieverConfig):
-    pass
-
-
 class ExampleQueryInput(BaseModel):
     """
     Model for Query input.
@@ -137,7 +122,6 @@ class ExampleQueryInput(BaseModel):
         "multi-query",
         "contextual-compression",
         "contextual-compression-multi-query",
-        "lord-of-the-retrievers",
     )
 
     stream: Optional[bool] = Field(title="Stream the results", default=False)
@@ -170,9 +154,4 @@ def validate_retriever_type(cls, values: Dict) -> Dict:
                 **values.get("retriever_config")
             )
 
-        elif retriever_name == "lord-of-the-retrievers":
-            values["retriever_config"] = LordOfRetrievers(
-                **values.get("retriever_config")
-            )
-
         return values
diff --git a/backend/modules/query_controllers/multimodal/controller.py b/backend/modules/query_controllers/multimodal/controller.py
index 75ca0533..23901d19 100644
--- a/backend/modules/query_controllers/multimodal/controller.py
+++ b/backend/modules/query_controllers/multimodal/controller.py
@@ -23,29 +23,16 @@
     GENERATION_TIMEOUT_SEC,
     ExampleQueryInput,
 )
-from backend.modules.rerankers.reranker_svc import InfinityRerankerSvc
 from backend.modules.vector_db.client import VECTOR_STORE_CLIENT
 from backend.server.decorators import post, query_controller
-from backend.settings import settings
 from backend.types import Collection, ModelConfig
 
 EXAMPLES = {
     "vector-store-similarity": QUERY_WITH_VECTOR_STORE_RETRIEVER_PAYLOAD,
+    "contextual-compression-similarity": QUERY_WITH_CONTEXTUAL_COMPRESSION_RETRIEVER_PAYLOAD,
+    "contextual-compression-multi-query-similarity": QUERY_WITH_CONTEXTUAL_COMPRESSION_MULTI_QUERY_RETRIEVER_SIMILARITY_PAYLOAD,
 }
 
-if settings.RERANKER_SVC_URL:
-    EXAMPLES.update(
-        {
-            "contextual-compression-similarity": QUERY_WITH_CONTEXTUAL_COMPRESSION_RETRIEVER_PAYLOAD,
-        }
-    )
-
-    EXAMPLES.update(
-        {
-            "contextual-compression-multi-query-similarity": QUERY_WITH_CONTEXTUAL_COMPRESSION_MULTI_QUERY_RETRIEVER_SIMILARITY_PAYLOAD,
-        }
-    )
-
 
 @query_controller("/multimodal-rag")
 class MultiModalRAGQueryController:
@@ -73,17 +60,6 @@ def _format_docs_for_stream(self, docs):
         )
         return formatted_docs
 
-    # def _format_docs_for_stream_v2(self, docs):
-    #     formatted_docs = list()
-    #     # docs is a list of list of document objects
-    #     for doc in docs:
-    #         for pages in doc:
-    #             pages.metadata.pop("image_b64", None)
-    #             formatted_docs.append(
-    #                 {"page_content": pages.page_content, "metadata": pages.metadata}
-    #             )
-    #     return formatted_docs
-
     def _get_llm(self, model_configuration: ModelConfig, stream=False):
         """
         Get the LLM
@@ -124,27 +100,18 @@ def _get_contextual_compression_retriever(self, vector_store, retriever_config):
         Get the contextual compression retriever
         """
         try:
-            if settings.RERANKER_SVC_URL:
-                retriever = self._get_vector_store_retriever(
-                    vector_store, retriever_config
-                )
-                logger.info("Using MxBaiRerankerSmall th' service...")
-                compressor = InfinityRerankerSvc(
-                    top_k=retriever_config.top_k,
-                    model=retriever_config.compressor_model_name,
-                )
-
-                compression_retriever = ContextualCompressionRetriever(
-                    base_compressor=compressor, base_retriever=retriever
-                )
+            retriever = self._get_vector_store_retriever(vector_store, retriever_config)
+            logger.info("Using the configured reranker model via the model gateway...")
 
-                return compression_retriever
-            else:
-                raise HTTPException(
-                    status_code=500,
-                    detail="Reranker service is not available",
-                )
+            compressor = model_gateway.get_reranker_from_model_config(
+                model_name=retriever_config.compressor_model_name,
+                top_k=retriever_config.top_k,
+            )
+            compression_retriever = ContextualCompressionRetriever(
+                base_compressor=compressor, base_retriever=retriever
+            )
+            return compression_retriever
         except Exception as e:
             logger.error(f"Error in getting contextual compression retriever: {e}")
             raise HTTPException(
diff --git a/backend/modules/query_controllers/multimodal/payload.py b/backend/modules/query_controllers/multimodal/payload.py
index c1ea36bd..8dd6eb70 100644
--- a/backend/modules/query_controllers/multimodal/payload.py
+++ b/backend/modules/query_controllers/multimodal/payload.py
@@ -4,13 +4,12 @@
     "query": "Explain key operating metrics of FY20",
     "model_configuration": {
         "name": "truefoundry/openai-main/gpt-4-turbo",
-        "provider": "truefoundry",
         "parameters": {"temperature": 0.1},
     },
     "prompt_template": PROMPT,
     "retriever_name": "vectorstore",
     "retriever_config": {"search_type": "similarity", "search_kwargs": {"k": 5}},
-    "stream": True,
+    "stream": False,
 }
 
 QUERY_WITH_VECTOR_STORE_RETRIEVER_PAYLOAD = {
@@ -27,7 +26,6 @@
     "query": "Explain key operating metrics of FY20",
     "model_configuration": {
         "name": "truefoundry/openai-main/gpt-4-turbo",
-        "provider": "truefoundry",
         "parameters": {"temperature": 0.1},
     },
     "prompt_template": PROMPT,
@@ -39,7 +37,7 @@
             "fetch_k": 7,
         },
     },
-    "stream": True,
+    "stream": False,
 }
 
 QUERY_WITH_VECTOR_STORE_RETRIEVER_MMR_PAYLOAD = {
@@ -56,7 +54,6 @@
     "query": "Explain key operating metrics of FY20",
     "model_configuration": {
         "name": "truefoundry/openai-main/gpt-4-turbo",
-        "provider": "truefoundry",
         "parameters": {"temperature": 0.1},
     },
     "prompt_template": PROMPT,
@@ -65,7 +62,7 @@
         "search_type": "similarity_score_threshold",
         "search_kwargs": {"score_threshold": 0.7},
     },
-    "stream": True,
+    "stream": False,
 }
 
 QUERY_WITH_VECTOR_STORE_RETRIEVER_SIMILARITY_SCORE_PAYLOAD = {
@@ -82,19 +79,17 @@
     "query": "Explain key operating metrics of FY20",
     "model_configuration": {
         "name": "truefoundry/openai-main/gpt-4-turbo",
-        "provider": "truefoundry",
         "parameters": {"temperature": 0.1},
     },
     "prompt_template": PROMPT,
     "retriever_name": "contextual-compression",
     "retriever_config": {
-        "compressor_model_provider": "mixedbread-ai",
-        "compressor_model_name": "mixedbread-ai/mxbai-rerank-xsmall-v1",
+        "compressor_model_name": "local-infinity/mixedbread-ai/mxbai-rerank-xsmall-v1",
         "top_k": 5,
         "search_type": "similarity",
         "search_kwargs": {"k": 10},
     },
-    "stream": True,
+    "stream": False,
 }
 
 QUERY_WITH_CONTEXTUAL_COMPRESSION_RETRIEVER_PAYLOAD = {
@@ -112,14 +107,12 @@
     "query": "Explain key operating metrics of FY20",
     "model_configuration": {
         "name": "truefoundry/openai-main/gpt-4-turbo",
-        "provider": "truefoundry",
         "parameters": {"temperature": 0.1},
     },
     "prompt_template": PROMPT,
     "retriever_name": "contextual-compression",
     "retriever_config": {
-        "compressor_model_provider": "mixedbread-ai",
-        "compressor_model_name": "mixedbread-ai/mxbai-rerank-xsmall-v1",
+        "compressor_model_name": "local-infinity/mixedbread-ai/mxbai-rerank-xsmall-v1",
         "top_k": 5,
         "search_type": "mmr",
         "search_kwargs": {
@@ -127,7 +120,7 @@
             "fetch_k": 30,
         },
     },
-    "stream": True,
+    "stream": False,
 }
 
 QUERY_WITH_CONTEXTUAL_COMPRESSION_RETRIEVER_SEARCH_TYPE_MMR_PAYLOAD = {
@@ -147,19 +140,17 @@
     "query": "Explain key operating metrics of FY20",
     "model_configuration": {
         "name": "truefoundry/openai-main/gpt-4-turbo",
-        "provider": "truefoundry",
         "parameters": {"temperature": 0.1},
     },
     "prompt_template": PROMPT,
     "retriever_name": "contextual-compression",
     "retriever_config": {
-        "compressor_model_provider": "mixedbread-ai",
-        "compressor_model_name": "mixedbread-ai/mxbai-rerank-xsmall-v1",
+        "compressor_model_name": "local-infinity/mixedbread-ai/mxbai-rerank-xsmall-v1",
         "top_k": 5,
         "search_type": "similarity_score_threshold",
         "search_kwargs": {"score_threshold": 0.7},
     },
-    "stream": True,
+    "stream": False,
 }
 
 QUERY_WITH_CONTEXTUAL_COMPRESSION_RETRIEVER_SEARCH_TYPE_SIMILARITY_WITH_SCORE_PAYLOAD = {
@@ -178,7 +169,6 @@
     "query": "Explain key operating metrics of FY20",
     "model_configuration": {
         "name": "truefoundry/openai-main/gpt-4-turbo",
-        "provider": "truefoundry",
         "parameters": {"temperature": 0.1},
     },
     "prompt_template": PROMPT,
@@ -188,11 +178,10 @@
         "search_kwargs": {"k": 5},
         "retriever_llm_configuration": {
             "name": "truefoundry/openai-main/gpt-3-5-turbo",
-            "provider": "truefoundry",
             "parameters": {"temperature": 0.9},
         },
     },
-    "stream": True,
+    "stream": False,
 }
 
 QUERY_WITH_MULTI_QUERY_RETRIEVER_SIMILARITY_PAYLOAD = {
@@ -210,7 +199,6 @@
     "query": "Explain key operating metrics of FY20",
     "model_configuration": {
         "name": "truefoundry/openai-main/gpt-4-turbo",
-        "provider": "truefoundry",
         "parameters": {"temperature": 0.1},
     },
     "prompt_template": PROMPT,
@@ -223,11 +211,10 @@
         },
         "retriever_llm_configuration": {
             "name": "truefoundry/openai-main/gpt-3-5-turbo",
-            "provider": "truefoundry",
             "parameters": {"temperature": 0.9},
         },
     },
-    "stream": True,
+    "stream": False,
 }
 
 QUERY_WITH_MULTI_QUERY_RETRIEVER_MMR_PAYLOAD = {
@@ -244,7 +231,6 @@
     "query": "Explain key operating metrics of FY20",
     "model_configuration": {
         "name": "truefoundry/openai-main/gpt-3-5-turbo",
-        "provider": "truefoundry",
         "parameters": {"temperature": 0.1},
     },
     "prompt_template": PROMPT,
@@ -254,11 +240,10 @@
         "search_kwargs": {"score_threshold": 0.7},
         "retriever_llm_configuration": {
             "name": "truefoundry/openai-main/gpt-4-turbo",
-            "provider": "truefoundry",
             "parameters": {"temperature": 0.9},
         },
     },
-    "stream": True,
+    "stream": False,
 }
 
 QUERY_WITH_MULTI_QUERY_RETRIEVER_SIMILARITY_SCORE_PAYLOAD = {
@@ -277,14 +262,12 @@
     "query": "Explain key operating metrics of FY20",
     "model_configuration": {
         "name": "truefoundry/openai-main/gpt-4-turbo",
-        "provider": "truefoundry",
         "parameters": {"temperature": 0.1},
     },
     "prompt_template": PROMPT,
     "retriever_name": "contextual-compression-multi-query",
     "retriever_config": {
-        "compressor_model_provider": "mixedbread-ai",
-        "compressor_model_name": "mixedbread-ai/mxbai-rerank-xsmall-v1",
+        "compressor_model_name": "local-infinity/mixedbread-ai/mxbai-rerank-xsmall-v1",
         "top_k": 5,
         "search_type": "mmr",
         "search_kwargs": {
@@ -293,11 +276,10 @@
         },
         "retriever_llm_configuration": {
             "name": "truefoundry/openai-main/gpt-3-5-turbo",
-            "provider": "truefoundry",
             "parameters": {"temperature": 0.9},
         },
     },
-    "stream": True,
+    "stream": False,
 }
 
 QUERY_WITH_CONTEXTUAL_COMPRESSION_MULTI_QUERY_RETRIEVER_MMR_PAYLOAD = {
@@ -315,24 +297,21 @@
     "query": "Explain key operating metrics of FY20",
     "model_configuration": {
         "name": "truefoundry/openai-main/gpt-4-turbo",
-        "provider": "truefoundry",
         "parameters": {"temperature": 0.1},
     },
     "prompt_template": PROMPT,
     "retriever_name": "contextual-compression-multi-query",
     "retriever_config": {
-        "compressor_model_provider": "mixedbread-ai",
-        "compressor_model_name": "mixedbread-ai/mxbai-rerank-xsmall-v1",
+        "compressor_model_name": "local-infinity/mixedbread-ai/mxbai-rerank-xsmall-v1",
         "top_k": 5,
         "search_type": "similarity",
         "search_kwargs": {"k": 10},
         "retriever_llm_configuration": {
             "name": "truefoundry/openai-main/gpt-3-5-turbo",
-            "provider": "truefoundry",
             "parameters": {"temperature": 0.1},
         },
     },
-    "stream": True,
+    "stream": False,
 }
 
 QUERY_WITH_CONTEXTUAL_COMPRESSION_MULTI_QUERY_RETRIEVER_SIMILARITY_PAYLOAD = {
@@ -350,24 +329,21 @@
     "query": "Explain key operating metrics of FY20",
     "model_configuration": {
         "name": "truefoundry/openai-main/gpt-4-turbo",
-        "provider": "truefoundry",
         "parameters": {"temperature": 0.1},
     },
     "prompt_template": PROMPT,
     "retriever_name": "contextual-compression-multi-query",
     "retriever_config": {
-        "compressor_model_provider": "mixedbread-ai",
-        "compressor_model_name": "mixedbread-ai/mxbai-rerank-xsmall-v1",
+        "compressor_model_name": "local-infinity/mixedbread-ai/mxbai-rerank-xsmall-v1",
         "top_k": 5,
         "search_type": "similarity_score_threshold",
         "search_kwargs": {"score_threshold": 0.7},
         "retriever_llm_configuration": {
             "name": "truefoundry/openai-main/gpt-3-5-turbo",
-            "provider": "truefoundry",
             "parameters": {"temperature": 0.1},
         },
     },
-    "stream": True,
+    "stream": False,
 }
 
 QUERY_WITH_CONTEXTUAL_COMPRESSION_MULTI_QUERY_RETRIEVER_SIMILARITY_SCORE_PAYLOAD = {
diff --git a/backend/modules/query_controllers/multimodal/types.py b/backend/modules/query_controllers/multimodal/types.py
index c677f5ea..0dbe2741 100644
--- a/backend/modules/query_controllers/multimodal/types.py
+++ b/backend/modules/query_controllers/multimodal/types.py
@@ -73,10 +73,6 @@ class MultiQueryRetrieverConfig(VectorStoreRetrieverConfig):
 
 
 class ContextualCompressionRetrieverConfig(VectorStoreRetrieverConfig):
-    compressor_model_provider: str = Field(
-        title="provider of the compressor model",
-    )
-
     compressor_model_name: str = Field(
         title="model name of the compressor",
     )
@@ -85,14 +81,7 @@ class ContextualCompressionRetrieverConfig(VectorStoreRetrieverConfig):
         title="Top K docs to collect post compression",
     )
 
-    allowed_compressor_model_providers: ClassVar[Collection[str]] = ("mixedbread-ai",)
-
-    @validator("compressor_model_provider")
-    def validate_retriever_type(cls, value) -> Dict:
-        assert (
-            value in cls.allowed_compressor_model_providers
-        ), f"Compressor model of {value} not allowed. Valid values are: {cls.allowed_compressor_model_providers}"
-        return value
+    allowed_compressor_model_providers: ClassVar[Collection[str]]
 
 
 class ContextualCompressionMultiQueryRetrieverConfig(
@@ -101,10 +90,6 @@ class ContextualCompressionMultiQueryRetrieverConfig(
     pass
 
 
-class LordOfRetrievers(ContextualCompressionRetrieverConfig, MultiQueryRetrieverConfig):
-    pass
-
-
 class ExampleQueryInput(BaseModel):
     """
     Model for Query input.
@@ -137,7 +122,6 @@ class ExampleQueryInput(BaseModel):
         "multi-query",
         "contextual-compression",
         "contextual-compression-multi-query",
-        "lord-of-the-retrievers",
     )
 
     stream: Optional[bool] = Field(title="Stream the results", default=False)
@@ -170,9 +154,4 @@ def validate_retriever_type(cls, values: Dict) -> Dict:
                 **values.get("retriever_config")
             )
 
-        elif retriever_name == "lord-of-the-retrievers":
-            values["retriever_config"] = LordOfRetrievers(
-                **values.get("retriever_config")
-            )
-
         return values
diff --git a/backend/modules/rerankers/__init__.py b/backend/modules/rerankers/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/backend/server/routers/internal.py b/backend/server/routers/internal.py
index 1a9534e5..d22b9645 100644
--- a/backend/server/routers/internal.py
+++ b/backend/server/routers/internal.py
@@ -104,6 +104,8 @@ def get_enabled_models(
         enabled_models = model_gateway.get_embedding_models()
     elif model_type == ModelType.chat:
         enabled_models = model_gateway.get_llm_models()
+    elif model_type == ModelType.reranking:
+        enabled_models = model_gateway.get_reranker_models()
     else:
         raise HTTPException(
             status_code=400,
diff --git a/backend/types.py b/backend/types.py
index a9728652..918c0751 100644
--- a/backend/types.py
+++ b/backend/types.py
@@ -97,6 +97,7 @@ class ModelType(str, Enum):
 
     chat = "chat"
     embedding = "embedding"
+    reranking = "reranking"
 
 
 class ModelConfig(BaseModel):
@@ -117,6 +118,7 @@ class ModelProviderConfig(BaseModel):
     api_format: str
     llm_model_ids: List[str]
     embedding_model_ids: List[str]
+    reranking_model_ids: List[str]
     api_key_env_var: str
     base_url: Optional[str] = None
diff --git a/compose.env b/compose.env
index 89506edd..6901c226 100644
--- a/compose.env
+++ b/compose.env
@@ -11,15 +11,15 @@ OLLAMA_MODEL=qwen2:1.5b
 ## INFINITY VARS
 INFINITY_EMBEDDING_MODEL=mixedbread-ai/mxbai-embed-large-v1
 INFINITY_RERANKING_MODEL=mixedbread-ai/mxbai-rerank-xsmall-v1
+INFINITY_API_KEY=password
 
 ## COGNITA_BACKEND VARS
 ### Note: If you are changing `COGNITA_BACKEND_PORT`, please make sure to update `VITE_QA_FOUNDRY_URL` to match it. Frontend talks to backend via the host network
-COGNITA_BACKEND_PORT=8000
 ### `MODEL_PROVIDERS_CONFIG_PATH` is relative to cognita root dir
 MODELS_CONFIG_PATH="./models_config.yaml"
 METADATA_STORE_CONFIG='{"provider":"prisma"}'
 VECTOR_DB_CONFIG='{"provider":"qdrant","url":"http://qdrant-server:6333", "config": {"grpc_port": 6334, "prefer_grpc": false}}'
-INFINITY_URL=http://infinity-server:7997/
+COGNITA_BACKEND_PORT=8000
 
 ## COGNITA_FRONTEND VARS
 COGNITA_FRONTEND_PORT=5001
@@ -35,4 +35,3 @@ OPENAI_API_KEY=
 ## TFY VARS
 TFY_API_KEY=
 TFY_HOST=
-TFY_LLM_GATEWAY_URL=
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 36e15d6b..bcc61d5c 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -71,6 +71,7 @@ services:
     environment:
       - INFINITY_MODEL_ID=${INFINITY_EMBEDDING_MODEL};${INFINITY_RERANKING_MODEL}
       - INFINITY_BATCH_SIZE=8
+      - INFINITY_API_KEY=${INFINITY_API_KEY}
     command: v2
     networks:
       - cognita-docker
@@ -131,14 +132,13 @@ services:
       - DEBUG_MODE=true
      - LOCAL=${LOCAL}
       - LOG_LEVEL=DEBUG
-      - MODELS_CONFIG_PATH=${MODELS_CONFIG_PATH}
       - METADATA_STORE_CONFIG=${METADATA_STORE_CONFIG}
       - VECTOR_DB_CONFIG=${VECTOR_DB_CONFIG}
       - OPENAI_API_KEY=${OPENAI_API_KEY}
-      - RERANKER_SVC_URL=${INFINITY_URL}
+      - INFINITY_API_KEY=${INFINITY_API_KEY}
+      - MODELS_CONFIG_PATH=${MODELS_CONFIG_PATH}
       - TFY_API_KEY=${TFY_API_KEY}
       - TFY_HOST=${TFY_HOST}
-      - TFY_LLM_GATEWAY_URL=${TFY_LLM_GATEWAY_URL}
       - DATABASE_URL=postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@cognita-db:5432/cognita-config
     entrypoint: /bin/bash
     command: -c "set -e; prisma db push --schema ./backend/database/schema.prisma && uvicorn --host 0.0.0.0 --port 8000 backend.server.app:app --reload"
diff --git a/models_config.sample.yaml b/models_config.sample.yaml
index 50e6b78c..1a7fe0cc 100644
--- a/models_config.sample.yaml
+++ b/models_config.sample.yaml
@@ -1,31 +1,32 @@
 model_providers:
-
-  ############################ Local ############################################
-  # Uncomment this provider if you want to use local models providers          #
-  # using ollama and infinity model server                                     #
-  ###############################################################################
-
-  - provider_name: local-ollama
-    api_format: openai
-    base_url: http://ollama-server:11434/v1/
-    api_key_env_var: ""
-    llm_model_ids:
-      - "qwen2:1.5b"
-    embedding_model_ids: []
-
-  - provider_name: local-infinity
-    api_format: openai
-    base_url: http://infinity-server:7997/
-    api_key_env_var: ""
-    llm_model_ids: []
-    embedding_model_ids:
-      - "mixedbread-ai/mxbai-embed-large-v1"
-
-  ############################ OpenAI ###########################################
-  # Uncomment this provider if you want to use OpenAI as a models provider     #
-  # Remember to set `OPENAI_API_KEY` in container environment                  #
-  ###############################################################################
-
+  ############################ Local ############################################
+  # Uncomment this provider if you want to use local models providers          #
+  # using ollama and infinity model server                                     #
+  ###############################################################################
+
+  - provider_name: local-ollama
+    api_format: openai
+    base_url: http://ollama-server:11434/v1/
+    api_key_env_var: ""
+    llm_model_ids:
+      - "qwen2:1.5b"
+    embedding_model_ids: []
+    reranking_model_ids: []
+
+  - provider_name: local-infinity
+    api_format: openai
+    base_url: http://infinity-server:7997/
+    api_key_env_var: ""
+    llm_model_ids: []
+    embedding_model_ids:
+      - "mixedbread-ai/mxbai-embed-large-v1"
+    reranking_model_ids:
+      - "mixedbread-ai/mxbai-rerank-xsmall-v1"
+
+  ############################ OpenAI ###########################################
+  # Uncomment this provider if you want to use OpenAI as a models provider     #
+  # Remember to set `OPENAI_API_KEY` in container environment                  #
+  ###############################################################################
 # - provider_name: openai
 #   api_format: openai
 #   api_key_env_var: OPENAI_API_KEY
@@ -35,12 +36,12 @@ model_providers:
 #   embedding_model_ids:
 #     - "text-embedding-3-small"
 #     - "text-embedding-ada-002"
+#   reranking_model_ids: []
 
-
-  ############################ TrueFoundry ###########################################
-  # Uncomment this provider if you want to use TrueFoundry as a models provider     #
-  # Remember to set `TFY_API_KEY` in container environment                          #
-  ####################################################################################
+############################ TrueFoundry ###########################################
+# Uncomment this provider if you want to use TrueFoundry as a models provider     #
+# Remember to set `TFY_API_KEY` in container environment                          #
+####################################################################################
 
 # - provider_name: truefoundry
 #   api_format: openai
@@ -51,3 +52,4 @@ model_providers:
 #     - "openai-main/gpt-3-5-turbo"
 #   embedding_model_ids:
 #     - "openai-main/text-embedding-ada-002"
+#   reranking_model_ids: []
diff --git a/models_config.truefoundry.yaml b/models_config.truefoundry.yaml
index 49c5b0a1..9c859bd8 100644
--- a/models_config.truefoundry.yaml
+++ b/models_config.truefoundry.yaml
@@ -1,10 +1,21 @@
 model_providers:
-  - provider_name: truefoundry
-    api_format: openai
-    base_url: https://llm-gateway.truefoundry.com/api/inference/openai
-    api_key_env_var: TFY_API_KEY
-    llm_model_ids:
-      - "openai-main/gpt-4-turbo"
-      - "openai-main/gpt-3-5-turbo"
-    embedding_model_ids:
-      - "openai-main/text-embedding-ada-002"
+  - provider_name: truefoundry
+    api_format: openai
+    base_url: https://llm-gateway.truefoundry.com/api/inference/openai
+    api_key_env_var: TFY_API_KEY
+    llm_model_ids:
+      - "openai-main/gpt-4-turbo"
+      - "openai-main/gpt-3-5-turbo"
+    embedding_model_ids:
+      - "openai-main/text-embedding-ada-002"
+    reranking_model_ids: []
+
+  - provider_name: local-infinity
+    api_format: openai
+    base_url: https://infinity-svc-prod.truefoundry.com
+    api_key_env_var: INFINITY_API_KEY
+    llm_model_ids: []
+    embedding_model_ids:
+      - "mixedbread-ai/mxbai-embed-large-v1"
+    reranking_model_ids:
+      - "mixedbread-ai/mxbai-rerank-xsmall-v1"

From c49eccea1e0070be4cf674c264dc42f7ae79c2ed Mon Sep 17 00:00:00 2001
From: Prathamesh
Date: Tue, 25 Jun 2024 10:59:18 +0530
Subject: [PATCH 2/2] Added Auth for infinity API

---
 backend/Dockerfile                            | 5 ++++-
 backend/modules/model_gateway/reranker_svc.py | 8 +++++++-
 backend/settings.py                           | 5 +++--
 docker-compose.yaml                           | 4 ++--
 4 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/backend/Dockerfile b/backend/Dockerfile
index 605387c6..b2a16f46 100644
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -33,10 +33,13 @@ RUN if [ "${ADD_VECTORDB}" = "1" ]; then python3 -m pip install --use-pep517 --n
 ARG ADD_PRISMA=0
 RUN if [ "${ADD_PRISMA}" = "1" ]; then prisma version; fi
 
-# TODO: Remove this when templates inject env vars
+# TODO: Remove these when templates inject env vars
 ARG MODELS_CONFIG_PATH
 ENV MODELS_CONFIG_PATH=${MODELS_CONFIG_PATH}
 
+ARG INFINITY_API_KEY
+ENV INFINITY_API_KEY=${INFINITY_API_KEY}
+
 # Copy the project files
 COPY . /app
diff --git a/backend/modules/model_gateway/reranker_svc.py b/backend/modules/model_gateway/reranker_svc.py
index f3488b66..53d10818 100644
--- a/backend/modules/model_gateway/reranker_svc.py
+++ b/backend/modules/model_gateway/reranker_svc.py
@@ -6,6 +6,7 @@
 from langchain.retrievers.document_compressors.base import BaseDocumentCompressor
 
 from backend.logger import logger
+from backend.settings import settings
 
 
 # Reranking Service using Infinity API
@@ -36,8 +37,13 @@ def compress_documents(
             "model": self.model,
         }
 
+        headers = {
+            "Authorization": f"Bearer {settings.INFINITY_API_KEY}",
+            "Content-Type": "application/json",
+        }
+
         reranked_docs = requests.post(
-            self.base_url.rstrip("/") + "/rerank", json=payload
+            self.base_url.rstrip("/") + "/rerank", headers=headers, json=payload
         ).json()
 
         """
diff --git a/backend/settings.py b/backend/settings.py
index dba37dbb..8c4678a9 100644
--- a/backend/settings.py
+++ b/backend/settings.py
@@ -17,7 +17,6 @@ class Config:
 
     METADATA_STORE_CONFIG: MetadataStoreConfig
     VECTOR_DB_CONFIG: VectorDBConfig
-    RERANKER_SVC_URL: str = ""
     LOCAL: bool = False
 
     TFY_HOST: str = ""
@@ -28,7 +27,9 @@ class Config:
     LOG_LEVEL: str = "info"
     TFY_SERVICE_ROOT_PATH: str = ""
 
-    # TODO: This will be removed in future releases
+    INFINITY_API_KEY: str = ""
+
+    # TODO: This will be removed in future releases - after fixing multimodal parser
     TFY_LLM_GATEWAY_URL: str = ""
 
     @root_validator(pre=True)
diff --git a/docker-compose.yaml b/docker-compose.yaml
index bcc61d5c..12eee1c7 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -52,7 +52,7 @@ services:
   infinity-server:
-    image: michaelf34/infinity:0.0.49
+    image: michaelf34/infinity:0.0.50
     pull_policy: if_not_present
     restart: unless-stopped
     container_name: infinity
@@ -72,7 +72,7 @@ services:
       - INFINITY_MODEL_ID=${INFINITY_EMBEDDING_MODEL};${INFINITY_RERANKING_MODEL}
       - INFINITY_BATCH_SIZE=8
       - INFINITY_API_KEY=${INFINITY_API_KEY}
-    command: v2
+    command: v2 --api-key ${INFINITY_API_KEY}
     networks:
      - cognita-docker
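---

For completeness, a minimal sketch of the flow these two patches enable, assuming the
`local-infinity` provider from models_config.sample.yaml is enabled and INFINITY_API_KEY
is exported; the documents and query below are illustrative only:

    # Sketch (not part of the patches): resolve a reranker through the model
    # gateway and rerank a few documents, mirroring what
    # _get_contextual_compression_retriever() now does internally.
    from langchain.docstore.document import Document

    from backend.modules.model_gateway.model_gateway import model_gateway

    # Model names are "<provider_name>/<model_id>" as registered in models_config.yaml.
    reranker = model_gateway.get_reranker_from_model_config(
        model_name="local-infinity/mixedbread-ai/mxbai-rerank-xsmall-v1",
        top_k=2,
    )

    docs = [
        Document(page_content="FY20 revenue grew 12% year over year."),
        Document(page_content="The office moved to a new campus in FY21."),
        Document(page_content="FY20 operating margin expanded to 18%."),
    ]

    # InfinityRerankerSvc is a LangChain BaseDocumentCompressor, so it can be
    # called directly or plugged into a ContextualCompressionRetriever.
    for doc in reranker.compress_documents(documents=docs, query="key operating metrics of FY20"):
        print(doc.page_content)

With patch 2 applied, the POST to <base_url>/rerank carries a Bearer token read from
INFINITY_API_KEY, so the same sketch works against an auth-enabled Infinity server as
long as that variable matches the --api-key passed to the infinity-server command.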