Merge branch 'main' into fix-minio-update
mawandm authored Jul 27, 2024
2 parents fb73719 + 69430ef commit 3ef0b14
Showing 5 changed files with 72 additions and 0 deletions.
5 changes: 5 additions & 0 deletions nesis/rag/core/server/chat/chat_service.py
@@ -8,6 +8,7 @@
 )
 from llama_index.core.indices.postprocessor import MetadataReplacementPostProcessor
 from llama_index.core.llms import ChatMessage, MessageRole
+from llama_index.core.memory import ChatMemoryBuffer
 from llama_index.core.types import TokenGen
 from pydantic import BaseModel
 
@@ -111,9 +112,13 @@ def _chat_engine(
             context_filter=context_filter,
             similarity_top_k=self.settings.vectorstore.similarity_top_k,
         )
+        memory = ChatMemoryBuffer.from_defaults(
+            token_limit=self.settings.llm.token_limit
+        )
         return ContextChatEngine.from_defaults(
             system_prompt=system_prompt,
             retriever=vector_index_retriever,
+            memory=memory,
             service_context=self.service_context,
             node_postprocessors=[
                 MetadataReplacementPostProcessor(target_metadata_key="window"),
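For context, a minimal sketch (not part of this commit) of what `ChatMemoryBuffer` does in isolation: it keeps recent chat history and drops the oldest messages so the retained history stays under `token_limit`.

```python
from llama_index.core.llms import ChatMessage, MessageRole
from llama_index.core.memory import ChatMemoryBuffer

# Same construction as in _chat_engine, with a small illustrative limit.
memory = ChatMemoryBuffer.from_defaults(token_limit=100)
memory.put(ChatMessage(role=MessageRole.USER, content="hello"))
memory.put(ChatMessage(role=MessageRole.ASSISTANT, content="hi there"))

# get() returns only as much recent history as fits within token_limit,
# trimming from the oldest messages first.
print(memory.get())
```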
4 changes: 4 additions & 0 deletions nesis/rag/core/settings/settings.py
@@ -106,6 +106,10 @@ class LLMSettings(BaseModel):
         "like `HuggingFaceH4/zephyr-7b-beta`. If not set, will load a tokenizer matching "
         "gpt-3.5-turbo LLM.",
     )
+    token_limit: int = Field(
+        9439,
+        description="The maximum number of chat memory tokens.",
+    )
 
 
 class VectorstoreSettings(BaseModel):
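A minimal sketch (illustrative only) of how the new pydantic `Field` default behaves; the real `LLMSettings` carries more fields than shown here.

```python
from pydantic import BaseModel, Field


class LLMSettings(BaseModel):
    # Trimmed to the new field for illustration.
    token_limit: int = Field(
        9439,
        description="The maximum number of chat memory tokens.",
    )


print(LLMSettings().token_limit)                  # 9439 (default)
print(LLMSettings(token_limit=2048).token_limit)  # 2048 (explicit override)
```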
1 change: 1 addition & 0 deletions nesis/rag/settings.yaml
@@ -12,6 +12,7 @@ llm:
   # Should be matching the selected model
   max_new_tokens: 512
   context_window: 3900
+  token_limit: ${NESIS_RAG_LLM_TOKEN_LIMIT:9439}
   tokenizer: mistralai/Mistral-7B-Instruct-v0.2
 
 embedding:
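The `${NESIS_RAG_LLM_TOKEN_LIMIT:9439}` placeholder reads the environment variable and falls back to 9439 when it is unset. A hedged sketch of that resolution logic, assuming the usual `${NAME:default}` convention (the actual resolver in the settings loader may differ):

```python
import os
import re

PLACEHOLDER = re.compile(r"\$\{([^:}]+):([^}]*)\}")


def resolve(value: str) -> str:
    """Replace ${NAME:default} with the env var NAME, or the default."""
    return PLACEHOLDER.sub(
        lambda m: os.environ.get(m.group(1), m.group(2)), value
    )


print(resolve("${NESIS_RAG_LLM_TOKEN_LIMIT:9439}"))  # "9439" unless overridden
```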
Empty file.
62 changes: 62 additions & 0 deletions nesis/rag/tests/rag/core/server/chat/test_chat_service.py
@@ -0,0 +1,62 @@
+import pathlib
+
+import pytest
+from injector import Injector
+from llama_index.core.base.llms.types import ChatMessage
+
+from nesis.rag.core.server.chat.chat_service import ChatService, Completion
+from nesis.rag.core.server.ingest.ingest_service import IngestService
+from nesis.rag.core.settings.settings import Settings
+from nesis.rag import tests
+from nesis.rag.core.server.ingest.model import IngestedDoc
+
+
+@pytest.fixture
+def injector(settings) -> Injector:
+    from nesis.rag.core.di import create_application_injector
+
+    return create_application_injector(settings=settings)
+
+
+@pytest.fixture
+def settings() -> Settings:
+    from nesis.rag.core.settings.settings import settings
+
+    return settings(
+        overrides={
+            "llm": {"mode": "mock", "token_limit": 100000},
+            "vectorstore": {"similarity_top_k": "20"},
+        }
+    )
+
+
+def test_chat_service_similarity_top_k(injector):
+    """
+    Test to ensure similarity_top_k setting takes effect.
+    """
+    file_path: pathlib.Path = (
+        pathlib.Path(tests.__file__).parent.absolute() / "resources" / "rfc791.txt"
+    )
+
+    ingest_service = injector.get(IngestService)
+
+    ingested_list: list[IngestedDoc] = ingest_service.ingest_file(
+        file_name=file_path.name,
+        file_data=file_path,
+        metadata={
+            "file_name": str(file_path.absolute()),
+            "datasource": "rfc-documents",
+        },
+    )
+
+    chat_service = injector.get(ChatService)
+    completion: Completion = chat_service.chat(
+        use_context=True,
+        messages=[
+            ChatMessage.from_str(
+                content="describe the internet protocol from the darpa internet program"
+            )
+        ],
+    )
+
+    assert len(completion.sources) == 20

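To run just this new test module locally, a standard pytest invocation should suffice; the programmatic form below is equivalent to `pytest -q <path>`:

```python
import pytest

# Run only the new chat service test module; -q keeps the output terse.
pytest.main(["-q", "nesis/rag/tests/rag/core/server/chat/test_chat_service.py"])
```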