Add LM Studio Support #1538

Status: Draft. Wants to merge 6 commits into main.
6 changes: 6 additions & 0 deletions py/cli/commands/server.py
@@ -280,6 +280,12 @@ async def serve(
             "Warning: `config-name` corresponds to an existing file. If you intended a custom config, use `config-path`."
         )
 
+    if config_name in ["local_llm", "full_local_llm"]:
+        click.secho(
+            "WARNING: `local_llm` and `full_local_llm` are deprecated and support for them will be removed. Please use `local_llm_ollama`, `local_llm_lm_studio`, `full_local_llm_ollama`, or `full_local_llm_lm_studio` instead.",
+            fg="red",
+        )
+
     if build:
         click.echo(
             "`build` flag detected. Building Docker image from local repository..."
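For reference, the renames behind this warning, expressed as a hypothetical lookup table (illustrative only; the PR emits a warning but does not remap names automatically):

```python
# Hypothetical helper (not in the PR): the replacement matrix implied by the
# deprecation warning. Which successor applies depends on the local backend.
DEPRECATED_CONFIG_REPLACEMENTS = {
    "local_llm": ("local_llm_ollama", "local_llm_lm_studio"),
    "full_local_llm": ("full_local_llm_ollama", "full_local_llm_lm_studio"),
}

for old, (ollama_name, lm_studio_name) in DEPRECATED_CONFIG_REPLACEMENTS.items():
    print(f"{old} -> {ollama_name} (Ollama) or {lm_studio_name} (LM Studio)")
```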
10 changes: 7 additions & 3 deletions py/compose.full.yaml
@@ -270,7 +270,8 @@ services:
       retries: 5
 
   r2r:
-    image: ${R2R_IMAGE:-ragtoriches/prod:latest}
+    # image: ${R2R_IMAGE:-ragtoriches/prod:latest}
+    image: r2r/test
     build:
       context: .
       args:
@@ -333,6 +334,9 @@ services:
       # Ollama
       - OLLAMA_API_BASE=${OLLAMA_API_BASE:-http://host.docker.internal:11434}
 
+      # LM Studio
+      - LM_STUDIO_API_BASE=${LM_STUDIO_API_BASE:-http://host.docker.internal:1234}
+
       # Unstructured
       - UNSTRUCTURED_API_KEY=${UNSTRUCTURED_API_KEY:-}
       - UNSTRUCTURED_API_URL=${UNSTRUCTURED_API_URL:-https://api.unstructured.io/general/v0/general}
@@ -352,6 +356,8 @@
       '
     networks:
       - r2r-network
+    extra_hosts:
+      - host.docker.internal:host-gateway
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:${R2R_PORT:-7272}/v2/health"]
      interval: 6s
@@ -361,8 +367,6 @@
     volumes:
       - ${R2R_CONFIG_PATH:-/}:${R2R_CONFIG_PATH:-/app/config}
       - hatchet_api_key:/hatchet_api_key:ro
-    extra_hosts:
-      - host.docker.internal:host-gateway
     depends_on:
       setup-token:
         condition: service_completed_successfully
6 changes: 5 additions & 1 deletion py/compose.yaml
@@ -36,7 +36,8 @@ services:
       -c max_connections=${R2R_POSTGRES_MAX_CONNECTIONS:-1024}
 
   r2r:
-    image: ${R2R_IMAGE:-ragtoriches/prod:latest}
+    # image: ${R2R_IMAGE:-ragtoriches/prod:latest}
+    image: r2r/test
     build:
       context: .
       args:
@@ -99,6 +100,9 @@ services:
       # Ollama
       - OLLAMA_API_BASE=${OLLAMA_API_BASE:-http://host.docker.internal:11434}
 
+      # LM Studio
+      - LM_STUDIO_API_BASE=${LM_STUDIO_API_BASE:-http://host.docker.internal:1234}
+
     networks:
       - r2r-network
     healthcheck:
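With `LM_STUDIO_API_BASE` exported as above, LiteLLM routes `lm_studio/`-prefixed model names to LM Studio's OpenAI-compatible server. A minimal sketch of such a call, assuming a LiteLLM release that ships the `lm_studio` provider and a model already loaded in LM Studio:

```python
import os

import litellm

# Assumption: LiteLLM's lm_studio provider reads this variable to locate the
# OpenAI-compatible endpoint LM Studio serves (port 1234 by default).
os.environ.setdefault("LM_STUDIO_API_BASE", "http://localhost:1234")

response = litellm.completion(
    model="lm_studio/llama-3.2-3b-instruct",  # name as listed by LM Studio
    messages=[{"role": "user", "content": "Say hello from R2R."}],
)
print(response.choices[0].message.content)
```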
69 changes: 69 additions & 0 deletions py/core/configs/full_local_llm_lm_studio.toml
@@ -0,0 +1,69 @@
[agent]
system_instruction_name = "rag_agent"
tool_names = ["search"]

[agent.generation_config]
model = "lm_studio/llama-3.2-3b-instruct"

[completion]
provider = "litellm"
concurrent_request_limit = 1

[completion.generation_config]
model = "lm_studio/llama-3.2-3b-instruct"
temperature = 0.1
top_p = 1
max_tokens_to_sample = 1_024
stream = false
add_generation_kwargs = { }

[database]
provider = "postgres"

[database.kg_creation_settings]
kg_entity_description_prompt = "graphrag_entity_description"
kg_triples_extraction_prompt = "graphrag_triples_extraction_few_shot"
entity_types = [] # if empty, all entities are extracted
relation_types = [] # if empty, all relations are extracted
fragment_merge_count = 4 # number of fragments to merge into a single extraction
max_knowledge_triples = 100
max_description_input_length = 65536
generation_config = { model = "lm_studio/llama-3.2-3b-instruct" } # and other params, model used for triplet extraction

[database.kg_entity_deduplication_settings]
kg_entity_deduplication_type = "by_name"
kg_entity_deduplication_prompt = "graphrag_entity_deduplication"
max_description_input_length = 65536
generation_config = { model = "lm_studio/llama-3.2-3b-instruct" } # and other params, model used for deduplication

[database.kg_enrichment_settings]
community_reports_prompt = "graphrag_community_reports"
max_summary_input_length = 65536
generation_config = { model = "lm_studio/llama-3.2-3b-instruct" } # and other params, model used for node description and graph clustering
leiden_params = {}

[database.kg_search_settings]
entities_level = "document" # set to collection if you've run deduplication
map_system_prompt = "graphrag_map_system"
reduce_system_prompt = "graphrag_reduce_system"
generation_config = { model = "lm_studio/llama-3.2-3b-instruct" }

[embedding]
provider = "litellm"
base_model = "lm_studio/text-embedding-nomic-embed-text-v1.5-embedding"
base_dimension = 768
batch_size = 128
add_title_as_prefix = true
concurrent_request_limit = 2

[ingestion]
provider = "unstructured_local"
strategy = "auto"
chunking_strategy = "by_title"
new_after_n_chars = 512
max_characters = 1_024
combine_under_n_chars = 128
overlap = 20

[orchestration]
provider = "hatchet"
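The `[embedding]` block above also goes through LiteLLM. Roughly the call R2R's embedding provider ends up making, sketched under the same lm_studio-provider assumption; `dimensions` mirrors `base_dimension`, and backends that reject the parameter rely on `litellm.drop_params`:

```python
import litellm

litellm.drop_params = True  # drop params the backend does not support

resp = litellm.embedding(
    model="lm_studio/text-embedding-nomic-embed-text-v1.5-embedding",
    input=["first chunk", "second chunk"],
    dimensions=768,  # must agree with base_dimension above
)
vectors = [item["embedding"] for item in resp.data]
print(len(vectors), len(vectors[0]))
```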
71 changes: 71 additions & 0 deletions py/core/configs/full_local_llm_ollama.toml
@@ -0,0 +1,71 @@
[agent]
system_instruction_name = "rag_agent"
tool_names = ["search"]

[agent.generation_config]
model = "ollama/llama3.1"

[completion]
provider = "litellm"
concurrent_request_limit = 1

[completion.generation_config]
model = "ollama/llama3.1"
temperature = 0.1
top_p = 1
max_tokens_to_sample = 1_024
stream = false
add_generation_kwargs = { }


[database]
provider = "postgres"

[database.kg_creation_settings]
kg_entity_description_prompt = "graphrag_entity_description"
kg_triples_extraction_prompt = "graphrag_triples_extraction_few_shot"
entity_types = [] # if empty, all entities are extracted
relation_types = [] # if empty, all relations are extracted
fragment_merge_count = 4 # number of fragments to merge into a single extraction
max_knowledge_triples = 100
max_description_input_length = 65536
generation_config = { model = "ollama/llama3.1" } # and other params, model used for triplet extraction

[database.kg_entity_deduplication_settings]
kg_entity_deduplication_type = "by_name"
kg_entity_deduplication_prompt = "graphrag_entity_deduplication"
max_description_input_length = 65536
generation_config = { model = "ollama/llama3.1" } # and other params, model used for deduplication

[database.kg_enrichment_settings]
community_reports_prompt = "graphrag_community_reports"
max_summary_input_length = 65536
generation_config = { model = "ollama/llama3.1" } # and other params, model used for node description and graph clustering
leiden_params = {}

[database.kg_search_settings]
entities_level = "document" # set to collection if you've run deduplication
map_system_prompt = "graphrag_map_system"
reduce_system_prompt = "graphrag_reduce_system"
generation_config = { model = "ollama/llama3.1" }


[embedding]
provider = "ollama"
base_model = "mxbai-embed-large"
base_dimension = 1_024
batch_size = 128
add_title_as_prefix = true
concurrent_request_limit = 2

[ingestion]
provider = "unstructured_local"
strategy = "auto"
chunking_strategy = "by_title"
new_after_n_chars = 512
max_characters = 1_024
combine_under_n_chars = 128
overlap = 20

[orchestration]
provider = "hatchet"
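A quick connectivity check for the Ollama variant (a sketch, not R2R code; `api_base` is passed explicitly so the example does not depend on LiteLLM picking up `OLLAMA_API_BASE` on its own):

```python
import os

import litellm

api_base = os.environ.get("OLLAMA_API_BASE", "http://localhost:11434")

response = litellm.completion(
    model="ollama/llama3.1",
    api_base=api_base,  # where the Ollama server listens
    messages=[{"role": "user", "content": "ping"}],
)
print(response.choices[0].message.content)
```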
8 changes: 0 additions & 8 deletions py/core/configs/local_llm.toml
@@ -59,11 +59,3 @@ provider = "postgres"

 [orchestration]
 provider = "simple"
-
-
-[ingestion]
-vision_img_model = "ollama/llama3.2-vision"
-vision_pdf_model = "ollama/llama3.2-vision"
-
-[ingestion.extra_parsers]
-pdf = "zerox"
62 changes: 62 additions & 0 deletions py/core/configs/local_llm_lm_studio.toml
@@ -0,0 +1,62 @@
[agent]
provider = "litellm"
system_instruction_name = "rag_agent"
tool_names = ["search"]

[agent.generation_config]
model = "lm_studio/llama-3.2-3b-instruct"

[completion]
provider = "litellm"
concurrent_request_limit = 1

[completion.generation_config]
model = "lm_studio/llama-3.2-3b-instruct"
temperature = 0.1
top_p = 1
max_tokens_to_sample = 1_024
stream = false
add_generation_kwargs = { }

[embedding]
provider = "litellm"
base_model = "lm_studio/text-embedding-nomic-embed-text-v1.5-embedding"
base_dimension = 768 # nomic-embed-text-v1.5 is 768-dimensional; matches full_local_llm_lm_studio.toml
batch_size = 128
add_title_as_prefix = true
concurrent_request_limit = 2

[database]
provider = "postgres"

[database.kg_creation_settings]
kg_entity_description_prompt = "graphrag_entity_description"
kg_triples_extraction_prompt = "graphrag_triples_extraction_few_shot"
entity_types = [] # if empty, all entities are extracted
relation_types = [] # if empty, all relations are extracted
fragment_merge_count = 4 # number of fragments to merge into a single extraction
max_knowledge_triples = 100
max_description_input_length = 65536
generation_config = { model = "lm_studio/llama-3.2-3b-instruct" } # and other params, model used for triplet extraction

[database.kg_entity_deduplication_settings]
kg_entity_deduplication_type = "by_name"
kg_entity_deduplication_prompt = "graphrag_entity_deduplication"
max_description_input_length = 65536
generation_config = { model = "lm_studio/llama-3.2-3b-instruct" } # and other params, model used for deduplication

[database.kg_enrichment_settings]
community_reports_prompt = "graphrag_community_reports"
max_summary_input_length = 65536
generation_config = { model = "lm_studio/llama-3.2-3b-instruct" } # and other params, model used for node description and graph clustering
leiden_params = {}

[database.kg_search_settings]
entities_level = "document" # set to collection if you've run deduplication
map_system_prompt = "graphrag_map_system"
reduce_system_prompt = "graphrag_reduce_system"
generation_config = { model = "lm_studio/llama-3.2-3b-instruct" }


[orchestration]
provider = "simple"
61 changes: 61 additions & 0 deletions py/core/configs/local_llm_ollama.toml
@@ -0,0 +1,61 @@
[agent]
system_instruction_name = "rag_agent"
tool_names = ["search"]

[agent.generation_config]
model = "ollama/llama3.1"

[completion]
provider = "litellm"
concurrent_request_limit = 1

[completion.generation_config]
model = "ollama/llama3.1"
temperature = 0.1
top_p = 1
max_tokens_to_sample = 1_024
stream = false
add_generation_kwargs = { }

[embedding]
provider = "ollama"
base_model = "mxbai-embed-large"
base_dimension = 1_024
batch_size = 128
add_title_as_prefix = true
concurrent_request_limit = 2

[database]
provider = "postgres"

[database.kg_creation_settings]
kg_entity_description_prompt = "graphrag_entity_description"
kg_triples_extraction_prompt = "graphrag_triples_extraction_few_shot"
entity_types = [] # if empty, all entities are extracted
relation_types = [] # if empty, all relations are extracted
fragment_merge_count = 4 # number of fragments to merge into a single extraction
max_knowledge_triples = 100
max_description_input_length = 65536
generation_config = { model = "ollama/llama3.1" } # and other params, model used for triplet extraction

[database.kg_entity_deduplication_settings]
kg_entity_deduplication_type = "by_name"
kg_entity_deduplication_prompt = "graphrag_entity_deduplication"
max_description_input_length = 65536
generation_config = { model = "ollama/llama3.1" } # and other params, model used for deduplication

[database.kg_enrichment_settings]
community_reports_prompt = "graphrag_community_reports"
max_summary_input_length = 65536
generation_config = { model = "ollama/llama3.1" } # and other params, model used for node description and graph clustering
leiden_params = {}

[database.kg_search_settings]
entities_level = "document" # set to collection if you've run deduplication
map_system_prompt = "graphrag_map_system"
reduce_system_prompt = "graphrag_reduce_system"
generation_config = { model = "ollama/llama3.1" }


[orchestration]
provider = "simple"
17 changes: 7 additions & 10 deletions py/core/providers/embeddings/litellm.py
@@ -24,15 +24,17 @@ def __init__(
     ) -> None:
         super().__init__(config)
 
+        # Allow LiteLLM to automatically drop parameters that are not supported by the model
+        litellm.drop_params = True
+
         self.litellm_embedding = embedding
         self.litellm_aembedding = aembedding
 
-        provider = config.provider
-        if not provider:
+        if not config.provider:
             raise ValueError(
                 "Must set provider in order to initialize `LiteLLMEmbeddingProvider`."
             )
-        if provider != "litellm":
+        if config.provider != "litellm":
             raise ValueError(
                 "LiteLLMEmbeddingProvider must be initialized with provider `litellm`."
             )
@@ -42,18 +44,13 @@ def __init__(
             )
 
         self.base_model = config.base_model
-        if "amazon" in self.base_model:
-            logger.warn("Amazon embedding model detected, dropping params")
-            litellm.drop_params = True
         self.base_dimension = config.base_dimension
 
     def _get_embedding_kwargs(self, **kwargs):
-        embedding_kwargs = {
+        return {
             "model": self.base_model,
             "dimensions": self.base_dimension,
-        }
-        embedding_kwargs.update(kwargs)
-        return embedding_kwargs
+        } | kwargs
 
     async def _execute_task(self, task: dict[str, Any]) -> list[list[float]]:
         texts = task["texts"]
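One note on the rewritten `_get_embedding_kwargs`: the dict union operator (Python 3.9+, PEP 584) gives precedence to the right-hand operand, so caller-supplied kwargs still override the defaults exactly as the old `.update()` path did:

```python
# Right-hand keys win in a dict union, matching the previous .update() semantics.
defaults = {
    "model": "lm_studio/text-embedding-nomic-embed-text-v1.5-embedding",
    "dimensions": 768,
}
merged = defaults | {"dimensions": 512}
assert merged["dimensions"] == 512
assert merged["model"] == defaults["model"]
```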