From fdeacebf3d94b34fe03b3c60949629ae16598ea7 Mon Sep 17 00:00:00 2001
From: Nicola <nicorb932@hotmail.com>
Date: Mon, 21 Aug 2023 14:59:17 +0200
Subject: [PATCH 1/5] add link to documentation in `Custom LLM` description

---
 core/cat/factory/llm.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/core/cat/factory/llm.py b/core/cat/factory/llm.py
index 92a1d715..7f17a0e3 100644
--- a/core/cat/factory/llm.py
+++ b/core/cat/factory/llm.py
@@ -60,6 +60,7 @@ class Config:
             "description":
                 "LLM on a custom endpoint. "
                 "See docs for examples.",
+            "link": "https://cheshirecat.ai/2023/08/19/custom-large-language-model/"
         }
 
 

From 1d57e6859f53fc7cfb78ca21e6f27100de5f1014 Mon Sep 17 00:00:00 2001
From: Nicola <nicorb932@hotmail.com>
Date: Mon, 21 Aug 2023 15:48:24 +0200
Subject: [PATCH 2/5] fixed consistency in schemas

---
 core/cat/factory/llm.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/core/cat/factory/llm.py b/core/cat/factory/llm.py
index 7f17a0e3..9e7d4dbe 100644
--- a/core/cat/factory/llm.py
+++ b/core/cat/factory/llm.py
@@ -32,6 +32,7 @@ class Config:
                 "A dumb LLM just telling that the Cat is not configured. "
                 "There will be a nice LLM here "
                 "once consumer hardware allows it.",
+            "link": ""
         }
 
 
@@ -73,6 +74,7 @@ class Config:
         schema_extra = {
             "humanReadableName": "OpenAI ChatGPT",
             "description": "Chat model from OpenAI",
+            "link": "https://platform.openai.com/docs/models/overview"
         }
 
 
@@ -87,6 +89,7 @@ class Config:
             "description":
                 "OpenAI GPT-3. More expensive but "
                 "also more flexible than ChatGPT.",
+            "link": "https://platform.openai.com/docs/models/overview"
         }
 
 
@@ -107,6 +110,7 @@ class Config:
         schema_extra = {
             "humanReadableName": "Azure OpenAI Chat Models",
             "description": "Chat model from Azure OpenAI",
+            "link": "https://azure.microsoft.com/en-us/products/ai-services/openai-service"
         }
 
 
@@ -128,6 +132,7 @@ class Config:
         schema_extra = {
             "humanReadableName": "Azure OpenAI Completion models",
             "description": "Configuration for Cognitive Services Azure OpenAI",
+            "link": "https://azure.microsoft.com/en-us/products/ai-services/openai-service"
         }
 
 
@@ -140,6 +145,7 @@ class Config:
         schema_extra = {
             "humanReadableName": "Cohere",
             "description": "Configuration for Cohere language model",
+            "link": "https://docs.cohere.com/docs/models"
         }
 
 
@@ -158,6 +164,7 @@ class Config:
         schema_extra = {
             "humanReadableName": "HuggingFace TextGen Inference",
             "description": "Configuration for HuggingFace TextGen Inference",
+            "link": "https://huggingface.co/text-generation-inference"
         }
 
 
@@ -175,6 +182,7 @@ class Config:
         schema_extra = {
             "humanReadableName": "HuggingFace Hub",
             "description": "Configuration for HuggingFace Hub language models",
+            "link": "https://huggingface.co/models"
         }
 
 
@@ -189,6 +197,7 @@ class Config:
             "humanReadableName": "HuggingFace Endpoint",
             "description":
                 "Configuration for HuggingFace Endpoint language models",
+            "link": "https://huggingface.co/inference-endpoints"
         }
 
 
@@ -201,6 +210,7 @@ class Config:
         schema_extra = {
             "humanReadableName": "Anthropic",
             "description": "Configuration for Anthropic language model",
+            "link": "https://www.anthropic.com/product"
         }
 
 
@@ -213,6 +223,7 @@ class Config:
         schema_extra = {
             "humanReadableName": "Google PaLM",
             "description": "Configuration for Google PaLM language model",
+            "link": "https://developers.generativeai.google/models/language"
         }
 
 

From 4f195d1b2aa23116f881a65850090fe4020b7bac Mon Sep 17 00:00:00 2001
From: Nicola <nicorb932@hotmail.com>
Date: Tue, 22 Aug 2023 15:22:37 +0200
Subject: [PATCH 3/5] hook to customize the `RabbitHole` parsers

---
 .../core_plugin/hooks/rabbithole.py           | 32 +++++++++++++++----
 core/cat/rabbit_hole.py                       | 15 +++++----
 2 files changed, 34 insertions(+), 13 deletions(-)

diff --git a/core/cat/mad_hatter/core_plugin/hooks/rabbithole.py b/core/cat/mad_hatter/core_plugin/hooks/rabbithole.py
index 0407a875..995c517e 100644
--- a/core/cat/mad_hatter/core_plugin/hooks/rabbithole.py
+++ b/core/cat/mad_hatter/core_plugin/hooks/rabbithole.py
@@ -8,18 +8,38 @@
 
 from typing import List
 
-from cat.log import log
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from cat.mad_hatter.decorators import hook
 from langchain.docstore.document import Document
 
 
+@hook(priority=0)
+def rabbithole_instantiates_parsers(file_handlers: dict, cat) -> dict:
+    """Hook the available parsers for ingesting files in the declarative memory.
+
+    Allows replacing or extending existing supported mime types and related parsers to customize the file ingestion.
+
+    Parameters
+    ----------
+    file_handlers : dict
+        Keys are the supported mime types and values are the related parsers, as built in `RabbitHole.__init__`.
+    cat : CheshireCat
+        Cheshire Cat instance.
+
+    Returns
+    -------
+    file_handlers : dict
+        Edited dictionary of supported mime types and related parsers. This default hook returns it unchanged.
+    """
+    return file_handlers
+
+
 # Hook called just before of inserting a document in vector memory
 @hook(priority=0)
 def before_rabbithole_insert_memory(doc: Document, cat) -> Document:
     """Hook the `Document` before is inserted in the vector memory.
 
-    Allows to edit and enhance a single `Document` before the *RabbitHole* add it to the declarative vector memory.
+    Allows editing and enhancing a single `Document` before the *RabbitHole* adds it to the declarative vector memory.
 
     Parameters
     ----------
@@ -51,7 +71,7 @@ def before_rabbithole_insert_memory(doc: Document, cat) -> Document:
 def before_rabbithole_splits_text(doc: Document, cat) -> Document:
     """Hook the `Document` before is split.
 
-    Allows to edit the whole uploaded `Document` before the *RabbitHole* recursively splits it in shorter ones.
+    Allows editing the whole uploaded `Document` before the *RabbitHole* recursively splits it into shorter ones.
 
     For instance, the hook allows to change the text or edit/add metadata.
 
@@ -76,7 +96,7 @@ def before_rabbithole_splits_text(doc: Document, cat) -> Document:
 def rabbithole_splits_text(text, chunk_size: int, chunk_overlap: int, cat) -> List[Document]:
     """Hook into the recursive split pipeline.
 
-    Allows to edit the recursive split the *RabbitHole* applies to chunk the ingested documents.
+    Allows editing the recursive split the *RabbitHole* applies to chunk the ingested documents.
 
     This is applied when ingesting a documents and urls from a script, using an endpoint or from the GUI.
 
@@ -120,7 +140,7 @@ def rabbithole_splits_text(text, chunk_size: int, chunk_overlap: int, cat) -> Li
 def after_rabbithole_splitted_text(chunks: List[Document], cat) -> List[Document]:
     """Hook the `Document` after is split.
 
-    Allows to edit the list of `Document` right after the *RabbitHole* chunked them in smaller ones.
+    Allows editing the list of `Document` right after the *RabbitHole* has chunked them into smaller ones.
 
     Parameters
     ----------
@@ -146,7 +166,7 @@ def after_rabbithole_splitted_text(chunks: List[Document], cat) -> List[Document
 def before_rabbithole_stores_documents(docs: List[Document], cat) -> List[Document]:
     """Hook into the memory insertion pipeline.
 
-    Allows to modify how the list of `Document` is inserted in the vector memory.
+    Allows modifying how the list of `Document` is inserted in the vector memory.
 
     For example, this hook is a good point to summarize the incoming documents and save both original and
     summarized contents.
diff --git a/core/cat/rabbit_hole.py b/core/cat/rabbit_hole.py
index 5f70c530..04be89c7 100644
--- a/core/cat/rabbit_hole.py
+++ b/core/cat/rabbit_hole.py
@@ -27,13 +27,15 @@ class RabbitHole:
     def __init__(self, cat):
         self.cat = cat
 
-        self.file_handlers = {
+        file_handlers = {
             "application/pdf": PDFMinerParser(),
             "text/plain": TextParser(),
             "text/markdown": TextParser(),
             "text/html": BS4HTMLParser()
         }
 
+        self.file_handlers = cat.mad_hatter.execute_hook("rabbithole_instantiates_parsers", file_handlers)
+
     def ingest_memory(self, file: UploadFile):
         """Upload memories to the declarative memory from a JSON file.
 
@@ -44,7 +46,7 @@ def ingest_memory(self, file: UploadFile):
 
         Notes
         -----
-        This method allows to upload a JSON file containing vector and text memories directly to the declarative memory.
+        This method allows uploading a JSON file containing vector and text memories directly to the declarative memory.
         When doing this, please, make sure the embedder used to export the memories is the same as the one used
         when uploading.
         The method also performs a check on the dimensionality of the embeddings (i.e. length of each vector).
@@ -230,7 +232,7 @@ def file_to_docs(
     def send_rabbit_thought(self, thought):
         """Append a message to the notification list.
 
-        This method receive a string and create the message to append to the list of notifications.
+        This method receives a string and creates the message to append to the list of notifications.
 
         Parameters
         ----------
@@ -245,7 +247,6 @@ def send_rabbit_thought(self, thought):
             "why": {},
         })
 
-
     def store_documents(self, docs: List[Document], source: str) -> None:
         """Add documents to the Cat's declarative memory.
 
@@ -278,11 +279,11 @@ def store_documents(self, docs: List[Document], source: str) -> None:
 
         # classic embed
         time_last_notification = time.time()
-        time_interval = 10 # a notification every 10 secs
+        time_interval = 10  # a notification every 10 secs
         for d, doc in enumerate(docs):
             if time.time() - time_last_notification > time_interval:
                 time_last_notification = time.time()
-                perc_read = int( d / len(docs) * 100 )
+                perc_read = int(d / len(docs) * 100)
                 self.send_rabbit_thought(f"Read {perc_read}% of {source}")
 
             doc.metadata["source"] = source
@@ -308,7 +309,7 @@ def store_documents(self, docs: List[Document], source: str) -> None:
         # notify client
         finished_reading_message = f"Finished reading {source}, " \
                                    f"I made {len(docs)} thoughts on it."
-        
+
         self.send_rabbit_thought(finished_reading_message)
 
         print(f"\n\nDone uploading {source}")

From 51893449ffd08d949de1b51e5e851f79015cd7ca Mon Sep 17 00:00:00 2001
From: Piero Savastano <piero.savastano@gmail.com>
Date: Tue, 22 Aug 2023 18:49:19 +0200
Subject: [PATCH 4/5] take away uvicorn command from docker-compose

---
 core/cat/main.py   | 21 +++++++++++++++++++++
 docker-compose.yml | 17 ++++-------------
 2 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/core/cat/main.py b/core/cat/main.py
index 363bb67d..80833423 100644
--- a/core/cat/main.py
+++ b/core/cat/main.py
@@ -1,6 +1,8 @@
 import os
 from contextlib import asynccontextmanager
 
+import uvicorn
+
 from fastapi import Depends, FastAPI
 from fastapi.routing import APIRoute
 from fastapi.responses import JSONResponse
@@ -90,3 +92,22 @@ async def validation_exception_handler(request, exc):
 
 # openapi customization
 cheshire_cat_api.openapi = get_openapi_configuration_function(cheshire_cat_api)
+
+# RUN! Entry point used by `python -m cat.main` (see the docker-compose `command`).
+if __name__ == "__main__":
+
+    # debugging utilities (uvicorn auto-reload); on by default, any `DEBUG` value other than `true` in .env disables them
+    debug_config = {}
+    if os.getenv("DEBUG", "true").strip().lower() == "true":  # normalized so `True`/`TRUE` are not silently treated as off
+        debug_config = {
+            "reload": True,
+            "reload_includes": ["plugin.json"],
+            "reload_excludes": ["*test_*.*", "*mock_*.*"]
+        }
+
+    uvicorn.run(
+        "cat.main:cheshire_cat_api",
+        host="0.0.0.0",
+        port=80,
+        **debug_config
+    )
diff --git a/docker-compose.yml b/docker-compose.yml
index 2c580ac9..40f76985 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -18,24 +18,15 @@ services:
       - CORE_USE_SECURE_PROTOCOLS=${CORE_USE_SECURE_PROTOCOLS:-}
       - API_KEY=${API_KEY:-}
       - LOG_LEVEL=${LOG_LEVEL:-WARNING}
+      - DEBUG=${DEBUG:-true}
     ports:
       - ${CORE_PORT:-1865}:80
     volumes:
       - ./core:/app
     command:
-      - uvicorn
-      - cat.main:cheshire_cat_api
-      - --host
-      - "0.0.0.0"
-      - --port
-      - "80"
-      - --reload # take away in prod
-      - --reload-include
-      - "plugin.json"
-      - --reload-exclude # TODO: can't exclude the whole tests/ folder, so excluding files with test_ or mock_ in their name
-      - "*test_*.*"
-      - --reload-exclude
-      - "*mock_*.*"
+      - python
+      - "-m"
+      - "cat.main"
     restart: unless-stopped
 
   cheshire-cat-vector-memory:

From bd897eb764fa7ef580e62210e0eb1e4538d85df5 Mon Sep 17 00:00:00 2001
From: Piero Savastano <piero.savastano@gmail.com>
Date: Tue, 22 Aug 2023 19:07:33 +0200
Subject: [PATCH 5/5] version 1.0.3: rabbithole with custom MIME types and
 parsers; uvicorn command out of docker-compose

---
 core/pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/core/pyproject.toml b/core/pyproject.toml
index 7d77edc5..be2666ae 100644
--- a/core/pyproject.toml
+++ b/core/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "Cheshire-Cat"
 description = "Open source and customizable AI architecture"
-version = "1.0.2"
+version = "1.0.3"
 requires-python = ">=3.10"
 license = { file="LICENSE" }
 authors = [