This repository has been archived by the owner on Nov 13, 2024. It is now read-only.

Merge branch 'update_pytest-html' into release/0.4.0
igiloh-pinecone committed Dec 10, 2023
2 parents 94b733c + 89e870f commit d6ba50a
Showing 14 changed files with 232 additions and 36 deletions.
22 changes: 22 additions & 0 deletions .devcontainer/devcontainer.json
@@ -0,0 +1,22 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
// README at: https://github.com/devcontainers/templates/tree/main/src/python
{
"name": "Python 3",
// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
"image": "mcr.microsoft.com/devcontainers/python:1-3.9-bullseye"

// Features to add to the dev container. More info: https://containers.dev/features.
// "features": {},

// Use 'forwardPorts' to make a list of ports inside the container available locally.
// "forwardPorts": [],

// Use 'postCreateCommand' to run commands after the container is created.
// "postCreateCommand": "pip3 install --user -r requirements.txt",

// Configure tool-specific properties.
// "customizations": {},

// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
// "remoteUser": "root"
}
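
The container is built from Microsoft's stock Python 3.9 image, so no project-specific Dockerfile is needed. As a minimal sketch of trying it outside VS Code, assuming the Dev Containers CLI is installed (it is not a dependency of this repo; VS Code users can simply run "Dev Containers: Reopen in Container"):

```bash
# Assumption: npm install -g @devcontainers/cli
devcontainer up --workspace-folder .                       # build and start the container
devcontainer exec --workspace-folder . python --version    # should report Python 3.9.x
```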
5 changes: 5 additions & 0 deletions .env.example
@@ -0,0 +1,5 @@
PINECONE_API_KEY="<PINECONE_API_KEY>"
PINECONE_ENVIRONMENT="<PINECONE_ENVIRONMENT>"
OPENAI_API_KEY="<OPENAI_API_KEY>"
INDEX_NAME="<INDEX_NAME>"
CANOPY_CONFIG_FILE="config/config.yaml"
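
These values only take effect once they are exported into the environment of the shell running Canopy. A minimal sketch of loading them from Python instead, assuming the `python-dotenv` package (not a dependency added by this commit):

```python
import os

from dotenv import load_dotenv  # assumption: pip install python-dotenv

load_dotenv()  # reads key=value pairs from ./.env into os.environ

pinecone_api_key = os.environ["PINECONE_API_KEY"]
index_name = os.environ["INDEX_NAME"]
# CANOPY_CONFIG_FILE is optional; the CLI falls back to built-in defaults
config_file = os.environ.get("CANOPY_CONFIG_FILE")
```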
54 changes: 54 additions & 0 deletions .github/ISSUE_TEMPLATE/feature-request.yml
@@ -0,0 +1,54 @@
name: ✨ Feature
description: Propose a straightforward extension
title: "[Feature] <title>"
labels: ["enhancement", "triage"]
body:
- type: markdown
attributes:
value: |
Thanks for taking the time to fill out this feature request!
- type: checkboxes
attributes:
label: Is this your first time submitting a feature request?
description: >
We want to make sure that features are distinct and discoverable,
so that other members of the community can find them and offer their thoughts.
Issues are the right place to request straightforward extensions of existing functionality.
options:
- label: I have searched the existing issues, and I could not find an existing issue for this feature
required: true
- label: I am requesting a straightforward extension of existing functionality
- type: textarea
attributes:
label: Describe the feature
description: A clear and concise description of what you want to happen.
validations:
required: true
- type: textarea
attributes:
label: Describe alternatives you've considered
description: |
A clear and concise description of any alternative solutions or features you've considered.
validations:
required: false
- type: textarea
attributes:
label: Who will this benefit?
description: |
What kind of use case will this feature be useful for? Please be specific and provide examples; this will help us prioritize properly.
validations:
required: false
- type: input
attributes:
label: Are you interested in contributing this feature?
description: Let us know if you want to write some code, and how we can help.
validations:
required: false
- type: textarea
attributes:
label: Anything else?
description: |
Links? References? Anything that will give us more context about the feature you are suggesting!
validations:
required: false
1 change: 1 addition & 0 deletions .gitignore
@@ -165,3 +165,4 @@ cython_debug/
datafiles/*
canopy-api-docs.html
.vscode/
*.jsonl
25 changes: 25 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,28 @@
## [0.3.0] - 2023-12-10

### Bug fixes
* Fix some typos, add dev container, faux streaming [#200](https://github.com/pinecone-io/canopy/pull/200) (Thanks @eburnette!)
* CLI requires OpenAI API key, even if OpenAI is not being used [#208](https://github.com/pinecone-io/canopy/pull/208)
* CLI: read config file from env location [#190](https://github.com/pinecone-io/canopy/pull/190) (Thanks @MichaelAnckaert!)


### Documentation
* Add document field explanations and python version badges [#187](https://github.com/pinecone-io/canopy/pull/187)
* Update README.md [#192](https://github.com/pinecone-io/canopy/pull/192) (Thanks @tomer-w!)
* Tweaks to CLI help texts [#193](https://github.com/pinecone-io/canopy/pull/193) (Thanks @jseldess!)
* Update README.md and change href [#202](https://github.com/pinecone-io/canopy/pull/202)

### CI Improvements
* Added bug-report template [#184](https://github.com/pinecone-io/canopy/pull/184)
* Add feature-request.yml [#209](https://github.com/pinecone-io/canopy/pull/209)

### Added
* Add Anyscale Endpoint support and Llama Tokenizer [#173](https://github.com/pinecone-io/canopy/pull/173) (Thanks @kylehh!)
* Add last message query generator [#210](https://github.com/pinecone-io/canopy/pull/210)


**Full Changelog**: https://github.com/pinecone-io/canopy/compare/V0.2.0...V0.3.0

## [0.2.0] - 2023-11-15

### Bug fixes
6 changes: 4 additions & 2 deletions README.md
@@ -1,10 +1,10 @@
# Canopy

<p align="center">
<a href="https://pypi.org/project/fastapi" target="_blank">
<a href="https://pypi.org/project/canopy-sdk" target="_blank">
<img src="https://img.shields.io/pypi/pyversions/canopy-sdk" alt="Supported Python versions">
</a>
<a href="https://pypi.org/project/fastapi" target="_blank">
<a href="https://pypi.org/project/canopy-sdk" target="_blank">
<img src="https://img.shields.io/pypi/v/canopy-sdk?label=pypi%20package" alt="Package version">
</a>
</p>
@@ -198,6 +198,8 @@ This will open a chat interface in your terminal. You can ask questions and the

To compare the chat response with and without RAG, use the `--no-rag` flag

> **Note**: This method is only supported with OpenAI at the moment.
```bash
canopy chat --no-rag
```
12 changes: 1 addition & 11 deletions config/anyscale.yaml
@@ -24,14 +24,4 @@ chat_engine:
# The query builder is responsible for generating textual queries given user message history.
# --------------------------------------------------------------------
query_builder:
type: FunctionCallingQueryGenerator # Options: [FunctionCallingQueryGenerator]
params:
prompt: *query_builder_prompt # The query builder's system prompt for calling the LLM
function_description: # A function description passed to the LLM's `function_calling` API
Query search engine for relevant information

llm: # The LLM that the query builder will use to generate queries.
#Use OpenAI for function call for now
type: OpenAILLM
params:
model_name: gpt-3.5-turbo
type: LastMessageQueryGenerator # Options: [FunctionCallingQueryGenerator, LastMessageQueryGenerator]
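
The removed lines show why: `FunctionCallingQueryGenerator` depends on an OpenAI-style `function_calling` API, which is why the old config had to pin an `OpenAILLM` override. The new `LastMessageQueryGenerator` makes no LLM call at all, so the whole `params` block goes away. The resulting section, sketched in full:

```yaml
query_builder:
  type: LastMessageQueryGenerator  # uses the last user message verbatim as the query
```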
2 changes: 1 addition & 1 deletion config/config.yaml
@@ -59,7 +59,7 @@ chat_engine:
# The query builder is responsible for generating textual queries given user message history.
# --------------------------------------------------------------------
query_builder:
type: FunctionCallingQueryGenerator # Options: [FunctionCallingQueryGenerator]
type: FunctionCallingQueryGenerator # Options: [FunctionCallingQueryGenerator, LastMessageQueryGenerator]
params:
prompt: *query_builder_prompt # The query builder's system prompt for calling the LLM
function_description: # A function description passed to the LLM's `function_calling` API
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -40,7 +40,7 @@ jupyter = "^1.0.0"
pytest = "^7.3.2"
mypy = "^1.4.1"
flake8 = "^6.1.0"
pytest-html = "^3.2.0"
pytest-html = "^4.1.0"
flake8-pyproject = "^1.2.3"
asyncio = "^3.4.3"
pytest-asyncio = "^0.14.0"
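
After bumping a caret constraint like this, the lock file has to be re-resolved before CI sees the new version. A sketch, assuming the project is managed with Poetry (pyproject.toml uses Poetry-style constraints):

```bash
poetry update pytest-html        # re-resolve poetry.lock against the new ^4.1.0 range
# sanity-check that HTML report generation still works under pytest-html 4.x
poetry run pytest --html=report.html --self-contained-html
```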
1 change: 1 addition & 0 deletions src/canopy/chat_engine/query_generator/__init__.py
@@ -1,2 +1,3 @@
from .base import QueryGenerator
from .function_calling import FunctionCallingQueryGenerator
from .last_message import LastMessageQueryGenerator
36 changes: 36 additions & 0 deletions src/canopy/chat_engine/query_generator/last_message.py
@@ -0,0 +1,36 @@
from typing import List

from canopy.chat_engine.query_generator import QueryGenerator
from canopy.models.data_models import Messages, Query, Role


class LastMessageQueryGenerator(QueryGenerator):
"""
Returns the last message as a query, without running any LLM. This is the
most basic form of query generation; use one of the other query generators
for more accurate results.
"""

def generate(self,
messages: Messages,
max_prompt_tokens: int) -> List[Query]:
"""
max_prompt_tokens is ignored, since this generator does not consume
any tokens to generate the queries.
"""

if len(messages) == 0:
raise ValueError("Passed chat history does not contain any messages. "
"Please include at least one message in the history.")

last_message = messages[-1]

if last_message.role != Role.USER:
raise ValueError(f"Expected a UserMessage, got {type(last_message)}.")

return [Query(text=last_message.content)]

async def agenerate(self,
messages: Messages,
max_prompt_tokens: int) -> List[Query]:
return self.generate(messages, max_prompt_tokens)
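
A usage sketch for the new generator; `UserMessage` is assumed to be the `Role.USER` convenience model exported from `canopy.models.data_models`:

```python
from canopy.chat_engine.query_generator import LastMessageQueryGenerator
from canopy.models.data_models import UserMessage  # assumption: Role.USER message model

generator = LastMessageQueryGenerator()
messages = [UserMessage(content="What is a Pinecone index?")]

# max_prompt_tokens is ignored here; this generator never calls an LLM
queries = generator.generate(messages, max_prompt_tokens=4096)
print(queries[0].text)  # -> "What is a Pinecone index?"
```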
59 changes: 39 additions & 20 deletions src/canopy_cli/cli.py
@@ -214,9 +214,10 @@ def health(url):
)
)
@click.argument("index-name", nargs=1, envvar="INDEX_NAME", type=str, required=True)
@click.option("--config", "-c", default=None,
help="Path to a Canopy config file. Optional, otherwise configuration "
"defaults will be used.")
@click.option("--config", "-c", default=None, envvar="CANOPY_CONFIG_FILE",
help="Path to a canopy config file. Can also be set by the "
"`CANOPY_CONFIG_FILE` envrionment variable. Otherwise, the built-in"
"defualt configuration will be used.")
def new(index_name: str, config: Optional[str]):
_initialize_tokenizer()
kb_config = _load_kb_config(config)
@@ -279,9 +280,10 @@ def _batch_documents_by_chunks(chunker: Chunker,
"be uploaded. "
"When set to True, the upsert process will continue on failure, as "
"long as less than 10% of the documents have failed to be uploaded.")
@click.option("--config", "-c", default=None,
help="Path to a Canopy config file. Optional, otherwise configuration "
"defaults will be used.")
@click.option("--config", "-c", default=None, envvar="CANOPY_CONFIG_FILE",
help="Path to a canopy config file. Can also be set by the "
"`CANOPY_CONFIG_FILE` envrionment variable. Otherwise, the built-in"
"defualt configuration will be used.")
def upsert(index_name: str,
data_path: str,
allow_failures: bool,
@@ -381,13 +383,23 @@ def _chat(
model,
history,
message,
openai_api_key=None,
api_base=None,
stream=True,
print_debug_info=False,
):
if openai_api_key is None:
openai_api_key = os.environ.get("OPENAI_API_KEY")
if openai_api_key is None and api_base is None:
raise CLIError(
"No OpenAI API key provided. When using the `--no-rag` flag "
"You will need to have a valid OpenAI API key. "
"Please set the OPENAI_API_KEY environment "
"variable."
)
output = ""
history += [{"role": "user", "content": message}]
client = openai.OpenAI(base_url=api_base)
client = openai.OpenAI(base_url=api_base, api_key=openai_api_key)

start = time.time()
try:
@@ -404,24 +416,24 @@
if stream:
for chunk in openai_response:
openai_response_id = chunk.id
intenal_model = chunk.model
internal_model = chunk.model
text = chunk.choices[0].delta.content or ""
output += text
click.echo(text, nl=False)
click.echo()
debug_info = ChatDebugInfo(
id=openai_response_id,
intenal_model=intenal_model,
internal_model=internal_model,
duration_in_sec=round(duration_in_sec, 2),
)
else:
intenal_model = openai_response.model
internal_model = openai_response.model
text = openai_response.choices[0].message.content or ""
output = text
click.echo(text, nl=False)
debug_info = ChatDebugInfo(
id=openai_response.id,
intenal_model=intenal_model,
internal_model=internal_model,
duration_in_sec=duration_in_sec,
prompt_tokens=openai_response.usage.prompt_tokens,
generated_tokens=openai_response.usage.completion_tokens,
@@ -468,18 +480,20 @@ def chat(chat_server_url, rag, debug, stream):
)
for c in note_msg:
click.echo(click.style(c, fg="red"), nl=False)
time.sleep(0.01)
if (stream):
time.sleep(0.01)
click.echo()
note_white_message = (
"This method should be used by developers to test the RAG data and model"
"This method should be used by developers to test the RAG data and model "
"during development. "
"When you are ready to deploy, run the Canopy server as a REST API "
"backend for your chatbot UI. \n\n"
"Let's Chat!"
)
for c in note_white_message:
click.echo(click.style(c, fg="white"), nl=False)
time.sleep(0.01)
if (stream):
time.sleep(0.01)
click.echo()

history_with_pinecone = []
@@ -514,6 +528,7 @@ def chat(chat_server_url, rag, debug, stream):
history=history_with_pinecone,
message=message,
stream=stream,
openai_api_key="canopy",
api_base=chat_server_url,
print_debug_info=debug,
)
@@ -522,7 +537,7 @@
_ = _chat(
speaker="Without Context (No RAG)",
speaker_color="yellow",
model=dubug_info.intenal_model,
model=dubug_info.internal_model,
history=history_without_pinecone,
message=message,
stream=stream,
@@ -553,18 +568,21 @@ def chat(chat_server_url, rag, debug, stream):
"""
)
)
@click.option("--stream/--no-stream", default=True,
help="Stream the response from the RAG chatbot word by word.")
@click.option("--host", default="0.0.0.0",
help="Hostname or address to bind the server to. Defaults to 0.0.0.0")
@click.option("--port", default=8000,
help="TCP port to bind the server to. Defaults to 8000")
@click.option("--reload/--no-reload", default=False,
help="Set the server to reload on code changes. Defaults to False")
@click.option("--config", "-c", default=None,
help="Path to a canopy config file. Optional, otherwise configuration "
"defaults will be used.")
@click.option("--config", "-c", default=None, envvar="CANOPY_CONFIG_FILE",
help="Path to a canopy config file. Can also be set by the "
"`CANOPY_CONFIG_FILE` envrionment variable. Otherwise, the built-in"
"defualt configuration will be used.")
@click.option("--index-name", default=None,
help="Index name, if not provided already as an environment variable.")
def start(host: str, port: str, reload: bool,
def start(host: str, port: str, reload: bool, stream: bool,
config: Optional[str], index_name: Optional[str]):
validate_pinecone_connection()
_validate_chat_engine(config)
@@ -584,7 +602,8 @@
)
for c in note_msg + msg_suffix:
click.echo(click.style(c, fg="red"), nl=False)
time.sleep(0.01)
if (stream):
time.sleep(0.01)
click.echo()

if index_name:
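
Taken together, the CLI changes let `--config` fall back to the `CANOPY_CONFIG_FILE` environment variable on `canopy new`, `canopy upsert`, and `canopy start`, and add a `--stream/--no-stream` flag to `canopy start`. A usage sketch:

```bash
export CANOPY_CONFIG_FILE="config/config.yaml"  # used wherever --config is omitted

# --no-stream disables word-by-word streaming; in this commit it also
# skips the character-by-character startup banner animation
canopy start --no-stream
```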
2 changes: 1 addition & 1 deletion src/canopy_server/models/v1/api_models.py
@@ -57,7 +57,7 @@ class HealthStatus(BaseModel):
class ChatDebugInfo(BaseModel):
id: str
duration_in_sec: float
intenal_model: str
internal_model: str
prompt_tokens: Optional[int] = None
generated_tokens: Optional[int] = None
