openlayer-ai · whoseoyster · Sep 25, 2024 · Sep 25, 2024 · Sep 25, 2024
diff --git a/examples/tracing/rag/rag_tracing.ipynb b/examples/tracing/rag/rag_tracing.ipynb
@@ -19,7 +19,6 @@
    "outputs": [],
    "source": [
     "import os\n",
-    "import openai\n",
     "\n",
     "# OpenAI env variables\n",
     "os.environ[\"OPENAI_API_KEY\"] = \"YOUR_OPENAI_API_KEY_HERE\"\n",
@@ -58,13 +57,12 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import random\n",
-    "import time\n",
+    "from typing import List\n",
     "\n",
     "import numpy as np\n",
     "from openai import OpenAI\n",
-    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
     "from sklearn.metrics.pairwise import cosine_similarity\n",
+    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
     "\n",
     "from openlayer.lib import trace, trace_openai"
    ]
@@ -93,31 +91,35 @@
     "\n",
     "        Answers to a user query with the LLM.\n",
     "        \"\"\"\n",
-    "        context = self.retrieve_context(user_query)\n",
+    "        context = self.retrieve_contexts(user_query)\n",
     "        prompt = self.inject_prompt(user_query, context)\n",
     "        answer = self.generate_answer_with_gpt(prompt)\n",
     "        return answer\n",
     "\n",
     "    @trace()\n",
-    "    def retrieve_context(self, query: str) -> str:\n",
+    "    def retrieve_contexts(self, query: str) -> List[str]:\n",
     "        \"\"\"Context retriever.\n",
     "\n",
     "        Given the query, returns the most similar context (using TFIDF).\n",
     "        \"\"\"\n",
     "        query_vector = self.vectorizer.transform([query])\n",
     "        cosine_similarities = cosine_similarity(query_vector, self.tfidf_matrix).flatten()\n",
     "        most_relevant_idx = np.argmax(cosine_similarities)\n",
-    "        return self.context_sections[most_relevant_idx]\n",
+    "        contexts = [self.context_sections[most_relevant_idx]]\n",
+    "        return contexts\n",
     "\n",
-    "    @trace()\n",
-    "    def inject_prompt(self, query: str, context: str):\n",
+    "    # You can also pass `context_kwarg` with the name of the argument that holds the\n",
+    "    # retrieved context, to unlock RAG metrics that evaluate the performance of the\n",
+    "    # context retriever. That argument (here, `contexts`) should be a list of strings.\n",
+    "    @trace(context_kwarg=\"contexts\")\n",
+    "    def inject_prompt(self, query: str, contexts: List[str]) -> List[dict]:\n",
     "        \"\"\"Combines the query with the context and returns\n",
     "        the prompt (formatted to conform with OpenAI models).\"\"\"\n",
     "        return [\n",
     "            {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n",
     "            {\n",
     "                \"role\": \"user\",\n",
-    "                \"content\": f\"Answer the user query using only the following context: {context}. \\nUser query: {query}\",\n",
+    "                \"content\": f\"Answer the user query using only the following context: {contexts[0]}. \\nUser query: {query}\",\n",
     "            },\n",
     "        ]\n",
     "\n",
@@ -172,7 +174,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "f960a36f-3438-4c81-8cdb-ca078aa509cd",
+   "id": "a45d5562",
    "metadata": {},
    "outputs": [],
    "source": []

diff --git a/src/openlayer/lib/core/base_model.py b/src/openlayer/lib/core/base_model.py
@@ -42,9 +42,7 @@ class OpenlayerModel(abc.ABC):
     def run_from_cli(self) -> None:
         """Run the model from the command line."""
         parser = argparse.ArgumentParser(description="Run data through a model.")
-        parser.add_argument(
-            "--dataset-path", type=str, required=True, help="Path to the dataset"
-        )
+        parser.add_argument("--dataset-path", type=str, required=True, help="Path to the dataset")
         parser.add_argument(
             "--output-dir",
             type=str,
@@ -87,9 +85,7 @@ def run_batch_from_df(self, df: pd.DataFrame) -> Tuple[pd.DataFrame, dict]:
             # Filter row_dict to only include keys that are valid parameters
             # for the 'run' method
             row_dict = row.to_dict()
-            filtered_kwargs = {
-                k: v for k, v in row_dict.items() if k in run_signature.parameters
-            }
+            filtered_kwargs = {k: v for k, v in row_dict.items() if k in run_signature.parameters}
 
             # Call the run method with filtered kwargs
             output = self.run(**filtered_kwargs)
@@ -111,6 +107,8 @@ def run_batch_from_df(self, df: pd.DataFrame) -> Tuple[pd.DataFrame, dict]:
                     df.at[index, "cost"] = processed_trace["cost"]
                 if "tokens" in processed_trace:
                     df.at[index, "tokens"] = processed_trace["tokens"]
+                if "context" in processed_trace:
+                    df.at[index, "context"] = processed_trace["context"]
 
         config = {
             "outputColumnName": "output",
@@ -126,6 +124,8 @@ def run_batch_from_df(self, df: pd.DataFrame) -> Tuple[pd.DataFrame, dict]:
             config["costColumnName"] = "cost"
         if "tokens" in df.columns:
             config["numOfTokenColumnName"] = "tokens"
+        if "context" in df.columns:
+            config["contextColumnName"] = "context"
 
         return df, config