From ea9f11910ecde61afa1930087df1b12a087b4b64 Mon Sep 17 00:00:00 2001 From: Gustavo Cid Ornelas Date: Wed, 25 Sep 2024 10:16:46 -0300 Subject: [PATCH 1/2] chore: show how to log context in RAG notebook example --- examples/tracing/rag/rag_tracing.ipynb | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/examples/tracing/rag/rag_tracing.ipynb b/examples/tracing/rag/rag_tracing.ipynb index febf6710..a6bf01b2 100644 --- a/examples/tracing/rag/rag_tracing.ipynb +++ b/examples/tracing/rag/rag_tracing.ipynb @@ -19,7 +19,6 @@ "outputs": [], "source": [ "import os\n", - "import openai\n", "\n", "# OpenAI env variables\n", "os.environ[\"OPENAI_API_KEY\"] = \"YOUR_OPENAI_API_KEY_HERE\"\n", @@ -58,13 +57,12 @@ "metadata": {}, "outputs": [], "source": [ - "import random\n", - "import time\n", + "from typing import List\n", "\n", "import numpy as np\n", "from openai import OpenAI\n", - "from sklearn.feature_extraction.text import TfidfVectorizer\n", "from sklearn.metrics.pairwise import cosine_similarity\n", + "from sklearn.feature_extraction.text import TfidfVectorizer\n", "\n", "from openlayer.lib import trace, trace_openai" ] @@ -93,13 +91,13 @@ "\n", " Answers to a user query with the LLM.\n", " \"\"\"\n", - " context = self.retrieve_context(user_query)\n", + " context = self.retrieve_contexts(user_query)\n", " prompt = self.inject_prompt(user_query, context)\n", " answer = self.generate_answer_with_gpt(prompt)\n", " return answer\n", "\n", " @trace()\n", - " def retrieve_context(self, query: str) -> str:\n", + " def retrieve_contexts(self, query: str) -> List[str]:\n", " \"\"\"Context retriever.\n", "\n", " Given the query, returns the most similar context (using TFIDF).\n", @@ -107,17 +105,21 @@ " query_vector = self.vectorizer.transform([query])\n", " cosine_similarities = cosine_similarity(query_vector, self.tfidf_matrix).flatten()\n", " most_relevant_idx = np.argmax(cosine_similarities)\n", - " return self.context_sections[most_relevant_idx]\n", + " contexts = [self.context_sections[most_relevant_idx]]\n", + " return contexts\n", "\n", - " @trace()\n", - " def inject_prompt(self, query: str, context: str):\n", + " # You can also specify the name of the `context_kwarg` to unlock RAG metrics that\n", + " # evaluate the performance of the context retriever. The value of the `context_kwarg`\n", + " # should be a list of strings.\n", + " @trace(context_kwarg=\"contexts\")\n", + " def inject_prompt(self, query: str, contexts: List[str]) -> List[dict]:\n", " \"\"\"Combines the query with the context and returns\n", " the prompt (formatted to conform with OpenAI models).\"\"\"\n", " return [\n", " {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n", " {\n", " \"role\": \"user\",\n", - " \"content\": f\"Answer the user query using only the following context: {context}. \\nUser query: {query}\",\n", + " \"content\": f\"Answer the user query using only the following context: {contexts[0]}. \\nUser query: {query}\",\n", " },\n", " ]\n", "\n", @@ -172,7 +174,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f960a36f-3438-4c81-8cdb-ca078aa509cd", + "id": "a45d5562", "metadata": {}, "outputs": [], "source": [] From c431a9de350edbf936af62d0e672f99beecb0165 Mon Sep 17 00:00:00 2001 From: Gustavo Cid Ornelas Date: Wed, 25 Sep 2024 10:38:37 -0300 Subject: [PATCH 2/2] fix: make sure that context logging works in development mode --- src/openlayer/lib/core/base_model.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/openlayer/lib/core/base_model.py b/src/openlayer/lib/core/base_model.py index a105e0bf..306526ff 100644 --- a/src/openlayer/lib/core/base_model.py +++ b/src/openlayer/lib/core/base_model.py @@ -42,9 +42,7 @@ class OpenlayerModel(abc.ABC): def run_from_cli(self) -> None: """Run the model from the command line.""" parser = argparse.ArgumentParser(description="Run data through a model.") - parser.add_argument( - "--dataset-path", type=str, required=True, help="Path to the dataset" - ) + parser.add_argument("--dataset-path", type=str, required=True, help="Path to the dataset") parser.add_argument( "--output-dir", type=str, @@ -87,9 +85,7 @@ def run_batch_from_df(self, df: pd.DataFrame) -> Tuple[pd.DataFrame, dict]: # Filter row_dict to only include keys that are valid parameters # for the 'run' method row_dict = row.to_dict() - filtered_kwargs = { - k: v for k, v in row_dict.items() if k in run_signature.parameters - } + filtered_kwargs = {k: v for k, v in row_dict.items() if k in run_signature.parameters} # Call the run method with filtered kwargs output = self.run(**filtered_kwargs) @@ -111,6 +107,8 @@ def run_batch_from_df(self, df: pd.DataFrame) -> Tuple[pd.DataFrame, dict]: df.at[index, "cost"] = processed_trace["cost"] if "tokens" in processed_trace: df.at[index, "tokens"] = processed_trace["tokens"] + if "context" in processed_trace: + df.at[index, "context"] = processed_trace["context"] config = { "outputColumnName": "output", @@ -126,6 +124,8 @@ def run_batch_from_df(self, df: pd.DataFrame) -> Tuple[pd.DataFrame, dict]: config["costColumnName"] = "cost" if "tokens" in df.columns: config["numOfTokenColumnName"] = "tokens" + if "context" in df.columns: + config["contextColumnName"] = "context" return df, config