Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

chore: show how to log context in RAG notebook example #345

Merged
merged 2 commits into from
Sep 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 13 additions & 11 deletions examples/tracing/rag/rag_tracing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
"outputs": [],
"source": [
"import os\n",
"import openai\n",
"\n",
"# OpenAI env variables\n",
"os.environ[\"OPENAI_API_KEY\"] = \"YOUR_OPENAI_API_KEY_HERE\"\n",
Expand Down Expand Up @@ -58,13 +57,12 @@
"metadata": {},
"outputs": [],
"source": [
"import random\n",
"import time\n",
"from typing import List\n",
"\n",
"import numpy as np\n",
"from openai import OpenAI\n",
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
"from sklearn.metrics.pairwise import cosine_similarity\n",
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
"\n",
"from openlayer.lib import trace, trace_openai"
]
Expand Down Expand Up @@ -93,31 +91,35 @@
"\n",
" Answers to a user query with the LLM.\n",
" \"\"\"\n",
" context = self.retrieve_context(user_query)\n",
" context = self.retrieve_contexts(user_query)\n",
" prompt = self.inject_prompt(user_query, context)\n",
" answer = self.generate_answer_with_gpt(prompt)\n",
" return answer\n",
"\n",
" @trace()\n",
" def retrieve_context(self, query: str) -> str:\n",
" def retrieve_contexts(self, query: str) -> List[str]:\n",
" \"\"\"Context retriever.\n",
"\n",
" Given the query, returns the most similar context (using TFIDF).\n",
" \"\"\"\n",
" query_vector = self.vectorizer.transform([query])\n",
" cosine_similarities = cosine_similarity(query_vector, self.tfidf_matrix).flatten()\n",
" most_relevant_idx = np.argmax(cosine_similarities)\n",
" return self.context_sections[most_relevant_idx]\n",
" contexts = [self.context_sections[most_relevant_idx]]\n",
" return contexts\n",
"\n",
" @trace()\n",
" def inject_prompt(self, query: str, context: str):\n",
" # You can also specify the name of the `context_kwarg` to unlock RAG metrics that\n",
" # evaluate the performance of the context retriever. The value of the `context_kwarg`\n",
" # should be a list of strings.\n",
" @trace(context_kwarg=\"contexts\")\n",
" def inject_prompt(self, query: str, contexts: List[str]) -> List[dict]:\n",
" \"\"\"Combines the query with the context and returns\n",
" the prompt (formatted to conform with OpenAI models).\"\"\"\n",
" return [\n",
" {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": f\"Answer the user query using only the following context: {context}. \\nUser query: {query}\",\n",
" \"content\": f\"Answer the user query using only the following context: {contexts[0]}. \\nUser query: {query}\",\n",
" },\n",
" ]\n",
"\n",
Expand Down Expand Up @@ -172,7 +174,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "f960a36f-3438-4c81-8cdb-ca078aa509cd",
"id": "a45d5562",
"metadata": {},
"outputs": [],
"source": []
Expand Down
12 changes: 6 additions & 6 deletions src/openlayer/lib/core/base_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,7 @@ class OpenlayerModel(abc.ABC):
def run_from_cli(self) -> None:
"""Run the model from the command line."""
parser = argparse.ArgumentParser(description="Run data through a model.")
parser.add_argument(
"--dataset-path", type=str, required=True, help="Path to the dataset"
)
parser.add_argument("--dataset-path", type=str, required=True, help="Path to the dataset")
parser.add_argument(
"--output-dir",
type=str,
Expand Down Expand Up @@ -87,9 +85,7 @@ def run_batch_from_df(self, df: pd.DataFrame) -> Tuple[pd.DataFrame, dict]:
# Filter row_dict to only include keys that are valid parameters
# for the 'run' method
row_dict = row.to_dict()
filtered_kwargs = {
k: v for k, v in row_dict.items() if k in run_signature.parameters
}
filtered_kwargs = {k: v for k, v in row_dict.items() if k in run_signature.parameters}

# Call the run method with filtered kwargs
output = self.run(**filtered_kwargs)
Expand All @@ -111,6 +107,8 @@ def run_batch_from_df(self, df: pd.DataFrame) -> Tuple[pd.DataFrame, dict]:
df.at[index, "cost"] = processed_trace["cost"]
if "tokens" in processed_trace:
df.at[index, "tokens"] = processed_trace["tokens"]
if "context" in processed_trace:
df.at[index, "context"] = processed_trace["context"]

config = {
"outputColumnName": "output",
Expand All @@ -126,6 +124,8 @@ def run_batch_from_df(self, df: pd.DataFrame) -> Tuple[pd.DataFrame, dict]:
config["costColumnName"] = "cost"
if "tokens" in df.columns:
config["numOfTokenColumnName"] = "tokens"
if "context" in df.columns:
config["contextColumnName"] = "context"

return df, config

Expand Down