From d054183cb64889b0aaa9d355485cb1c428c9cc6c Mon Sep 17 00:00:00 2001
From: Vyacheslav Morov
Date: Tue, 22 Oct 2024 21:09:43 +0200
Subject: [PATCH] Fix llm judge example and fix ignore for llm examples.

---
 .github/workflows/examples.yml              |  2 +-
 example_test.py                             |  4 ++--
 .../how_to_use_llm_judge_template.ipynb     | 11 +++++------
 3 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml
index b36aa7e80e..6f4062502b 100644
--- a/.github/workflows/examples.yml
+++ b/.github/workflows/examples.yml
@@ -42,7 +42,7 @@ jobs:
         if: matrix.minimal
         run: pip install -r requirements.min.txt
       - name: Prepare examples dependencies
-        run: pip install catboost sentence-transformers
+        run: pip install catboost sentence-transformers openai
      - name: Export examples
        run: jupyter nbconvert --to python examples/*/*.ipynb --output-dir example_scripts
      - name: Run examples
diff --git a/example_test.py b/example_test.py
index 9132656429..ae4c71ffe4 100644
--- a/example_test.py
+++ b/example_test.py
@@ -12,8 +12,8 @@
     "comparing_custom_statest_with_classic_distributions.py",
     "how_to_evaluate_llm_with_text_descriptors.py",
     "how_to_run_drift_report_for_text_data.py",  # too slow & torch version conflict?
-    "llm_evaluation_tutorial.ipynb",  # cloud usage
-    "llm_tracing_tutorial.ipynb",  # cloud usage
+    "llm_evaluation_tutorial.py",  # cloud usage
+    "llm_tracing_tutorial.py",  # cloud usage
 ]
diff --git a/examples/how_to_questions/how_to_use_llm_judge_template.ipynb b/examples/how_to_questions/how_to_use_llm_judge_template.ipynb
index d6f57a6ea5..3623818fa7 100644
--- a/examples/how_to_questions/how_to_use_llm_judge_template.ipynb
+++ b/examples/how_to_questions/how_to_use_llm_judge_template.ipynb
@@ -191,9 +191,7 @@
    "id": "204d90a4-694e-406b-949a-f7ba3b601eac",
    "metadata": {},
    "outputs": [],
-   "source": [
-    "print(ToxicityLLMEval().get_template().get_prompt_template())"
-   ]
+   "source": "print(ToxicityLLMEval().get_template().get_template())"
   },
   {
    "cell_type": "code",
@@ -308,7 +306,7 @@
    "source": [
     "#that's how you can see the prompt\n",
     "\n",
-    "print(ContextQualityLLMEval(question=\"question\").get_template().get_prompt_template())"
+    "print(ContextQualityLLMEval(question=\"question\").get_template().get_template())"
    ]
   },
   {
@@ -414,12 +412,13 @@
     "        pre_messages=[(\"system\", \"You are a judge which evaluates text.\")],\n",
     "    ),\n",
     "    provider = \"openai\",\n",
-    "    model = \"gpt-4o-mini\"\n",
+    "    model = \"gpt-4o-mini\",\n",
+    "    display_name=\"test\"\n",
     ")\n",
     "\n",
     "report = Report(metrics=[\n",
     "    TextEvals(column_name=\"response\", descriptors=[\n",
-    "        custom_judge(display_name=\"test\")\n",
+    "        custom_judge\n",
     "    ])\n",
     "])\n",
     "\n",
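
Note (not part of the applied patch): below is a minimal sketch of the corrected
notebook usage after this fix. The import paths are assumptions based on evidently's
conventions; the diff shows only the call sites, not the import lines. The criteria
and category strings are placeholders, not values taken from the notebook.

    # Assumed imports -- the patch does not show these lines.
    from evidently.descriptors import LLMEval, ToxicityLLMEval
    from evidently.features.llm_judge import BinaryClassificationPromptTemplate
    from evidently.metric_preset import TextEvals
    from evidently.report import Report

    # Fixed call: the template object exposes get_template(),
    # not get_prompt_template().
    print(ToxicityLLMEval().get_template().get_template())

    # Fixed usage: display_name is a constructor argument of LLMEval, and the
    # descriptor instance itself (not a call result) is passed to TextEvals.
    custom_judge = LLMEval(
        subcolumn="category",
        template=BinaryClassificationPromptTemplate(
            criteria="The text is polite and non-toxic.",  # placeholder
            target_category="OK",              # placeholder label
            non_target_category="NOT OK",      # placeholder label
            uncertainty="unknown",
            include_reasoning=True,
            pre_messages=[("system", "You are a judge which evaluates text.")],
        ),
        provider="openai",
        model="gpt-4o-mini",
        display_name="test",
    )

    report = Report(metrics=[
        TextEvals(column_name="response", descriptors=[
            custom_judge,
        ]),
    ])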