From d3f73159fcac64edfe04df304796c594d65c6873 Mon Sep 17 00:00:00 2001
From: Abram <israelvictory87@gmail.com>
Date: Tue, 13 Aug 2024 15:15:56 +0100
Subject: [PATCH 01/10] refactor (backend): improve error handling for
 auto_contains_json evaluator

---
 .../agenta_backend/routers/evaluators_router.py     |  4 ++--
 .../agenta_backend/services/evaluators_service.py   | 13 +++++++++++--
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/agenta-backend/agenta_backend/routers/evaluators_router.py b/agenta-backend/agenta_backend/routers/evaluators_router.py
index afc825854..dcfe672d5 100644
--- a/agenta-backend/agenta_backend/routers/evaluators_router.py
+++ b/agenta-backend/agenta_backend/routers/evaluators_router.py
@@ -103,11 +103,11 @@ async def evaluator_run(
         )
         return result
     except Exception as e:
-        logger.error(f"Error while running evaluator: {str(e)}")
+        logger.error(f"Error while running {evaluator_key} evaluator: {str(e)}")
         raise HTTPException(
             status_code=500,
             detail={
-                "message": "Error while running evaluator",
+                "message": f"Error while running {evaluator_key} evaluator",
                 "stacktrace": traceback.format_exc(),
             },
         )
diff --git a/agenta-backend/agenta_backend/services/evaluators_service.py b/agenta-backend/agenta_backend/services/evaluators_service.py
index f15825357..2e378c2ba 100644
--- a/agenta-backend/agenta_backend/services/evaluators_service.py
+++ b/agenta-backend/agenta_backend/services/evaluators_service.py
@@ -607,9 +607,18 @@ async def auto_contains_json(
     settings_values: Dict[str, Any],  # pylint: disable=unused-argument
     lm_providers_keys: Dict[str, Any],  # pylint: disable=unused-argument
 ) -> Result:
-    if not isinstance(output, str):
-        output = output.get("data", "")
     try:
+        if not isinstance(output, str):
+            # Attempt to retrieve 'data' key from output if it's a dictionary
+            output = output.get("data", "") if isinstance(output, dict) else output
+
+            # If output is still not a string, raise an exception
+            if not isinstance(output, str):
+                raise Exception(
+                    f"Evaluator 'contains_json' requires the output to be a string, but received {type(output).__name__} instead. "
+                    f"Please ensure the output of the application is a valid string, or that the 'data' key in the dictionary contains a string."
+                )
+
         response = await contains_json(
             input=EvaluatorInputInterface(**{"inputs": {"prediction": output}})
         )

From 08b9e8774d02c477a2f58be6b01ecc7f177ac176 Mon Sep 17 00:00:00 2001
From: Abram <israelvictory87@gmail.com>
Date: Tue, 13 Aug 2024 15:17:56 +0100
Subject: [PATCH 02/10] feat (tests): add tests for dictionary-based output
 handling in contains_json evaluator

---
 agenta-backend/agenta_backend/tests/unit/test_evaluators.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/agenta-backend/agenta_backend/tests/unit/test_evaluators.py b/agenta-backend/agenta_backend/tests/unit/test_evaluators.py
index c0bfbfade..418299ddf 100644
--- a/agenta-backend/agenta_backend/tests/unit/test_evaluators.py
+++ b/agenta-backend/agenta_backend/tests/unit/test_evaluators.py
@@ -179,6 +179,9 @@ async def test_auto_contains_all(output, substrings, case_sensitive, expected):
         ("No JSON here!", False),
         ("{Malformed JSON, nope!}", False),
         ('{"valid": "json", "number": 123}', True),
+        ({"data": {"message": "The capital of Azerbaijan is Baku."}}, None),
+        ({"data": '{"message": "The capital of Azerbaijan is Baku."}'}, True),
+        ({"data": "The capital of Azerbaijan is Baku."}, False),
     ],
 )
 @pytest.mark.asyncio

From d1fe5aac7b58535dd52c2254f1b3b10d411b92b9 Mon Sep 17 00:00:00 2001
From: Abram <israelvictory87@gmail.com>
Date: Tue, 20 Aug 2024 11:25:19 +0100
Subject: [PATCH 03/10] chore (backend): remove redundant error message

---
 agenta-backend/agenta_backend/services/evaluators_service.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/agenta-backend/agenta_backend/services/evaluators_service.py b/agenta-backend/agenta_backend/services/evaluators_service.py
index 2e378c2ba..e4d96e72c 100644
--- a/agenta-backend/agenta_backend/services/evaluators_service.py
+++ b/agenta-backend/agenta_backend/services/evaluators_service.py
@@ -616,7 +616,6 @@ async def auto_contains_json(
             if not isinstance(output, str):
                 raise Exception(
                     f"Evaluator 'contains_json' requires the output to be a string, but received {type(output).__name__} instead. "
-                    f"Please ensure the output of the application is a valid string, or that the 'data' key in the dictionary contains a string."
                 )
 
         response = await contains_json(

From 23be8b6edd5dab2c0af9e30dadd3fe68ff53636e Mon Sep 17 00:00:00 2001
From: Abram <israelvictory87@gmail.com>
Date: Wed, 21 Aug 2024 21:04:07 +0100
Subject: [PATCH 04/10] refactor (backend): centralize validation of string and
 json output and use functions in evaluators

---
 .../services/evaluators_service.py            | 136 +++++++++++-------
 1 file changed, 81 insertions(+), 55 deletions(-)

diff --git a/agenta-backend/agenta_backend/services/evaluators_service.py b/agenta-backend/agenta_backend/services/evaluators_service.py
index 413238871..e3bd9ae6a 100644
--- a/agenta-backend/agenta_backend/services/evaluators_service.py
+++ b/agenta-backend/agenta_backend/services/evaluators_service.py
@@ -29,6 +29,69 @@
 logger.setLevel(logging.DEBUG)
 
 
+def validate_string_output(
+    evaluator_key: str, output: Union[str, Dict[str, Any]]
+) -> str:
+    """Check that the output is a string, unwrapping a dict's "data" value first.
+
+    Args:
+        evaluator_key (str): the key of the evaluator (used in the error message)
+        output (Union[str, Dict[str, Any]]): the llm response
+
+    Raises:
+        Exception: if the output (or the dict's "data" value) is not a string
+
+    Returns:
+        str: the validated output
+    """
+
+    output = output.get("data", "") if isinstance(output, dict) else output
+    if not isinstance(output, str):
+        raise Exception(
+            f"Evaluator {evaluator_key} requires the output to be a string, but received {type(output).__name__} instead. "
+        )
+    return output
+
+
+def validate_json_output(evaluator_key: str, output: Union[str, Dict[str, Any]]) -> str:
+    """Checks and validate the output to be of type JSON string.
+
+    Args:
+        evaluator_key (str): the key of the evaluator
+        output (Union[str, Dict[str, Any]]): the llm response
+
+    Raises:
+        Exception: requires output to be a JSON string
+
+    Returns:
+        str: output
+    """
+
+    output = output.get("data", "") if isinstance(output, dict) else output
+    if isinstance(output, dict):
+        output = json.dumps(output)
+    elif isinstance(output, str):
+        try:
+            json.loads(output)
+        except json.JSONDecodeError:
+            raise Exception(
+                f"Evaluator {evaluator_key} requires the output to be a JSON string."
+            )
+
+    if not isinstance(
+        output,
+        (
+            str,
+            dict,
+        ),
+    ):
+        raise Exception(
+            f"Evaluator {evaluator_key} requires the output to be either a JSON string or object, but received {type(output).__name__} instead."
+        )
+
+    return output
+
+
 async def map(
     mapping_input: EvaluatorMappingInputInterface,
 ) -> EvaluatorMappingOutputInterface:
@@ -94,9 +157,9 @@ async def auto_exact_match(
     Returns:
         Result: A Result object containing the evaluation result.
     """
-    if not isinstance(output, str):
-        output = output.get("data", "")
+
     try:
+        output = validate_string_output("exact_match", output)
         correct_answer = get_correct_answer(data_point, settings_values)
         inputs = {"ground_truth": correct_answer, "prediction": output}
         response = exact_match(input=EvaluatorInputInterface(**{"inputs": inputs}))
@@ -136,9 +199,8 @@ async def auto_regex_test(
     settings_values: Dict[str, Any],
     lm_providers_keys: Dict[str, Any],  # pylint: disable=unused-argument
 ) -> Result:
-    if not isinstance(output, str):
-        output = output.get("data", "")
     try:
+        output = validate_string_output("regex_test", output)
         inputs = {"ground_truth": data_point, "prediction": output}
         response = await regex_test(
             input=EvaluatorInputInterface(
@@ -174,9 +236,8 @@ async def auto_field_match_test(
     settings_values: Dict[str, Any],
     lm_providers_keys: Dict[str, Any],  # pylint: disable=unused-argument
 ) -> Result:
-    if not isinstance(output, str):
-        output = output.get("data", "")
     try:
+        output = validate_string_output("field_match_test", output)
         correct_answer = get_correct_answer(data_point, settings_values)
         inputs = {"ground_truth": correct_answer, "prediction": output}
         response = await field_match_test(
@@ -210,9 +271,8 @@ async def auto_webhook_test(
     settings_values: Dict[str, Any],
     lm_providers_keys: Dict[str, Any],  # pylint: disable=unused-argument
 ) -> Result:
-    if not isinstance(output, str):
-        output = output.get("data", "")
     try:
+        output = validate_string_output("webhook_test", output)
         correct_answer = get_correct_answer(data_point, settings_values)
         inputs = {"prediction": output, "ground_truth": correct_answer}
         response = await webhook_test(
@@ -272,9 +332,8 @@ async def auto_custom_code_run(
     settings_values: Dict[str, Any],
     lm_providers_keys: Dict[str, Any],  # pylint: disable=unused-argument
 ) -> Result:
-    if not isinstance(output, str):
-        output = output.get("data", "")
     try:
+        output = validate_string_output("custom_code_run", output)
         correct_answer = get_correct_answer(data_point, settings_values)
         inputs = {
             "app_config": app_params,
@@ -332,9 +391,9 @@ async def auto_ai_critique(
     Returns:
         Result: Evaluation result.
     """
-    if not isinstance(output, str):
-        output = output.get("data", "")
+
     try:
+        output = validate_string_output("ai_critique", output)
         correct_answer = get_correct_answer(data_point, settings_values)
         inputs = {
             "prompt_user": app_params.get("prompt_user", ""),
@@ -391,9 +450,8 @@ async def auto_starts_with(
     settings_values: Dict[str, Any],
     lm_providers_keys: Dict[str, Any],  # pylint: disable=unused-argument
 ) -> Result:
-    if not isinstance(output, str):
-        output = output.get("data", "")
     try:
+        output = validate_string_output("starts_with", output)
         inputs = {"prediction": output}
         response = await starts_with(
             input=EvaluatorInputInterface(
@@ -433,9 +491,8 @@ async def auto_ends_with(
     settings_values: Dict[str, Any],
     lm_providers_keys: Dict[str, Any],  # pylint: disable=unused-argument
 ) -> Result:
-    if not isinstance(output, str):
-        output = output.get("data", "")
     try:
+        output = validate_string_output("ends_with", output)
         inputs = {"prediction": output}
         response = await ends_with(
             input=EvaluatorInputInterface(
@@ -476,9 +533,8 @@ async def auto_contains(
     settings_values: Dict[str, Any],
     lm_providers_keys: Dict[str, Any],  # pylint: disable=unused-argument
 ) -> Result:
-    if not isinstance(output, str):
-        output = output.get("data", "")
     try:
+        output = validate_string_output("contains", output)
         inputs = {"prediction": output}
         response = await contains(
             input=EvaluatorInputInterface(
@@ -519,9 +575,8 @@ async def auto_contains_any(
     settings_values: Dict[str, Any],
     lm_providers_keys: Dict[str, Any],  # pylint: disable=unused-argument
 ) -> Result:
-    if not isinstance(output, str):
-        output = output.get("data", "")
     try:
+        output = validate_string_output("contains_any", output)
         inputs = {"prediction": output}
         response = await contains_any(
             input=EvaluatorInputInterface(
@@ -564,9 +619,8 @@ async def auto_contains_all(
     settings_values: Dict[str, Any],
     lm_providers_keys: Dict[str, Any],  # pylint: disable=unused-argument
 ) -> Result:
-    if not isinstance(output, str):
-        output = output.get("data", "")
     try:
+        output = validate_string_output("contains_all", output)
         response = await contains_all(
             input=EvaluatorInputInterface(
                 **{"inputs": {"prediction": output}, "settings": settings_values}
@@ -608,16 +662,7 @@ async def auto_contains_json(
     lm_providers_keys: Dict[str, Any],  # pylint: disable=unused-argument
 ) -> Result:
     try:
-        if not isinstance(output, str):
-            # Attempt to retrieve 'data' key from output if it's a dictionary
-            output = output.get("data", "") if isinstance(output, dict) else output
-
-            # If output is still not a string, raise an exception
-            if not isinstance(output, str):
-                raise Exception(
-                    f"Evaluator 'contains_json' requires the output to be a string, but received {type(output).__name__} instead. "
-                )
-
+        output = validate_json_output("contains_json", output)
         response = await contains_json(
             input=EvaluatorInputInterface(**{"inputs": {"prediction": output}})
         )
@@ -758,22 +803,7 @@ async def auto_json_diff(
     lm_providers_keys: Dict[str, Any],  # pylint: disable=unused-argument
 ) -> Result:
     try:
-        output = output.get("data", "") if isinstance(output, dict) else output
-
-        if isinstance(output, dict):
-            output = json.dumps(output)
-        elif isinstance(output, str):
-            try:
-                json.loads(output)
-            except:
-                raise Exception(
-                    f"Evaluator 'auto_json_diff' requires string outputs to be JSON strings."
-                )
-        else:
-            raise Exception(
-                f"Evaluator 'auto_json_diff' requires the output to be either a JSON string or a JSON object, but received {type(output).__name__} instead."
-            )
-
+        output = validate_json_output("json_diff", output)
         correct_answer = get_correct_answer(data_point, settings_values)
         response = await json_diff(
             input=EvaluatorInputInterface(
@@ -1043,9 +1073,8 @@ async def auto_levenshtein_distance(
     settings_values: Dict[str, Any],
     lm_providers_keys: Dict[str, Any],  # pylint: disable=unused-argument
 ) -> Result:
-    if not isinstance(output, str):
-        output = output.get("data", "")
     try:
+        output = validate_string_output("levenshtein_distance", output)
         correct_answer = get_correct_answer(data_point, settings_values)
         response = await levenshtein_distance(
             input=EvaluatorInputInterface(
@@ -1086,9 +1115,8 @@ async def auto_similarity_match(
     settings_values: Dict[str, Any],
     lm_providers_keys: Dict[str, Any],
 ) -> Result:
-    if not isinstance(output, str):
-        output = output.get("data", "")
     try:
+        output = validate_string_output("similarity_match", output)
         correct_answer = get_correct_answer(data_point, settings_values)
         response = await similarity_match(
             input=EvaluatorInputInterface(
@@ -1168,10 +1196,8 @@ async def auto_semantic_similarity(
     settings_values: Dict[str, Any],
     lm_providers_keys: Dict[str, Any],
 ) -> Result:
-    if not isinstance(output, str):
-        output = output.get("data", "")
-
     try:
+        output = validate_string_output("semantic_similarity", output)
         correct_answer = get_correct_answer(data_point, settings_values)
         inputs = {"prediction": output, "ground_truth": correct_answer}
         response = await semantic_similarity(

From b6db4f15d4891bdf97fc9d4540a17af53a589ede Mon Sep 17 00:00:00 2001
From: Abram <israelvictory87@gmail.com>
Date: Wed, 21 Aug 2024 21:09:19 +0100
Subject: [PATCH 05/10] feat (tests): update parameters for BaseResponse
 compatibility and reflect changes in test cases

- Added parameters in 'test_auto_json_diff' for BaseResponse compatibility
- Updated parameters in 'test_auto_contains_json' to align with recent changes
---
 .../tests/unit/test_evaluators.py             | 47 ++++++++++++++++---
 1 file changed, 40 insertions(+), 7 deletions(-)

diff --git a/agenta-backend/agenta_backend/tests/unit/test_evaluators.py b/agenta-backend/agenta_backend/tests/unit/test_evaluators.py
index 87388eca4..eba88ac1f 100644
--- a/agenta-backend/agenta_backend/tests/unit/test_evaluators.py
+++ b/agenta-backend/agenta_backend/tests/unit/test_evaluators.py
@@ -1,8 +1,7 @@
 import os
 import pytest
 
-from test_traces import simple_rag_trace
-
+from agenta_backend.tests.unit.test_traces import simple_rag_trace
 from agenta_backend.services.evaluators_service import (
     auto_levenshtein_distance,
     auto_starts_with,
@@ -175,13 +174,13 @@ async def test_auto_contains_all(output, substrings, case_sensitive, expected):
 @pytest.mark.parametrize(
     "output, expected",
     [
-        ('Some random text {"key": "value"} more text', True),
-        ("No JSON here!", False),
-        ("{Malformed JSON, nope!}", False),
+        ('Some random text {"key": "value"} more text', None),
+        ("No JSON here!", None),
+        ("{Malformed JSON, nope!}", None),
         ('{"valid": "json", "number": 123}', True),
-        ({"data": {"message": "The capital of Azerbaijan is Baku."}}, None),
+        ({"data": {"message": "The capital of Azerbaijan is Baku."}}, True),
         ({"data": '{"message": "The capital of Azerbaijan is Baku."}'}, True),
-        ({"data": "The capital of Azerbaijan is Baku."}, False),
+        ({"data": "The capital of Azerbaijan is Baku."}, None),
     ],
 )
 @pytest.mark.asyncio
@@ -235,6 +234,40 @@ async def test_auto_contains_json(output, expected):
             0.0,
             1.0,
         ),
+        (
+            {
+                "correct_answer": '{"user": {"name": "John", "details": {"age": 30, "location": "New York"}}}'
+            },
+            {
+                "data": '{"USER": {"NAME": "John", "DETAILS": {"AGE": 30, "LOCATION": "New York"}}}'
+            },
+            {
+                "predict_keys": True,
+                "compare_schema_only": False,
+                "case_insensitive_keys": True,
+                "correct_answer_key": "correct_answer",
+            },
+            0.0,
+            1.0,
+        ),
+        (
+            {
+                "correct_answer": '{"user": {"name": "John", "details": {"age": 30, "location": "New York"}}}'
+            },
+            {
+                "data": {
+                    "output": '{"USER": {"NAME": "John", "DETAILS": {"AGE": 30, "LOCATION": "New York"}}}'
+                }
+            },
+            {
+                "predict_keys": True,
+                "compare_schema_only": False,
+                "case_insensitive_keys": True,
+                "correct_answer_key": "correct_answer",
+            },
+            0.0,
+            1.0,
+        ),
     ],
 )
 @pytest.mark.asyncio

From 80f3effb11db8cc992d6ba7171e44a2bbe3b896a Mon Sep 17 00:00:00 2001
From: Abram <israelvictory87@gmail.com>
Date: Wed, 21 Aug 2024 21:10:58 +0100
Subject: [PATCH 06/10] minor refactor (backend): update 'validate_json_output'
 function return-type and docstring

---
 .../agenta_backend/services/evaluators_service.py           | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/agenta-backend/agenta_backend/services/evaluators_service.py b/agenta-backend/agenta_backend/services/evaluators_service.py
index e3bd9ae6a..207860146 100644
--- a/agenta-backend/agenta_backend/services/evaluators_service.py
+++ b/agenta-backend/agenta_backend/services/evaluators_service.py
@@ -53,8 +53,8 @@ def validate_string_output(
     return output
 
 
-def validate_json_output(evaluator_key: str, output: Union[str, Dict[str, Any]]) -> str:
-    """Checks and validate the output to be of type JSON string.
+def validate_json_output(evaluator_key: str, output: Union[str, Dict[str, Any]]) -> Union[str, dict]:
+    """Checks and validate the output to be of type JSON string or dictionary.
 
     Args:
         evaluator_key (str): the key of the evaluator
@@ -64,7 +64,7 @@ def validate_json_output(evaluator_key: str, output: Union[str, Dict[str, Any]])
         Exception: requires output to be a JSON string
 
     Returns:
-        str: output
+        str, dict: output
     """
 
     output = output.get("data", "") if isinstance(output, dict) else output

From 892a351be6d1fb72b68d9020e275e6b017e1de13 Mon Sep 17 00:00:00 2001
From: Abram <israelvictory87@gmail.com>
Date: Wed, 21 Aug 2024 21:12:19 +0100
Subject: [PATCH 07/10] chore (style): format evaluators_service with
 black@23.12.0

---
 agenta-backend/agenta_backend/services/evaluators_service.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/agenta-backend/agenta_backend/services/evaluators_service.py b/agenta-backend/agenta_backend/services/evaluators_service.py
index 207860146..0f70a32a5 100644
--- a/agenta-backend/agenta_backend/services/evaluators_service.py
+++ b/agenta-backend/agenta_backend/services/evaluators_service.py
@@ -53,7 +53,9 @@ def validate_string_output(
     return output
 
 
-def validate_json_output(evaluator_key: str, output: Union[str, Dict[str, Any]]) -> Union[str, dict]:
+def validate_json_output(
+    evaluator_key: str, output: Union[str, Dict[str, Any]]
+) -> Union[str, dict]:
     """Checks and validate the output to be of type JSON string or dictionary.
 
     Args:

From 3cad5dbeba185be7375a8a419cbef138d6fa4138 Mon Sep 17 00:00:00 2001
From: Juan Pablo Vega <jp@agenta.ai>
Date: Fri, 23 Aug 2024 13:11:47 +0200
Subject: [PATCH 08/10] Enforce Union[str, Dict[str, Any]] in BaseResponse
 in SDK

---
 .../agenta/sdk/decorators/llm_entrypoint.py   | 21 ++++++++++++-------
 agenta-cli/pyproject.toml                     |  2 +-
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/agenta-cli/agenta/sdk/decorators/llm_entrypoint.py b/agenta-cli/agenta/sdk/decorators/llm_entrypoint.py
index 025b55a9b..e7f6cf9dd 100644
--- a/agenta-cli/agenta/sdk/decorators/llm_entrypoint.py
+++ b/agenta-cli/agenta/sdk/decorators/llm_entrypoint.py
@@ -216,9 +216,11 @@ async def wrapper(*args, **kwargs) -> Any:
                 {
                     "func": func.__name__,
                     "endpoint": route,
-                    "params": {**config_params, **func_signature.parameters}
-                    if not config
-                    else func_signature.parameters,
+                    "params": (
+                        {**config_params, **func_signature.parameters}
+                        if not config
+                        else func_signature.parameters
+                    ),
                     "config": config,
                 }
             )
@@ -229,9 +231,11 @@ async def wrapper(*args, **kwargs) -> Any:
             {
                 "func": func.__name__,
                 "endpoint": route,
-                "params": {**config_params, **func_signature.parameters}
-                if not config
-                else func_signature.parameters,
+                "params": (
+                    {**config_params, **func_signature.parameters}
+                    if not config
+                    else func_signature.parameters
+                ),
                 "config": config,
             }
         )
@@ -402,7 +406,7 @@ async def execute_function(
 
             # PATCH : if result is not a dict, make it a dict
             if not isinstance(result, dict):
-                data = result
+                data = str(result)
             else:
                 # PATCH : if result is a legacy dict, clean it up
                 if (
@@ -410,7 +414,8 @@ async def execute_function(
                     and "cost" in result.keys()
                     and "usage" in result.keys()
                 ):
-                    data = result["message"]
+                    data = str(result["message"])
+
             # END OF PATH
 
             if data is None:
diff --git a/agenta-cli/pyproject.toml b/agenta-cli/pyproject.toml
index d015e2923..89acb8a67 100644
--- a/agenta-cli/pyproject.toml
+++ b/agenta-cli/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "agenta"
-version = "0.24.0"
+version = "0.24.1a0"
 description = "The SDK for agenta is an open-source LLMOps platform."
 readme = "README.md"
 authors = ["Mahmoud Mabrouk <mahmoud@agenta.ai>"]

From 35e6fecc6406ac1b120dadd96313e599616b7104 Mon Sep 17 00:00:00 2001
From: Kaosiso Ezealigo <ezealigokosiso@gmail.com>
Date: Fri, 23 Aug 2024 13:36:57 +0100
Subject: [PATCH 09/10] fix(frontend): Migrate Inter font to use next/font

---
 agenta-web/src/pages/_app.tsx                 |  31 ++--
 .../src/styles/tokens/antd-themeConfig.json   | 152 ++++++------------
 2 files changed, 67 insertions(+), 116 deletions(-)

diff --git a/agenta-web/src/pages/_app.tsx b/agenta-web/src/pages/_app.tsx
index 760acc966..e7eed71ea 100644
--- a/agenta-web/src/pages/_app.tsx
+++ b/agenta-web/src/pages/_app.tsx
@@ -13,6 +13,9 @@ import AppContextProvider from "@/contexts/app.context"
 import ProfileContextProvider from "@/contexts/profile.context"
 import "ag-grid-community/styles/ag-grid.css"
 import "ag-grid-community/styles/ag-theme-alpine.css"
+import {Inter} from "next/font/google"
+
+const inter = Inter({subsets: ["latin"]})
 
 // Initialize the Posthog client
 if (typeof window !== "undefined") {
@@ -44,22 +47,20 @@ export default function App({Component, pageProps}: AppProps) {
             <Head>
                 <title>Agenta: The LLMOps platform.</title>
                 <link rel="shortcut icon" href="/assets/favicon.ico" />
-                <style>
-                    @import
-                    url('https://fonts.googleapis.com/css2?family=Inter:ital,opsz,wght@0,14..32,100..900;1,14..32,100..900&display=swap');
-                </style>
             </Head>
-            <PostHogProvider client={posthog}>
-                <ThemeContextProvider>
-                    <ProfileContextProvider>
-                        <AppContextProvider>
-                            <Layout>
-                                <Component {...pageProps} />
-                            </Layout>
-                        </AppContextProvider>
-                    </ProfileContextProvider>
-                </ThemeContextProvider>
-            </PostHogProvider>
+            <main className={inter.className}>
+                <PostHogProvider client={posthog}>
+                    <ThemeContextProvider>
+                        <ProfileContextProvider>
+                            <AppContextProvider>
+                                <Layout>
+                                    <Component {...pageProps} />
+                                </Layout>
+                            </AppContextProvider>
+                        </ProfileContextProvider>
+                    </ThemeContextProvider>
+                </PostHogProvider>
+            </main>
         </>
     )
 }
diff --git a/agenta-web/src/styles/tokens/antd-themeConfig.json b/agenta-web/src/styles/tokens/antd-themeConfig.json
index 4975d2287..ca186750a 100644
--- a/agenta-web/src/styles/tokens/antd-themeConfig.json
+++ b/agenta-web/src/styles/tokens/antd-themeConfig.json
@@ -281,8 +281,7 @@
         "paddingContentVerticalSM": 8,
         "controlPaddingHorizontal": 12,
         "controlPaddingHorizontalSM": 8,
-        "fontFamily": "Inter",
-        "fontFamilyCode": "Inter",
+
         "fontSize": 12,
         "fontSizeLG": 14,
         "fontSizeSM": 10,
@@ -378,8 +377,7 @@
             "hoverBg": "#ffffff",
             "inputFontSize": 12,
             "inputFontSizeLG": 14,
-            "inputFontSizeSM": 10,
-            "fontFamily": "Inter"
+            "inputFontSizeSM": 10
         },
         "Transfer": {
             "listWidthLG": 250,
@@ -411,8 +409,7 @@
             "colorBgContainerDisabled": "rgba(5, 23, 41, 0.04)",
             "colorBgContainer": "#ffffff",
             "itemHeight": 28,
-            "headerHeight": 34,
-            "fontFamily": "Inter"
+            "headerHeight": 34
         },
         "Segmented": {
             "segmentedBgColorSelected": "#ffffff",
@@ -447,8 +444,7 @@
             "itemColor": "#586673",
             "itemActiveBg": "rgba(5, 23, 41, 0.15)",
             "trackPadding": 2,
-            "trackBg": "#ffffff",
-            "fontFamily": "Inter"
+            "trackBg": "#ffffff"
         },
         "Switch": {
             "trackPadding": 2,
@@ -473,8 +469,7 @@
             "colorPrimaryHover": "#394857",
             "colorPrimaryBorder": "#d6dee6",
             "colorPrimary": "#1c2c3d",
-            "handleBg": "#ffffff",
-            "fontFamily": "Inter"
+            "handleBg": "#ffffff"
         },
         "TimePicker": {
             "timeColumnWidth": 56,
@@ -563,8 +558,7 @@
             "withoutTimeCellHeight": 66,
             "inputFontSize": 12,
             "inputFontSizeLG": 14,
-            "inputFontSizeSM": 10,
-            "fontFamily": "Inter"
+            "inputFontSizeSM": 10
         },
         "Timeline": {
             "itemPaddingBottom": 20,
@@ -587,8 +581,7 @@
             "colorError": "#d61010",
             "colorBgContainer": "#ffffff",
             "tailColor": "rgba(5, 23, 41, 0.06)",
-            "dotBg": "#ffffff",
-            "fontFamily": "Inter"
+            "dotBg": "#ffffff"
         },
         "Tabs": {
             "horizontalItemGutter": 24,
@@ -635,8 +628,7 @@
             "cardBg": "rgba(5, 23, 41, 0.02)",
             "titleFontSize": 12,
             "titleFontSizeLG": 14,
-            "titleFontSizeSM": 12,
-            "fontFamily": "Inter"
+            "titleFontSizeSM": 12
         },
         "Table": {
             "stickyScrollBarBorderRadius": 100,
@@ -699,8 +691,7 @@
             "headerSortHoverBg": "#f0f0f0",
             "cellFontSize": 12,
             "cellFontSizeMD": 12,
-            "cellFontSizeSM": 12,
-            "fontFamily": "Inter"
+            "cellFontSizeSM": 12
         },
         "Steps": {
             "dotSize": 8,
@@ -750,8 +741,7 @@
             "iconSize": 28,
             "finishIconBorderColor": "#1677ff",
             "customIconFontSize": 24,
-            "iconFontSize": 12,
-            "fontFamily": "Inter"
+            "iconFontSize": 12
         },
         "Spin": {
             "dotSizeSM": 14,
@@ -766,8 +756,7 @@
             "colorText": "#1c2c3d",
             "colorPrimary": "#1c2c3d",
             "colorBgContainer": "#ffffff",
-            "dotSizeLG": 28,
-            "fontFamily": "Inter"
+            "dotSizeLG": 28
         },
         "Slider": {
             "railSize": 4,
@@ -804,8 +793,7 @@
             "handleActiveColor": "#1c2c3d",
             "dotBorderColor": "#eaeff5",
             "dotActiveBorderColor": "#d6dee6",
-            "handleColorDisabled": "#bfbfbf",
-            "fontFamily": "Inter"
+            "handleColorDisabled": "#bfbfbf"
         },
         "Rate": {
             "rateStarSize": 20,
@@ -815,8 +803,7 @@
             "fontSize": 12,
             "controlHeightLG": 34,
             "colorText": "#1c2c3d",
-            "colorFillContent": "rgba(5, 23, 41, 0.06)",
-            "fontFamily": "Inter"
+            "colorFillContent": "rgba(5, 23, 41, 0.06)"
         },
         "Radio": {
             "radioSize": 16,
@@ -860,8 +847,7 @@
             "buttonCheckedColorDisabled": "#bdc7d1",
             "buttonCheckedBgDisabled": "rgba(5, 23, 41, 0.15)",
             "buttonCheckedBg": "#ffffff",
-            "buttonBg": "#ffffff",
-            "fontFamily": "Inter"
+            "buttonBg": "#ffffff"
         },
         "Popover": {
             "titleMinWidth": 177,
@@ -879,8 +865,7 @@
             "colorTextHeading": "#1c2c3d",
             "colorText": "#1c2c3d",
             "colorSplit": "rgba(5, 23, 41, 0.06)",
-            "colorBgElevated": "#ffffff",
-            "fontFamily": "Inter"
+            "colorBgElevated": "#ffffff"
         },
         "Notification": {
             "width": 384,
@@ -907,8 +892,7 @@
             "colorError": "#d61010",
             "colorBgElevated": "#ffffff",
             "lineHeightLG": 1.5714285714285714,
-            "lineWidthFocus": 4,
-            "fontFamily": "Inter"
+            "lineWidthFocus": 4
         },
         "Tooltip": {
             "paddingSM": 12,
@@ -921,8 +905,7 @@
             "borderRadius": 8,
             "colorTextLightSolid": "#ffffff",
             "colorText": "#1c2c3d",
-            "colorBgSpotlight": "rgba(5, 23, 41, 0.9)",
-            "fontFamily": "Inter"
+            "colorBgSpotlight": "rgba(5, 23, 41, 0.9)"
         },
         "Menu": {
             "subMenuItemBg": "rgba(0, 0, 0, 0)",
@@ -986,8 +969,7 @@
             "itemMarginInline": 4,
             "iconMarginInlineEnd": 10,
             "groupTitleFontSize": 12,
-            "groupTitleLineHeight": 1.6666666666666667,
-            "fontFamily": "Inter"
+            "groupTitleLineHeight": 1.6666666666666667
         },
         "InputNumber": {
             "paddingInlineSM": 7,
@@ -1039,8 +1021,7 @@
             "hoverBg": "#ffffff",
             "inputFontSize": 12,
             "inputFontSizeLG": 14,
-            "inputFontSizeSM": 10,
-            "fontFamily": "Inter"
+            "inputFontSizeSM": 10
         },
         "Image": {
             "previewOperationSize": 18,
@@ -1086,8 +1067,7 @@
             "headerFontSize": 14,
             "headerFontSizeSM": 12,
             "fontHeight": 22,
-            "fontSizeLG": 14,
-            "fontFamily": "Inter"
+            "fontSizeLG": 14
         },
         "Carousel": {
             "dotWidth": 16,
@@ -1099,8 +1079,7 @@
             "controlHeightSM": 24,
             "controlHeightLG": 34,
             "colorText": "#1c2c3d",
-            "colorBgContainer": "#ffffff",
-            "fontFamily": "Inter"
+            "colorBgContainer": "#ffffff"
         },
         "Cascader": {
             "dropdownHeight": 180,
@@ -1136,8 +1115,7 @@
             "colorBgContainer": "#ffffff",
             "optionSelectedBg": "#f5f7fa",
             "menuPadding": 4,
-            "optionSelectedFontWeight": 600,
-            "fontFamily": "Inter"
+            "optionSelectedFontWeight": 600
         },
         "Calendar": {
             "yearControlWidth": 80,
@@ -1179,8 +1157,7 @@
             "itemActiveBg": "#f5f7fa",
             "fullPanelBg": "#ffffff",
             "fullBg": "#ffffff",
-            "fontHeightSM": 20,
-            "fontFamily": "Inter"
+            "fontHeightSM": 20
         },
         "Button": {
             "paddingInlineSM": 7,
@@ -1272,8 +1249,7 @@
             "textFontSize": 10,
             "textFontSizeSM": 10,
             "fontHeight": 22,
-            "lineHeight": 1.6666666666666667,
-            "fontFamily": "Inter"
+            "lineHeight": 1.6666666666666667
         },
         "Form": {
             "screenXSMax": 575,
@@ -1309,8 +1285,7 @@
             "labelColonMarginInlineEnd": 8,
             "labelColonMarginInlineStart": 2,
             "labelHeight": 28,
-            "labelFontSize": 12,
-            "fontFamily": "Inter"
+            "labelFontSize": 12
         },
         "Avatar": {
             "marginXXS": 4,
@@ -1333,8 +1308,7 @@
             "containerSize": 28,
             "textFontSize": 18,
             "textFontSizeLG": 20,
-            "textFontSizeSM": 12,
-            "fontFamily": "Inter"
+            "textFontSizeSM": 12
         },
         "Tour": {
             "sizePopupArrow": 16,
@@ -1360,7 +1334,7 @@
             "closeBtnSize": 22,
             "primaryNextBtnHoverBg": "#f0f0f0",
             "primaryPrevBtnBg": "rgba(255, 255, 255, 0.15)",
-            "fontFamily": "Inter",
+
             "lineWidthFocus": 4
         },
         "QRCode": {
@@ -1373,8 +1347,7 @@
             "borderRadiusLG": 10,
             "colorWhite": "#ffffff",
             "colorText": "#1c2c3d",
-            "colorSplit": "rgba(5, 23, 41, 0.06)",
-            "fontFamily": "Inter"
+            "colorSplit": "rgba(5, 23, 41, 0.06)"
         },
         "Upload": {
             "paddingXS": 8,
@@ -1407,8 +1380,7 @@
             "colorBgMask": "rgba(5, 23, 41, 0.45)",
             "fontHeight": 22,
             "fontHeightSM": 20,
-            "lineWidthFocus": 4,
-            "fontFamily": "Inter"
+            "lineWidthFocus": 4
         },
         "Typography": {
             "paddingSM": 12,
@@ -1439,8 +1411,7 @@
             "colorLink": "#1c2c3d",
             "colorErrorHover": "#de4040",
             "colorErrorActive": "#ab0d0d",
-            "colorError": "#d61010",
-            "fontFamilyCode": "Inter"
+            "colorError": "#d61010"
         },
         "TreeSelect": {
             "paddingXS": 8,
@@ -1470,8 +1441,7 @@
             "colorBgContainer": "#ffffff",
             "titleHeight": 24,
             "nodeSelectedBg": "#f5f7fa",
-            "nodeHoverBg": "rgba(5, 23, 41, 0.04)",
-            "fontFamily": "Inter"
+            "nodeHoverBg": "rgba(5, 23, 41, 0.04)"
         },
         "Tree": {
             "paddingSM": 12,
@@ -1505,8 +1475,7 @@
             "nodeSelectedBg": "#f5f7fa",
             "nodeHoverBg": "rgba(5, 23, 41, 0.04)",
             "directoryNodeSelectedColor": "#ffffff",
-            "directoryNodeSelectedBg": "#1c2c3d",
-            "fontFamily": "Inter"
+            "directoryNodeSelectedBg": "#1c2c3d"
         },
         "Tag": {
             "paddingXXS": 4,
@@ -1538,8 +1507,7 @@
             "colorErrorBorder": "#ef9f9f",
             "colorErrorBg": "#fbe7e7",
             "defaultColor": "#1c2c3d",
-            "defaultBg": "rgba(5, 23, 41, 0.02)",
-            "fontFamily": "Inter"
+            "defaultBg": "rgba(5, 23, 41, 0.02)"
         },
         "Statistic": {
             "padding": 16,
@@ -1551,8 +1519,7 @@
             "colorTextDescription": "#758391",
             "colorText": "#1c2c3d",
             "contentFontSize": 20,
-            "titleFontSize": 12,
-            "fontFamily": "Inter"
+            "titleFontSize": 12
         },
         "Skeleton": {
             "padding": 16,
@@ -1626,8 +1593,7 @@
             "multipleItemBg": "rgba(5, 23, 41, 0.06)",
             "clearBg": "#ffffff",
             "optionFontSize": 12,
-            "optionSelectedFontWeight": 600,
-            "fontFamily": "Inter"
+            "optionSelectedFontWeight": 600
         },
         "Result": {
             "paddingXS": 8,
@@ -1664,8 +1630,7 @@
             "colorBgContainer": "#ffffff",
             "remainingColor": "rgba(5, 23, 41, 0.06)",
             "defaultColor": "#1c2c3d",
-            "circleTextColor": "#1c2c3d",
-            "fontFamily": "Inter"
+            "circleTextColor": "#1c2c3d"
         },
         "Popconfirm": {
             "marginXXS": 4,
@@ -1725,8 +1690,7 @@
             "itemBg": "#ffffff",
             "itemActiveColorDisabled": "#bdc7d1",
             "itemActiveBgDisabled": "rgba(5, 23, 41, 0.15)",
-            "itemActiveBg": "#ffffff",
-            "fontFamily": "Inter"
+            "itemActiveBg": "#ffffff"
         },
         "Modal": {
             "screenSMMax": 767,
@@ -1763,8 +1727,7 @@
             "contentBg": "#ffffff",
             "titleFontSize": 16,
             "titleLineHeight": 1.25,
-            "fontHeight": 22,
-            "fontFamily": "Inter"
+            "fontHeight": 22
         },
         "Message": {
             "paddingXS": 8,
@@ -1780,8 +1743,7 @@
             "colorSuccess": "#389e0d",
             "colorInfo": "#1c2c3d",
             "colorError": "#d61010",
-            "contentBg": "#ffffff",
-            "fontFamily": "Inter"
+            "contentBg": "#ffffff"
         },
         "List": {
             "screenSM": 576,
@@ -1818,8 +1780,7 @@
             "headerBg": "rgba(0, 0, 0, 0)",
             "footerBg": "rgba(0, 0, 0, 0)",
             "avatarMarginRight": 16,
-            "descriptionFontSize": 12,
-            "fontFamily": "Inter"
+            "descriptionFontSize": 12
         },
         "FloatButton": {
             "paddingXXS": 4,
@@ -1842,8 +1803,7 @@
             "colorPrimaryHover": "#394857",
             "colorPrimary": "#1c2c3d",
             "colorFillContent": "rgba(5, 23, 41, 0.06)",
-            "colorBgElevated": "#ffffff",
-            "fontFamily": "Inter"
+            "colorBgElevated": "#ffffff"
         },
         "Empty": {
             "colorTextDisabled": "#bdc7d1",
@@ -1879,8 +1839,7 @@
             "colorPrimary": "#1c2c3d",
             "colorError": "#d61010",
             "colorBgElevated": "#ffffff",
-            "paddingBlock": 5,
-            "fontFamily": "Inter"
+            "paddingBlock": 5
         },
         "Drawer": {
             "paddingXS": 8,
@@ -1909,8 +1868,7 @@
             "colorTextHeading": "#1c2c3d",
             "colorText": "#1c2c3d",
             "colorSplit": "rgba(5, 23, 41, 0.06)",
-            "lineHeight": 1.6666666666666667,
-            "fontFamily": "Inter"
+            "lineHeight": 1.6666666666666667
         },
         "Descriptions": {
             "paddingXS": 8,
@@ -1935,8 +1893,7 @@
             "colorFillAlter": "rgba(5, 23, 41, 0.02)",
             "titleColor": "#1c2c3d",
             "labelBg": "rgba(5, 23, 41, 0.02)",
-            "contentColor": "#1c2c3d",
-            "fontFamily": "Inter"
+            "contentColor": "#1c2c3d"
         },
         "DatePicker": {
             "sizePopupArrow": 16,
@@ -2036,8 +1993,7 @@
             "inputFontSizeLG": 14,
             "inputFontSizeSM": 10,
             "fontHeight": 22,
-            "fontHeightLG": 24,
-            "fontFamily": "Inter"
+            "fontHeightLG": 24
         },
         "Collapse": {
             "paddingXXS": 4,
@@ -2062,8 +2018,7 @@
             "contentBg": "#ffffff",
             "fontHeight": 22,
             "fontHeightLG": 24,
-            "lineHeightLG": 1.5714285714285714,
-            "fontFamily": "Inter"
+            "lineHeightLG": 1.5714285714285714
         },
         "Checkbox": {
             "paddingXS": 8,
@@ -2084,8 +2039,7 @@
             "colorPrimary": "#1c2c3d",
             "colorBorder": "#bdc7d1",
             "colorBgContainerDisabled": "rgba(5, 23, 41, 0.04)",
-            "colorBgContainer": "#ffffff",
-            "fontFamily": "Inter"
+            "colorBgContainer": "#ffffff"
         },
         "Breadcrumb": {
             "paddingXXS": 4,
@@ -2105,8 +2059,7 @@
             "itemColor": "#758391",
             "separatorMargin": 8,
             "iconFontSize": 12,
-            "fontHeight": 22,
-            "fontFamily": "Inter"
+            "fontHeight": 22
         },
         "Anchor": {
             "paddingXXS": 4,
@@ -2118,8 +2071,7 @@
             "colorSplit": "rgba(5, 23, 41, 0.06)",
             "colorPrimary": "#1c2c3d",
             "linkPaddingInlineStart": 16,
-            "linkPaddingBlock": 4,
-            "fontFamily": "Inter"
+            "linkPaddingBlock": 4
         },
         "Alert": {
             "paddingMD": 20,
@@ -2150,8 +2102,7 @@
             "colorErrorBorder": "#ef9f9f",
             "colorErrorBg": "#fbe7e7",
             "colorError": "#d61010",
-            "withDescriptionIconSize": 20,
-            "fontFamily": "Inter"
+            "withDescriptionIconSize": 20
         },
         "Space": {
             "paddingXS": 8,
@@ -2202,7 +2153,7 @@
             "optionActiveBg": "rgba(0, 0, 0, 0.04)",
             "optionHeight": 32,
             "optionFontSize": 12,
-            "fontFamily": "Inter",
+
             "fontSize": 12,
             "fontSizeIcon": 12,
             "fontSizeLG": 14,
@@ -2229,7 +2180,6 @@
             "fontSizeXL": 18
         },
         "Mentions": {
-            "fontFamily": "Inter",
             "fontSize": 12,
             "lineHeight": 1.6666666666666667
         }

From 2402f94b120e0408ced3993ebbb2afea4be91132 Mon Sep 17 00:00:00 2001
From: Juan Pablo Vega <jp@agenta.ai>
Date: Fri, 23 Aug 2024 16:03:54 +0200
Subject: [PATCH 10/10] fix exception message and bump SDK out of pre-release

---
 agenta-backend/agenta_backend/services/evaluators_service.py | 2 +-
 agenta-cli/pyproject.toml                                    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/agenta-backend/agenta_backend/services/evaluators_service.py b/agenta-backend/agenta_backend/services/evaluators_service.py
index 0f70a32a5..ce40e091c 100644
--- a/agenta-backend/agenta_backend/services/evaluators_service.py
+++ b/agenta-backend/agenta_backend/services/evaluators_service.py
@@ -77,7 +77,7 @@ def validate_json_output(
             json.loads(output)
         except json.JSONDecodeError:
             raise Exception(
-                f"Evaluator {evaluator_key} requires the output to be a JSON string."
+                f"Evaluator {evaluator_key} requires the output to be a JSON string or object."
             )
 
     if not isinstance(
diff --git a/agenta-cli/pyproject.toml b/agenta-cli/pyproject.toml
index 89acb8a67..6c0546dac 100644
--- a/agenta-cli/pyproject.toml
+++ b/agenta-cli/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "agenta"
-version = "0.24.1a0"
+version = "0.24.1"
 description = "The SDK for agenta is an open-source LLMOps platform."
 readme = "README.md"
 authors = ["Mahmoud Mabrouk <mahmoud@agenta.ai>"]