diff --git a/README.md b/README.md index 2763e5c22..592eb69be 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,4 @@ -
- - Important: We are migrating from MongoDB to PostgreSQL in v0.19! Follow this guide to migrate your data. - -
-
-
-
-
- - We are hiring! Join our team! - -
-
@@ -76,12 +62,9 @@
- - - Mockup agenta + Glamour Shot
-


def unique_constraint_exists(
    engine: sa.Engine, table_name: str, constraint_name: str
) -> bool:
    """Return True if a constraint named *constraint_name* exists on *table_name*.

    Queries ``pg_constraint`` directly, so it matches any constraint type
    (unique, primary key, foreign key) — the name is what is checked.

    Args:
        engine: SQLAlchemy engine connected to the target PostgreSQL database.
        table_name: Name of the table the constraint belongs to.
        constraint_name: Exact constraint name to look up.

    Returns:
        bool: True when the constraint exists, False otherwise.
    """
    with engine.connect() as conn:
        # Bound parameters instead of f-string interpolation: avoids SQL
        # injection and quoting issues with unusual identifier names.
        result = conn.execute(
            sa.text(
                "SELECT conname FROM pg_constraint "
                "WHERE conname = :constraint_name "
                "AND conrelid = CAST(:table_name AS regclass);"
            ),
            {"constraint_name": constraint_name, "table_name": table_name},
        )
        return result.fetchone() is not None


def upgrade() -> None:
    """Add the nullable ``modified_by_id`` column to ``app_db``.

    The foreign key to ``users.id`` is given an explicit name so the
    constraint is addressable in ``downgrade`` (Alembic cannot drop an
    anonymous constraint).
    """
    op.add_column("app_db", sa.Column("modified_by_id", sa.UUID(), nullable=True))
    op.create_foreign_key(
        "app_db_modified_by_id_fkey", "app_db", "users", ["modified_by_id"], ["id"]
    )


def downgrade() -> None:
    """Drop the ``modified_by_id`` foreign key (when present) and column."""
    engine = sa.create_engine(os.getenv("POSTGRES_URI"))  # type: ignore
    # BUG FIX: drop the constraint when it DOES exist. The previous
    # ``if not unique_constraint_exists(...)`` guard was inverted — it
    # attempted the drop exactly when the constraint was missing (raising
    # UndefinedObject) and silently skipped the drop when it was present.
    if unique_constraint_exists(engine, "app_db", "app_db_modified_by_id_fkey"):
        op.drop_constraint("app_db_modified_by_id_fkey", "app_db", type_="foreignkey")
    op.drop_column("app_db", "modified_by_id")
# Tables that must carry a single-column unique constraint on ``id`` after
# this migration. Every constraint follows the ``<table>_pkey`` naming
# convention, so the table list is the single source of truth.
_ID_UNIQUE_TABLES = (
    "app_db",
    "app_variant_revisions",
    "app_variants",
    "bases",
    "deployments",
    "docker_images",
    "environments",
    "environments_revisions",
    "evaluation_aggregated_results",
    "evaluation_scenario_results",
    "evaluation_scenarios",
    "evaluations",
    "evaluators_configs",
    "human_evaluation_variants",
    "human_evaluations",
    "human_evaluations_scenarios",
    "ids_mapping",
    "templates",
    "testsets",
    "users",
)


def unique_constraint_exists(
    engine: sa.Engine, table_name: str, constraint_name: str
) -> bool:
    """Return True if a constraint named *constraint_name* exists on *table_name*.

    Queries ``pg_constraint`` with bound parameters (not f-string
    interpolation) to avoid SQL injection and identifier-quoting issues.
    """
    with engine.connect() as conn:
        result = conn.execute(
            sa.text(
                "SELECT conname FROM pg_constraint "
                "WHERE conname = :constraint_name "
                "AND conrelid = CAST(:table_name AS regclass);"
            ),
            {"constraint_name": constraint_name, "table_name": table_name},
        )
        return result.fetchone() is not None


def returning_user_from_agenta_v018_downwards_upgrade() -> None:
    """Idempotently (re)create the ``<table>_pkey`` unique constraints.

    Refactored from twenty copy-pasted ``if not unique_constraint_exists``
    blocks into one data-driven loop — same tables, same constraint names,
    same creation order as before.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    engine = sa.create_engine(os.getenv("POSTGRES_URI"))  # type: ignore
    for table_name in _ID_UNIQUE_TABLES:
        constraint_name = f"{table_name}_pkey"
        # Guard makes the migration safe to re-run on a database where the
        # constraint already exists (returning v0.18 users).
        if not unique_constraint_exists(engine, table_name, constraint_name):
            op.create_unique_constraint(constraint_name, table_name, ["id"])
    # ### end Alembic commands ###
# Drop order is the exact reverse of the creation order used by
# ``returning_user_from_agenta_v018_downwards_upgrade``.
_ID_UNIQUE_TABLES_REVERSED = (
    "users",
    "testsets",
    "templates",
    "ids_mapping",
    "human_evaluations_scenarios",
    "human_evaluations",
    "human_evaluation_variants",
    "evaluators_configs",
    "evaluations",
    "evaluation_scenarios",
    "evaluation_scenario_results",
    "evaluation_aggregated_results",
    "environments_revisions",
    "environments",
    "docker_images",
    "deployments",
    "bases",
    "app_variants",
    "app_variant_revisions",
    "app_db",
)


def returning_user_from_agenta_v018_downwards_downgrade() -> None:
    """Drop the per-table ``<table>_pkey`` unique constraints.

    BUG FIX: the ``bases`` constraint was previously dropped as
    ``"bases_key"`` while the matching upgrade creates it as
    ``"bases_pkey"`` — the typo made this downgrade raise
    UndefinedObject. Deriving every name from the table list removes the
    possibility of such a mismatch.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    for table_name in _ID_UNIQUE_TABLES_REVERSED:
        op.drop_constraint(f"{table_name}_pkey", table_name, type_="unique")
    # ### end Alembic commands ###
class EvaluatorInputInterface(BaseModel):
    """Request payload for running a single evaluator.

    ``inputs`` carries the data under evaluation; ``settings`` and
    ``credentials`` are optional evaluator configuration and provider
    secrets respectively.
    """

    inputs: Dict[str, Any] = Field(default_factory=dict)
    settings: Optional[Dict[str, Any]] = None
    credentials: Optional[Dict[str, Any]] = None


class EvaluatorOutputInterface(BaseModel):
    """Response payload of an evaluator run: a free-form outputs mapping."""

    outputs: Dict[str, Any]


class EvaluatorMappingInputInterface(BaseModel):
    """Request payload for mapping an experiment data tree onto an
    evaluator's expected inputs."""

    inputs: Dict[str, Any]
    mapping: Dict[str, Any]


class EvaluatorMappingOutputInterface(BaseModel):
    """Response payload of the mapping step: the remapped outputs."""

    outputs: Dict[str, Any]
results.", "oss": True, + "tags": ["functional"], }, { "name": "Contains JSON", @@ -49,6 +50,7 @@ "settings_template": {}, "description": "'Contains JSON' evaluator checks if the output contains the a valid JSON.", "oss": True, + "tags": ["functional", "classifiers"], }, { "name": "Similarity Match", @@ -75,11 +77,13 @@ }, "description": "Similarity Match evaluator checks if the generated answer is similar to the expected answer. You need to provide the similarity threshold. It uses the Jaccard similarity to compare the answers.", "oss": True, + "tags": ["similarity", "functional"], }, { "name": "Semantic Similarity Match", "key": "auto_semantic_similarity", "direct_use": False, + "requires_llm_api_keys": True, "description": "Semantic Similarity Match evaluator measures the similarity between two pieces of text by analyzing their meaning and context. It compares the semantic content, providing a score that reflects how closely the texts match in terms of meaning, rather than just exact word matches.", "settings_template": { "correct_answer_key": { @@ -92,6 +96,7 @@ }, }, "oss": True, + "tags": ["similarity", "ai_llm"], }, { "name": "Regex Test", @@ -114,6 +119,7 @@ }, }, "oss": True, + "tags": ["classifiers", "functional"], }, { "name": "JSON Field Match", @@ -138,6 +144,7 @@ }, "description": "JSON Field Match evaluator compares specific fields within JSON (JavaScript Object Notation) data. 
This matching can involve finding similarities or correspondences between fields in different JSON objects.", "oss": True, + "tags": ["functional"], }, { "name": "JSON Diff Match", @@ -176,11 +183,13 @@ }, }, "oss": True, + "tags": ["similarity", "functional"], }, { "name": "LLM-as-a-judge", "key": "auto_ai_critique", "direct_use": False, + "requires_llm_api_keys": True, "settings_template": { "prompt_template": { "label": "Prompt Template", @@ -200,16 +209,25 @@ }, "description": "AI Critique evaluator sends the generated answer and the correct_answer to an LLM model and uses it to evaluate the correctness of the answer. You need to provide the evaluation prompt (or use the default prompt).", "oss": True, + "tags": ["ai_llm", "functional"], }, { "name": "Code Evaluation", "key": "auto_custom_code_run", "direct_use": False, "settings_template": { + "requires_llm_api_keys": { + "label": "Requires LLM API Key(s)", + "type": "boolean", + "required": True, + "default": False, + "advanced": True, + "description": "Indicates whether the evaluation requires LLM API key(s) to function.", + }, "code": { "label": "Evaluation Code", "type": "code", - "default": "from typing import Dict\n\ndef evaluate(\n app_params: Dict[str, str],\n inputs: Dict[str, str],\n output: Union[str, Dict[str, Any]], # output of the llm app\n datapoint: Dict[str, str] # contains the testset row \n) -> float:\n if output in datapoint.get('correct_answer', None):\n return 1.0\n else:\n return 0.0\n", + "default": "from typing import Dict, Union, Any\n\ndef evaluate(\n app_params: Dict[str, str],\n inputs: Dict[str, str],\n output: Union[str, Dict[str, Any]], # output of the llm app\n correct_answer: str # contains the testset row \n) -> float:\n if output in correct_answer:\n return 1.0\n else:\n return 0.0\n", "description": "Code for evaluating submissions", "required": True, }, @@ -224,12 +242,21 @@ }, "description": "Code Evaluation allows you to write your own evaluator in Python. 
You need to provide the Python code for the evaluator.", "oss": True, + "tags": ["functional"], }, { "name": "Webhook test", "key": "auto_webhook_test", "direct_use": False, "settings_template": { + "requires_llm_api_keys": { + "label": "Requires LLM API Key(s)", + "type": "boolean", + "required": True, + "default": False, + "advanced": True, + "description": "Indicates whether the evaluation requires LLM API key(s) to function.", + }, "webhook_url": { "label": "Webhook URL", "type": "string", @@ -247,6 +274,7 @@ }, "description": "Webhook test evaluator sends the generated answer and the correct_answer to a webhook and expects a response, in JSON format, indicating the correctness of the answer, along with a 200 HTTP status. You need to provide the URL of the webhook and the response of the webhook must be between 0 and 1.", "oss": True, + "tags": ["functional"], }, { "name": "Starts With", @@ -268,6 +296,7 @@ }, "description": "Starts With evaluator checks if the output starts with a specified prefix, considering case sensitivity based on the settings.", "oss": True, + "tags": ["classifiers", "functional"], }, { "name": "Ends With", @@ -289,6 +318,7 @@ }, "description": "Ends With evaluator checks if the output ends with a specified suffix, considering case sensitivity based on the settings.", "oss": True, + "tags": ["classifiers", "functional"], }, { "name": "Contains", @@ -310,6 +340,7 @@ }, "description": "Contains evaluator checks if the output contains a specified substring, considering case sensitivity based on the settings.", "oss": True, + "tags": ["classifiers", "functional"], }, { "name": "Contains Any", @@ -331,6 +362,7 @@ }, "description": "Contains Any evaluator checks if the output contains any of the specified substrings from a comma-separated list, considering case sensitivity based on the settings.", "oss": True, + "tags": ["classifiers", "functional"], }, { "name": "Contains All", @@ -352,6 +384,7 @@ }, "description": "Contains All evaluator 
checks if the output contains all of the specified substrings from a comma-separated list, considering case sensitivity based on the settings.", "oss": True, + "tags": ["classifiers", "functional"], }, { "name": "Levenshtein Distance", @@ -375,20 +408,25 @@ }, "description": "This evaluator calculates the Levenshtein distance between the output and the correct answer. If a threshold is provided in the settings, it returns a boolean indicating whether the distance is within the threshold. If no threshold is provided, it returns the actual Levenshtein distance as a numerical value.", "oss": True, + "tags": ["functional"], }, { "name": "RAG Faithfulness", "key": "rag_faithfulness", "direct_use": False, + "requires_llm_api_keys": True, "settings_template": rag_evaluator_settings_template, "description": "RAG Faithfulness evaluator assesses the accuracy and reliability of responses generated by Retrieval-Augmented Generation (RAG) models. It evaluates how faithfully the responses adhere to the retrieved documents or sources, ensuring that the generated text accurately reflects the information from the original sources.", + "tags": ["rag"], }, { "name": "RAG Context Relevancy", "key": "rag_context_relevancy", "direct_use": False, + "requires_llm_api_keys": True, "settings_template": rag_evaluator_settings_template, "description": "RAG Context Relevancy evaluator measures how relevant the retrieved documents or contexts are to the given question or prompt. 
It ensures that the selected documents provide the necessary information for generating accurate and meaningful responses, improving the overall quality of the RAG model's output.", + "tags": ["rag"], }, ] diff --git a/agenta-backend/agenta_backend/routers/app_router.py b/agenta-backend/agenta_backend/routers/app_router.py index d2f2e2fc4..ae89ecae0 100644 --- a/agenta-backend/agenta_backend/routers/app_router.py +++ b/agenta-backend/agenta_backend/routers/app_router.py @@ -550,7 +550,7 @@ async def create_app_and_variant_from_template( ) app_variant_db = await app_manager.add_variant_based_on_image( app=app, - project_id=request.state.project_id, + project_id=str(app.project_id), variant_name="app.default", docker_id_or_template_uri=( # type: ignore template_db.template_uri if isCloudProd() else template_db.digest @@ -570,7 +570,7 @@ async def create_app_and_variant_from_template( await db_manager.add_testset_to_app_variant( template_name=template_db.name, # type: ignore app_name=app.app_name, # type: ignore - project_id=request.state.project_id, + project_id=str(app.project_id), ) logger.debug( @@ -579,7 +579,7 @@ async def create_app_and_variant_from_template( else "Step 6: We create ready-to use evaluators" ) await evaluator_manager.create_ready_to_use_evaluators( - app_name=app.app_name, project_id=request.state.project_id + app_name=app.app_name, project_id=str(app.project_id) ) logger.debug( @@ -619,7 +619,7 @@ async def create_app_and_variant_from_template( envvars = {} if payload.env_vars is None else payload.env_vars await app_manager.start_variant( app_variant_db, - request.state.project_id, + str(app.project_id), envvars, user_uid=request.state.user_id, ) diff --git a/agenta-backend/agenta_backend/routers/evaluation_router.py b/agenta-backend/agenta_backend/routers/evaluation_router.py index 479bf68e1..d3cd29818 100644 --- a/agenta-backend/agenta_backend/routers/evaluation_router.py +++ b/agenta-backend/agenta_backend/routers/evaluation_router.py @@ 
-5,6 +5,7 @@ from fastapi.responses import JSONResponse from fastapi import HTTPException, Request, status, Response, Query +from agenta_backend.services import helpers from agenta_backend.models import converters from agenta_backend.tasks.evaluations import evaluate @@ -15,9 +16,6 @@ NewEvaluation, DeleteEvaluation, ) -from agenta_backend.services.evaluator_manager import ( - check_ai_critique_inputs, -) from agenta_backend.services import evaluation_service, db_manager, app_manager if isCloudEE(): @@ -113,8 +111,9 @@ async def create_evaluation( status_code=403, ) - success, response = await check_ai_critique_inputs( - payload.evaluators_configs, payload.lm_providers_keys + llm_provider_keys = helpers.format_llm_provider_keys(payload.lm_providers_keys) + success, response = await helpers.ensure_required_llm_keys_exist( + payload.evaluators_configs, llm_provider_keys ) if not success: return response @@ -136,8 +135,8 @@ async def create_evaluation( evaluators_config_ids=payload.evaluators_configs, testset_id=payload.testset_id, evaluation_id=evaluation.id, - rate_limit_config=payload.rate_limit.dict(), - lm_providers_keys=payload.lm_providers_keys, + rate_limit_config=payload.rate_limit.model_dump(), + lm_providers_keys=llm_provider_keys, ) evaluations.append(evaluation) @@ -435,9 +434,8 @@ async def delete_evaluations( ) logger.debug("Successfully updated last_modified_by app information") - await evaluation_service.delete_evaluations( - payload.evaluations_ids, str(evaluation.project_id) - ) + logger.debug(f"Deleting evaluations {payload.evaluations_ids}...") + await evaluation_service.delete_evaluations(payload.evaluations_ids) return Response(status_code=status.HTTP_204_NO_CONTENT) except Exception as exc: raise HTTPException(status_code=500, detail=str(exc)) diff --git a/agenta-backend/agenta_backend/routers/evaluators_router.py b/agenta-backend/agenta_backend/routers/evaluators_router.py index 7729c97ab..5ad1a9bf4 100644 --- 
a/agenta-backend/agenta_backend/routers/evaluators_router.py +++ b/agenta-backend/agenta_backend/routers/evaluators_router.py @@ -1,4 +1,5 @@ import logging +import traceback from typing import List, Optional from fastapi import HTTPException, Request @@ -6,13 +7,22 @@ from agenta_backend.utils.common import APIRouter, isCloudEE -from agenta_backend.services import evaluator_manager, db_manager, app_manager +from agenta_backend.services import ( + evaluator_manager, + db_manager, + evaluators_service, + app_manager, +) from agenta_backend.models.api.evaluation_model import ( Evaluator, EvaluatorConfig, NewEvaluatorConfig, UpdateEvaluatorConfig, + EvaluatorInputInterface, + EvaluatorOutputInterface, + EvaluatorMappingInputInterface, + EvaluatorMappingOutputInterface, ) if isCloudEE(): @@ -48,6 +58,63 @@ async def get_evaluators_endpoint(): raise HTTPException(status_code=500, detail=str(e)) +@router.post("/map/", response_model=EvaluatorMappingOutputInterface) +async def evaluator_data_map(request: Request, payload: EvaluatorMappingInputInterface): + """Endpoint to map the experiment data tree to evaluator interface. + + Args: + request (Request): The request object. + payload (EvaluatorMappingInputInterface): The payload containing the request data. + + Returns: + EvaluatorMappingOutputInterface: the evaluator mapping output object + """ + + try: + mapped_outputs = await evaluators_service.map(mapping_input=payload) + return mapped_outputs + except Exception as e: + logger.error(f"Error mapping data tree: {str(e)}") + raise HTTPException( + status_code=500, + detail={ + "message": "Error mapping data tree", + "stacktrace": traceback.format_exc(), + }, + ) + + +@router.post("/{evaluator_key}/run/", response_model=EvaluatorOutputInterface) +async def evaluator_run( + request: Request, evaluator_key: str, payload: EvaluatorInputInterface +): + """Endpoint to evaluate LLM app run + + Args: + request (Request): The request object. 
+ evaluator_key (str): The key of the evaluator. + payload (EvaluatorInputInterface): The payload containing the request data. + + Returns: + result: EvaluatorOutputInterface object containing the outputs. + """ + + try: + result = await evaluators_service.run( + evaluator_key=evaluator_key, evaluator_input=payload + ) + return result + except Exception as e: + logger.error(f"Error while running {evaluator_key} evaluator: {str(e)}") + raise HTTPException( + status_code=500, + detail={ + "message": f"Error while running {evaluator_key} evaluator", + "stacktrace": traceback.format_exc(), + }, + ) + + @router.get("/configs/", response_model=List[EvaluatorConfig]) async def get_evaluator_configs( app_id: str, diff --git a/agenta-backend/agenta_backend/routers/human_evaluation_router.py b/agenta-backend/agenta_backend/routers/human_evaluation_router.py index 846c551ee..a5573923e 100644 --- a/agenta-backend/agenta_backend/routers/human_evaluation_router.py +++ b/agenta-backend/agenta_backend/routers/human_evaluation_router.py @@ -481,7 +481,9 @@ async def delete_evaluations( """ try: - evaluation = await db_manager.fetch_evaluation_by_id(payload.evaluations_ids[0]) + evaluation = await db_manager.fetch_human_evaluation_by_id( + payload.evaluations_ids[0] + ) if isCloudEE(): has_permission = await check_action_access( user_uid=request.state.user_id, diff --git a/agenta-backend/agenta_backend/routers/testset_router.py b/agenta-backend/agenta_backend/routers/testset_router.py index ddd39bcc2..05268749c 100644 --- a/agenta-backend/agenta_backend/routers/testset_router.py +++ b/agenta-backend/agenta_backend/routers/testset_router.py @@ -5,7 +5,7 @@ import logging import requests from typing import Optional, List - +from datetime import datetime, timezone from pydantic import ValidationError from fastapi.responses import JSONResponse @@ -298,6 +298,7 @@ async def update_testset( testset_update = { "name": csvdata.name, "csvdata": csvdata.csvdata, + "updated_at": 
datetime.now(timezone.utc), } await db_manager.update_testset( testset_id=str(testset.id), values_to_update=testset_update @@ -351,6 +352,7 @@ async def get_testsets( _id=str(testset.id), # type: ignore name=testset.name, created_at=str(testset.created_at), + updated_at=str(testset.updated_at), ) for testset in testsets ] diff --git a/agenta-backend/agenta_backend/services/app_manager.py b/agenta-backend/agenta_backend/services/app_manager.py index 1ac1f2b28..84036b769 100644 --- a/agenta-backend/agenta_backend/services/app_manager.py +++ b/agenta-backend/agenta_backend/services/app_manager.py @@ -44,6 +44,7 @@ from agenta_backend.services import deployment_manager if isCloudEE(): + from agenta_backend.commons.services import db_manager_ee from agenta_backend.commons.services import ( api_key_service, ) # noqa pylint: disable-all @@ -106,9 +107,10 @@ async def start_variant( ) if isCloudEE(): user = await db_manager.get_user(user_uid=user_uid) + project = await db_manager_ee.get_project_by_id(project_id=project_id) api_key = await api_key_service.create_api_key( str(user.id), - project_id=project_id, + workspace_id=str(project.workspace_id), expiration_date=None, hidden=True, ) @@ -213,9 +215,21 @@ async def get_appdb_str_by_id(object_id: str, object_type: str) -> str: f"Deployment with id {object_id} not found" ) return str(deployment_db.app_id) + elif object_type == "evaluation": + evaluation_db = await db_manager.fetch_evaluation_by_id(object_id) + if evaluation_db is None: + raise db_manager.NoResultFound( + f"Evaluation with id {object_id} not found" + ) + return str(evaluation_db.app_id) + else: + raise ValueError( + f"Could not update last_modified_by application information. 
Unsupported type: {object_type}" + ) user = await db_manager.get_user(user_uid=user_uid) app_id = await get_appdb_str_by_id(object_id=object_id, object_type=object_type) + assert app_id is not None, f"app_id in {object_type} cannot be None" await db_manager.update_app( app_id=app_id, values_to_update={ diff --git a/agenta-backend/agenta_backend/services/db_manager.py b/agenta-backend/agenta_backend/services/db_manager.py index 26526d155..70293e23b 100644 --- a/agenta-backend/agenta_backend/services/db_manager.py +++ b/agenta-backend/agenta_backend/services/db_manager.py @@ -2644,7 +2644,7 @@ async def fetch_evaluations_by_resource( .join(HumanEvaluationVariantDB) .filter( HumanEvaluationVariantDB.variant_id.in_(ids), - EvaluationDB.project_id == uuid.UUID(project_id), + HumanEvaluationDB.project_id == uuid.UUID(project_id), ) .options(load_only(HumanEvaluationDB.id)) # type: ignore ) @@ -2652,7 +2652,7 @@ async def fetch_evaluations_by_resource( res_human_evaluations = result_human_evaluations.scalars().all() return res_evaluations + res_human_evaluations - if resource_type == "testset": + elif resource_type == "testset": result_evaluations = await session.execute( select(EvaluationDB) .filter( @@ -2665,7 +2665,8 @@ async def fetch_evaluations_by_resource( select(HumanEvaluationDB) .filter( HumanEvaluationDB.testset_id.in_(ids), - EvaluationDB.project_id == uuid.UUID(project_id), + HumanEvaluationDB.project_id + == uuid.UUID(project_id), # Fixed to match HumanEvaluationDB ) .options(load_only(HumanEvaluationDB.id)) # type: ignore ) @@ -2673,7 +2674,7 @@ async def fetch_evaluations_by_resource( res_human_evaluations = result_human_evaluations.scalars().all() return res_evaluations + res_human_evaluations - if resource_type == "evaluator_config": + elif resource_type == "evaluator_config": query = ( select(EvaluationDB) .join(EvaluationDB.evaluator_configs) @@ -2692,19 +2693,15 @@ async def fetch_evaluations_by_resource( ) -async def 
delete_evaluations(evaluation_ids: List[str], project_id: str) -> None: +async def delete_evaluations(evaluation_ids: List[str]) -> None: """Delete evaluations based on the ids provided from the db. Args: evaluations_ids (list[str]): The IDs of the evaluation - project_id (str): The ID of the project """ async with db_engine.get_session() as session: - query = select(EvaluationDB).where( - EvaluationDB.id.in_(evaluation_ids), - EvaluationDB.project_id == uuid.UUID(project_id), - ) + query = select(EvaluationDB).where(EvaluationDB.id.in_(evaluation_ids)) result = await session.execute(query) evaluations = result.scalars().all() for evaluation in evaluations: @@ -2860,13 +2857,17 @@ async def fetch_evaluator_config(evaluator_config_id: str): return evaluator_config -async def check_if_ai_critique_exists_in_list_of_evaluators_configs( - evaluators_configs_ids: List[str], +async def check_if_evaluators_exist_in_list_of_evaluators_configs( + evaluators_configs_ids: List[str], evaluators_keys: List[str] ) -> bool: - """Fetch evaluator configurations from the database. + """Check if the provided evaluators exist in the database within the given evaluator configurations. + + Arguments: + evaluators_configs_ids (List[str]): List of evaluator configuration IDs to search within. + evaluators_keys (List[str]): List of evaluator keys to check for existence. Returns: - EvaluatorConfigDB: the evaluator configuration object. + bool: True if all evaluators exist, False otherwise. 
""" async with db_engine.get_session() as session: @@ -2875,15 +2876,18 @@ async def check_if_ai_critique_exists_in_list_of_evaluators_configs( for evaluator_config_id in evaluators_configs_ids ] - query = select(EvaluatorConfigDB).where( + query = select(EvaluatorConfigDB.id, EvaluatorConfigDB.evaluator_key).where( EvaluatorConfigDB.id.in_(evaluator_config_uuids), - EvaluatorConfigDB.evaluator_key == "auto_ai_critique", + EvaluatorConfigDB.evaluator_key.in_(evaluators_keys), ) - result = await session.execute(query) - evaluators_configs = result.scalars().all() - return bool(evaluators_configs) + # NOTE: result.all() returns the records as a list of tuples + # 0 is the evaluator_id and 1 is evaluator_key + fetched_evaluators_keys = {config[1] for config in result.all()} + + # Ensure the passed evaluators are found in the fetched evaluator keys + return any(key in fetched_evaluators_keys for key in evaluators_keys) async def fetch_evaluator_config_by_appId( diff --git a/agenta-backend/agenta_backend/services/evaluation_service.py b/agenta-backend/agenta_backend/services/evaluation_service.py index 43908791b..7f8f2290f 100644 --- a/agenta-backend/agenta_backend/services/evaluation_service.py +++ b/agenta-backend/agenta_backend/services/evaluation_service.py @@ -425,6 +425,11 @@ async def create_new_evaluation( variant_db = await db_manager.get_app_variant_instance_by_id( variant_id=variant_id, project_id=project_id ) + + assert variant_db is not None, f"App variant with ID {variant_id} cannot be None." 
+ assert ( + variant_db.revision is not None + ), f"Revision of App variant with ID {variant_id} cannot be None" variant_revision = await db_manager.fetch_app_variant_revision_by_variant( app_variant_id=variant_id, project_id=project_id, revision=variant_db.revision # type: ignore ) diff --git a/agenta-backend/agenta_backend/services/evaluator_manager.py b/agenta-backend/agenta_backend/services/evaluator_manager.py index 3ebbc6d8c..bb5d6b19f 100644 --- a/agenta-backend/agenta_backend/services/evaluator_manager.py +++ b/agenta-backend/agenta_backend/services/evaluator_manager.py @@ -167,28 +167,3 @@ async def create_ready_to_use_evaluators(app_name: str, project_id: str): evaluator_key=evaluator.key, settings_values=settings_values, ) - - -async def check_ai_critique_inputs( - evaluators_configs: List[str], lm_providers_keys: Optional[Dict[str, Any]] -) -> Tuple[bool, Optional[JSONResponse]]: - """ - Checks if AI critique exists in evaluators configs and validates lm_providers_keys. - - Args: - evaluators_configs (List[str]): List of evaluator configurations. - lm_providers_keys (Optional[Dict[str, Any]]): Language model provider keys. - - Returns: - Tuple[bool, Optional[JSONResponse]]: Returns a tuple containing a boolean indicating success, - and a JSONResponse in case of error. 
- """ - if await db_manager.check_if_ai_critique_exists_in_list_of_evaluators_configs( - evaluators_configs - ): - if not lm_providers_keys: - return False, JSONResponse( - {"detail": "Missing LM provider Key"}, - status_code=400, - ) - return True, None diff --git a/agenta-backend/agenta_backend/services/evaluators_service.py b/agenta-backend/agenta_backend/services/evaluators_service.py index 42db9a2da..a9637dee5 100644 --- a/agenta-backend/agenta_backend/services/evaluators_service.py +++ b/agenta-backend/agenta_backend/services/evaluators_service.py @@ -2,7 +2,6 @@ import json import asyncio import logging -import asyncio import traceback from typing import Any, Dict, Union @@ -14,15 +13,63 @@ from agenta_backend.services.security import sandbox from agenta_backend.models.shared_models import Error, Result +from agenta_backend.models.api.evaluation_model import ( + EvaluatorInputInterface, + EvaluatorOutputInterface, + EvaluatorMappingInputInterface, + EvaluatorMappingOutputInterface, +) from agenta_backend.utils.traces import ( process_distributed_trace_into_trace_tree, get_field_value_from_trace_tree, ) + logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) +def validate_string_output( + evaluator_key: str, output: Union[str, Dict[str, Any]] +) -> str: + """Checks and validate the output to be of type string. + + Args: + evaluator_key (str): the key of the evaluator + output (Union[str, Dict[str, Any]]): the llm response + + Raises: + Exception: requires output to be a string + + Returns: + str: output + """ + + output = output.get("data", "") if isinstance(output, dict) else output + if not isinstance(output, str): + raise Exception( + f"Evaluator {evaluator_key} requires the output to be a string, but received {type(output).__name__} instead. 
" + ) + return output + + +async def map( + mapping_input: EvaluatorMappingInputInterface, +) -> EvaluatorMappingOutputInterface: + """ + Maps the evaluator inputs based on the provided mapping and data tree. + + Returns: + EvaluatorMappingOutputInterface: A dictionary containing the mapped evaluator inputs. + """ + + mapping_outputs = {} + trace = process_distributed_trace_into_trace_tree(mapping_input.inputs["trace"]) + for to_key, from_key in mapping_input.mapping.items(): + mapping_outputs[to_key] = get_field_value_from_trace_tree(trace, from_key) + return {"outputs": mapping_outputs} + + def get_correct_answer( data_point: Dict[str, Any], settings_values: Dict[str, Any] ) -> Any: @@ -42,6 +89,10 @@ def get_correct_answer( correct_answer_key = settings_values.get("correct_answer_key") if correct_answer_key is None: raise ValueError("No correct answer keys provided.") + if isinstance(correct_answer_key, str) and correct_answer_key.startswith( + "testcase." + ): + correct_answer_key = correct_answer_key[len("testcase.") :] if correct_answer_key not in data_point: raise ValueError( f"Correct answer column '{correct_answer_key}' not found in the test set." @@ -49,7 +100,7 @@ def get_correct_answer( return data_point[correct_answer_key] -def auto_exact_match( +async def auto_exact_match( inputs: Dict[str, Any], # pylint: disable=unused-argument output: Union[str, Dict[str, Any]], data_point: Dict[str, Any], # pylint: disable=unused-argument @@ -71,12 +122,15 @@ def auto_exact_match( Returns: Result: A Result object containing the evaluation result. 
""" - if not isinstance(output, str): - output = output.get("data", "") + try: + output = validate_string_output("exact_match", output) correct_answer = get_correct_answer(data_point, settings_values) - exact_match = True if output == correct_answer else False - result = Result(type="bool", value=exact_match) + inputs = {"ground_truth": correct_answer, "prediction": output} + response = await exact_match( + input=EvaluatorInputInterface(**{"inputs": inputs}) + ) + result = Result(type="bool", value=response["outputs"]["success"]) return result except ValueError as e: return Result( @@ -97,7 +151,14 @@ def auto_exact_match( ) -def auto_regex_test( +async def exact_match(input: EvaluatorInputInterface) -> EvaluatorOutputInterface: + prediction = input.inputs.get("prediction", "") + ground_truth = input.inputs.get("ground_truth", "") + success = True if prediction == ground_truth else False + return {"outputs": {"success": success}} + + +async def auto_regex_test( inputs: Dict[str, Any], # pylint: disable=unused-argument output: Union[str, Dict[str, Any]], data_point: Dict[str, Any], # pylint: disable=unused-argument @@ -105,14 +166,15 @@ def auto_regex_test( settings_values: Dict[str, Any], lm_providers_keys: Dict[str, Any], # pylint: disable=unused-argument ) -> Result: - if not isinstance(output, str): - output = output.get("data", "") try: - re_pattern = re.compile(settings_values["regex_pattern"], re.IGNORECASE) - result = ( - bool(re_pattern.search(output)) == settings_values["regex_should_match"] + output = validate_string_output("regex_test", output) + inputs = {"ground_truth": data_point, "prediction": output} + response = await regex_test( + input=EvaluatorInputInterface( + **{"inputs": inputs, "settings": settings_values} + ) ) - return Result(type="bool", value=result) + return Result(type="bool", value=response["outputs"]["success"]) except Exception as e: # pylint: disable=broad-except return Result( type="error", @@ -124,7 +186,16 @@ def 
auto_regex_test( ) -def field_match_test( +async def regex_test(input: EvaluatorInputInterface) -> EvaluatorOutputInterface: + pattern = re.compile(input.settings["regex_pattern"], re.IGNORECASE) + result = ( + bool(pattern.search(input.inputs["prediction"])) + == input.settings["regex_should_match"] + ) + return {"outputs": {"success": result}} + + +async def auto_field_match_test( inputs: Dict[str, Any], # pylint: disable=unused-argument output: Union[str, Dict[str, Any]], data_point: Dict[str, Any], @@ -132,13 +203,14 @@ def field_match_test( settings_values: Dict[str, Any], lm_providers_keys: Dict[str, Any], # pylint: disable=unused-argument ) -> Result: - if not isinstance(output, str): - output = output.get("data", "") try: + output = validate_string_output("field_match_test", output) correct_answer = get_correct_answer(data_point, settings_values) - output_json = json.loads(output) - result = output_json[settings_values["json_field"]] == correct_answer - return Result(type="bool", value=result) + inputs = {"ground_truth": correct_answer, "prediction": output} + response = await field_match_test( + input=EvaluatorInputInterface(**{"inputs": inputs}) + ) + return Result(type="bool", value=response["outputs"]["success"]) except ValueError as e: return Result( type="error", @@ -152,7 +224,13 @@ def field_match_test( return Result(type="bool", value=False) -def auto_webhook_test( +async def field_match_test(input: EvaluatorInputInterface) -> EvaluatorOutputInterface: + prediction_json = json.loads(input.inputs["prediction"]) + result = prediction_json == input.inputs["ground_truth"] + return {"outputs": {"success": result}} + + +async def auto_webhook_test( inputs: Dict[str, Any], output: Union[str, Dict[str, Any]], data_point: Dict[str, Any], @@ -160,38 +238,16 @@ def auto_webhook_test( settings_values: Dict[str, Any], lm_providers_keys: Dict[str, Any], # pylint: disable=unused-argument ) -> Result: - if not isinstance(output, str): - output = output.get("data", 
"") try: + output = validate_string_output("webhook_test", output) correct_answer = get_correct_answer(data_point, settings_values) - - with httpx.Client() as client: - payload = { - "correct_answer": correct_answer, - "output": output, - "inputs": inputs, - } - response = client.post(url=settings_values["webhook_url"], json=payload) - response.raise_for_status() - response_data = response.json() - score = response_data.get("score", None) - if score is None and not isinstance(score, (int, float)): - return Result( - type="error", - value=None, - error=Error( - message="Error during Auto Webhook evaluation; Webhook did not return a score", - ), - ) - if score < 0 or score > 1: - return Result( - type="error", - value=None, - error=Error( - message="Error during Auto Webhook evaluation; Webhook returned an invalid score. Score must be between 0 and 1", - ), - ) - return Result(type="number", value=score) + inputs = {"prediction": output, "ground_truth": correct_answer} + response = await webhook_test( + input=EvaluatorInputInterface( + **{"inputs": inputs, "settings": settings_values} + ) + ) + return Result(type="number", value=response["outputs"]["score"]) except httpx.HTTPError as e: return Result( type="error", @@ -221,7 +277,21 @@ def auto_webhook_test( ) -def auto_custom_code_run( +async def webhook_test(input: EvaluatorInputInterface) -> EvaluatorOutputInterface: + with httpx.Client() as client: + payload = { + "correct_answer": input.inputs["ground_truth"], + "output": input.inputs["prediction"], + "inputs": input.inputs, + } + response = client.post(url=input.settings["webhook_url"], json=payload) + response.raise_for_status() + response_data = response.json() + score = response_data.get("score", None) + return {"outputs": {"score": score}} + + +async def auto_custom_code_run( inputs: Dict[str, Any], output: Union[str, Dict[str, Any]], data_point: Dict[str, Any], @@ -229,20 +299,20 @@ def auto_custom_code_run( settings_values: Dict[str, Any], 
lm_providers_keys: Dict[str, Any], # pylint: disable=unused-argument ) -> Result: - if not isinstance(output, str): - output = output.get("data", "") try: - result = sandbox.execute_code_safely( - app_params=app_params, - inputs=inputs, - output=output, - correct_answer=data_point.get( - "correct_answer", None - ), # for backward compatibility - code=settings_values["code"], - datapoint=data_point, + output = validate_string_output("custom_code_run", output) + correct_answer = get_correct_answer(data_point, settings_values) + inputs = { + "app_config": app_params, + "prediction": output, + "ground_truth": correct_answer, + } + response = await custom_code_run( + input=EvaluatorInputInterface( + **{"inputs": inputs, "settings": settings_values} + ) ) - return Result(type="number", value=result) + return Result(type="number", value=response["outputs"]["score"]) except Exception as e: # pylint: disable=broad-except return Result( type="error", @@ -254,7 +324,19 @@ def auto_custom_code_run( ) -def auto_ai_critique( +async def custom_code_run(input: EvaluatorInputInterface) -> EvaluatorOutputInterface: + result = sandbox.execute_code_safely( + app_params=input.inputs["app_config"], + inputs=input.inputs, + output=input.inputs["prediction"], + correct_answer=input.inputs["ground_truth"], + code=input.settings["code"], + datapoint=input.inputs["ground_truth"], + ) + return {"outputs": {"score": result}} + + +async def auto_ai_critique( inputs: Dict[str, Any], output: Union[str, Dict[str, Any]], data_point: Dict[str, Any], @@ -276,34 +358,28 @@ def auto_ai_critique( Returns: Result: Evaluation result. 
""" - if not isinstance(output, str): - output = output.get("data", "") + try: + output = validate_string_output("ai_critique", output) correct_answer = get_correct_answer(data_point, settings_values) - openai_api_key = lm_providers_keys["OPENAI_API_KEY"] - - chain_run_args = { - "llm_app_prompt_template": app_params.get("prompt_user", ""), - "variant_output": output, - "correct_answer": correct_answer, + inputs = { + "prompt_user": app_params.get("prompt_user", "").format(**data_point), + "prediction": output, + "ground_truth": correct_answer, } - - for key, value in inputs.items(): - chain_run_args[key] = value - - prompt_template = settings_values["prompt_template"] - messages = [ - {"role": "system", "content": prompt_template}, - {"role": "user", "content": str(chain_run_args)}, - ] - - client = OpenAI(api_key=openai_api_key) - response = client.chat.completions.create( - model="gpt-3.5-turbo", messages=messages, temperature=0.01 + settings = { + "prompt_template": settings_values.get("prompt_template", ""), + } + response = await ai_critique( + input=EvaluatorInputInterface( + **{ + "inputs": inputs, + "settings": settings, + "credentials": lm_providers_keys, + } + ) ) - - evaluation_output = response.choices[0].message.content.strip() - return Result(type="text", value=evaluation_output) + return Result(type="text", value=str(response["outputs"]["score"])) except Exception as e: # pylint: disable=broad-except return Result( type="error", @@ -315,7 +391,39 @@ def auto_ai_critique( ) -def auto_starts_with( +async def ai_critique(input: EvaluatorInputInterface) -> EvaluatorOutputInterface: + openai_api_key = input.credentials.get("OPENAI_API_KEY", None) + + if not openai_api_key: + raise Exception( + "No OpenAI key was found. AI Critique evaluator requires a valid OpenAI API key to function. Please configure your OpenAI API and try again." 
+    )
+
+    chain_run_args = {
+        "llm_app_prompt_template": input.inputs.get("prompt_user", ""),
+        "variant_output": input.inputs["prediction"],
+        "correct_answer": input.inputs["ground_truth"],
+    }
+    for key, value in input.inputs.items():
+        chain_run_args[key] = value
+
+    prompt_template = input.settings.get("prompt_template", "")
+    messages = [
+        {"role": "system", "content": prompt_template},
+        {"role": "user", "content": str(chain_run_args)},
+    ]
+
+    # NOTE(review): never print/log `input` here — it carries provider credentials.
+
+    client = AsyncOpenAI(api_key=openai_api_key)
+    response = await client.chat.completions.create(
+        model="gpt-3.5-turbo", messages=messages, temperature=0.8  # NOTE(review): was 0.01 — confirm the non-deterministic setting is intended
+    )
+    evaluation_output = response.choices[0].message.content.strip()
+    return {"outputs": {"score": evaluation_output}}
+
+
+async def auto_starts_with(
     inputs: Dict[str, Any],  # pylint: disable=unused-argument
     output: Union[str, Dict[str, Any]],
     data_point: Dict[str, Any],  # pylint: disable=unused-argument
@@ -323,18 +431,15 @@ def auto_starts_with(
     settings_values: Dict[str, Any],
     lm_providers_keys: Dict[str, Any],  # pylint: disable=unused-argument
 ) -> Result:
-    if not isinstance(output, str):
-        output = output.get("data", "")
     try:
-        prefix = settings_values.get("prefix", "")
-        case_sensitive = settings_values.get("case_sensitive", True)
-
-        if not case_sensitive:
-            output = output.lower()
-            prefix = prefix.lower()
-
-        result = Result(type="bool", value=output.startswith(prefix))
-        return result
+        output = validate_string_output("starts_with", output)
+        inputs = {"prediction": output}
+        response = await starts_with(
+            input=EvaluatorInputInterface(
+                **{"inputs": inputs, "settings": settings_values}
+            )
+        )
+        return Result(type="bool", value=response["outputs"]["success"])
     except Exception as e:  # pylint: disable=broad-except
         return Result(
             type="error",
@@ -346,7 +451,20 @@ def auto_starts_with(
         )
 
 
-def auto_ends_with(
+async def starts_with(input: EvaluatorInputInterface) -> EvaluatorOutputInterface:
+    prefix = input.settings.get("prefix", "")
+ case_sensitive = input.settings.get("case_sensitive", True) + + output = str(input.inputs["prediction"]) + if not case_sensitive: + output = output.lower() + prefix = prefix.lower() + + result = output.startswith(prefix) + return {"outputs": {"success": result}} + + +async def auto_ends_with( inputs: Dict[str, Any], # pylint: disable=unused-argument output: Union[str, Dict[str, Any]], data_point: Dict[str, Any], # pylint: disable=unused-argument @@ -354,17 +472,15 @@ def auto_ends_with( settings_values: Dict[str, Any], lm_providers_keys: Dict[str, Any], # pylint: disable=unused-argument ) -> Result: - if not isinstance(output, str): - output = output.get("data", "") try: - suffix = settings_values.get("suffix", "") - case_sensitive = settings_values.get("case_sensitive", True) - - if not case_sensitive: - output = output.lower() - suffix = suffix.lower() - - result = Result(type="bool", value=output.endswith(suffix)) + output = validate_string_output("ends_with", output) + inputs = {"prediction": output} + response = await ends_with( + input=EvaluatorInputInterface( + **{"inputs": inputs, "settings": settings_values} + ) + ) + result = Result(type="bool", value=response["outputs"]["success"]) return result except Exception as e: # pylint: disable=broad-except return Result( @@ -377,7 +493,20 @@ def auto_ends_with( ) -def auto_contains( +async def ends_with(input: EvaluatorInputInterface) -> EvaluatorOutputInterface: + suffix = input.settings.get("suffix", "") + case_sensitive = input.settings.get("case_sensitive", True) + + output = str(input.inputs["prediction"]) + if not case_sensitive: + output = output.lower() + suffix = suffix.lower() + + result = output.endswith(suffix) + return {"outputs": {"success": result}} + + +async def auto_contains( inputs: Dict[str, Any], # pylint: disable=unused-argument output: Union[str, Dict[str, Any]], data_point: Dict[str, Any], # pylint: disable=unused-argument @@ -385,17 +514,15 @@ def auto_contains( settings_values: 
Dict[str, Any], lm_providers_keys: Dict[str, Any], # pylint: disable=unused-argument ) -> Result: - if not isinstance(output, str): - output = output.get("data", "") try: - substring = settings_values.get("substring", "") - case_sensitive = settings_values.get("case_sensitive", True) - - if not case_sensitive: - output = output.lower() - substring = substring.lower() - - result = Result(type="bool", value=substring in output) + output = validate_string_output("contains", output) + inputs = {"prediction": output} + response = await contains( + input=EvaluatorInputInterface( + **{"inputs": inputs, "settings": settings_values} + ) + ) + result = Result(type="bool", value=response["outputs"]["success"]) return result except Exception as e: # pylint: disable=broad-except return Result( @@ -408,7 +535,20 @@ def auto_contains( ) -def auto_contains_any( +async def contains(input: EvaluatorInputInterface) -> EvaluatorOutputInterface: + substring = input.settings.get("substring", "") + case_sensitive = input.settings.get("case_sensitive", True) + + output = str(input.inputs["prediction"]) + if not case_sensitive: + output = output.lower() + substring = substring.lower() + + result = substring in output + return {"outputs": {"success": result}} + + +async def auto_contains_any( inputs: Dict[str, Any], # pylint: disable=unused-argument output: Union[str, Dict[str, Any]], data_point: Dict[str, Any], # pylint: disable=unused-argument @@ -416,20 +556,15 @@ def auto_contains_any( settings_values: Dict[str, Any], lm_providers_keys: Dict[str, Any], # pylint: disable=unused-argument ) -> Result: - if not isinstance(output, str): - output = output.get("data", "") try: - substrings_str = settings_values.get("substrings", "") - substrings = [substring.strip() for substring in substrings_str.split(",")] - case_sensitive = settings_values.get("case_sensitive", True) - - if not case_sensitive: - output = output.lower() - substrings = [substring.lower() for substring in substrings] - - 
result = Result( - type="bool", value=any(substring in output for substring in substrings) + output = validate_string_output("contains_any", output) + inputs = {"prediction": output} + response = await contains_any( + input=EvaluatorInputInterface( + **{"inputs": inputs, "settings": settings_values} + ) ) + result = Result(type="bool", value=response["outputs"]["success"]) return result except Exception as e: # pylint: disable=broad-except return Result( @@ -442,7 +577,22 @@ def auto_contains_any( ) -def auto_contains_all( +async def contains_any(input: EvaluatorInputInterface) -> EvaluatorOutputInterface: + substrings_str = input.settings.get("substrings", "") + substrings = [substring.strip() for substring in substrings_str.split(",")] + case_sensitive = input.settings.get("case_sensitive", True) + + output = str(input.inputs["prediction"]) + if not case_sensitive: + output = output.lower() + substrings = [substring.lower() for substring in substrings] + + return { + "outputs": {"success": any(substring in output for substring in substrings)} + } + + +async def auto_contains_all( inputs: Dict[str, Any], # pylint: disable=unused-argument output: Union[str, Dict[str, Any]], data_point: Dict[str, Any], # pylint: disable=unused-argument @@ -450,20 +600,14 @@ def auto_contains_all( settings_values: Dict[str, Any], lm_providers_keys: Dict[str, Any], # pylint: disable=unused-argument ) -> Result: - if not isinstance(output, str): - output = output.get("data", "") try: - substrings_str = settings_values.get("substrings", "") - substrings = [substring.strip() for substring in substrings_str.split(",")] - case_sensitive = settings_values.get("case_sensitive", True) - - if not case_sensitive: - output = output.lower() - substrings = [substring.lower() for substring in substrings] - - result = Result( - type="bool", value=all(substring in output for substring in substrings) + output = validate_string_output("contains_all", output) + response = await contains_all( + 
input=EvaluatorInputInterface( + **{"inputs": {"prediction": output}, "settings": settings_values} + ) ) + result = Result(type="bool", value=response["outputs"]["success"]) return result except Exception as e: # pylint: disable=broad-except return Result( @@ -476,7 +620,21 @@ def auto_contains_all( ) -def auto_contains_json( +async def contains_all(input: EvaluatorInputInterface) -> EvaluatorOutputInterface: + substrings_str = input.settings.get("substrings", "") + substrings = [substring.strip() for substring in substrings_str.split(",")] + case_sensitive = input.settings.get("case_sensitive", True) + + output = str(input.inputs["prediction"]) + if not case_sensitive: + output = output.lower() + substrings = [substring.lower() for substring in substrings] + + result = all(substring in output for substring in substrings) + return {"outputs": {"success": result}} + + +async def auto_contains_json( inputs: Dict[str, Any], # pylint: disable=unused-argument output: Union[str, Dict[str, Any]], data_point: Dict[str, Any], # pylint: disable=unused-argument @@ -484,20 +642,21 @@ def auto_contains_json( settings_values: Dict[str, Any], # pylint: disable=unused-argument lm_providers_keys: Dict[str, Any], # pylint: disable=unused-argument ) -> Result: - if not isinstance(output, str): - output = output.get("data", "") try: - try: - start_index = output.index("{") - end_index = output.rindex("}") + 1 - potential_json = output[start_index:end_index] - - json.loads(potential_json) - contains_json = True - except (ValueError, json.JSONDecodeError): - contains_json = False - - return Result(type="bool", value=contains_json) + # parsing llm app output format if v2 + output = output.get("data", "") if isinstance(output, dict) else output + if isinstance(output, dict): + output = json.dumps( + output + ) # contains_json expects inputs.prediction to be a string + elif not isinstance(output, (str, dict)): + raise Exception( + f"Evaluator contains_json requires the app output to be 
either a JSON string or object, but received {type(output).__name__} instead." + ) + response = await contains_json( + input=EvaluatorInputInterface(**{"inputs": {"prediction": output}}) + ) + return Result(type="bool", value=response["outputs"]["success"]) except Exception as e: # pylint: disable=broad-except return Result( type="error", @@ -509,6 +668,19 @@ def auto_contains_json( ) +async def contains_json(input: EvaluatorInputInterface) -> EvaluatorOutputInterface: + try: + start_index = str(input.inputs["prediction"]).index("{") + end_index = str(input.inputs["prediction"]).rindex("}") + 1 + potential_json = str(input.inputs["prediction"])[start_index:end_index] + json.loads(potential_json) + contains_json = True + except (ValueError, json.JSONDecodeError) as e: + contains_json = False + + return {"outputs": {"success": contains_json}} + + def flatten_json(json_obj: Union[list, dict]) -> Dict[str, Any]: """ This function takes a (nested) JSON object and flattens it into a single-level dictionary where each key represents the path to the value in the original JSON structure. This is done recursively, ensuring that the full hierarchical context is preserved in the keys. @@ -612,7 +784,7 @@ def diff(ground_truth: Any, app_output: Any, compare_schema_only: bool) -> float return 0.0 -def auto_json_diff( +async def auto_json_diff( inputs: Dict[str, Any], # pylint: disable=unused-argument output: Any, data_point: Dict[str, Any], # pylint: disable=unused-argument @@ -621,30 +793,28 @@ def auto_json_diff( lm_providers_keys: Dict[str, Any], # pylint: disable=unused-argument ) -> Result: try: - output = output.get("data", "") if isinstance(output, dict) else output + # 2. 
extract ground truth from data point + correct_answer = get_correct_answer(data_point, settings_values) - if isinstance(output, dict): - output = json.dumps(output) - elif isinstance(output, str): - try: - json.loads(output) - except: - raise Exception( - f"Evaluator 'auto_json_diff' requires string outputs to be JSON strings." - ) - else: - raise Exception( - f"Evaluator 'auto_json_diff' requires the output to be either a JSON string or a JSON object, but received {type(output).__name__} instead." + response = await json_diff( + input=EvaluatorInputInterface( + **{ + "inputs": {"prediction": output, "ground_truth": correct_answer}, + "settings": settings_values, + } ) - - correct_answer = get_correct_answer(data_point, settings_values) - average_score = compare_jsons( - ground_truth=json.loads(correct_answer), - app_output=json.loads(output), - settings_values=settings_values, ) - return Result(type="number", value=average_score) - except (ValueError, json.JSONDecodeError, Exception): + return Result(type="number", value=response["outputs"]["score"]) + except json.JSONDecodeError: + return Result( + type="error", + value=None, + error=Error( + message="Expected answer is not a valid JSON", + stacktrace=traceback.format_exc(), + ), + ) + except (ValueError, Exception): return Result( type="error", value=None, @@ -655,7 +825,55 @@ def auto_json_diff( ) -def rag_faithfulness( +async def json_diff(input: EvaluatorInputInterface) -> EvaluatorOutputInterface: + ground_truth = input.inputs["ground_truth"] + if isinstance(ground_truth, str): + ground_truth = json.loads(ground_truth) # if this fails we will return an error + + # 1. 
extract llm app output if app output format is v2+ + app_output = input.inputs["prediction"] + assert isinstance( + app_output, (str, dict) + ), "App output is expected to be a string or a JSON object" + app_output = ( + app_output.get("data", "") if isinstance(app_output, dict) else app_output + ) + if isinstance(app_output, str): + try: + app_output = json.loads(app_output) + except json.JSONDecodeError: + app_output = ( + {} + ) # we will return 0 score for json diff in case we cannot parse the output as json + + score = compare_jsons( + ground_truth=ground_truth, + app_output=app_output, + settings_values=input.settings, + ) + return {"outputs": {"score": score}} + + +async def measure_rag_consistency( + input: EvaluatorInputInterface, +) -> EvaluatorOutputInterface: + openai_api_key = input.credentials.get("OPENAI_API_KEY", None) + if not openai_api_key: + raise Exception( + "No OpenAI key was found. RAG evaluator requires a valid OpenAI API key to function. Please configure your OpenAI API and try again." + ) + + # Initialize RAG evaluator to calculate faithfulness score + faithfulness = Faithfulness(api_key=openai_api_key) + eval_score = await faithfulness._run_eval_async( + output=input.inputs["answer_key"], + input=input.inputs["question_key"], + context=input.inputs["contexts_key"], + ) + return {"outputs": {"score": eval_score.score}} + + +async def rag_faithfulness( inputs: Dict[str, Any], # pylint: disable=unused-argument output: Union[str, Dict[str, Any]], data_point: Dict[str, Any], # pylint: disable=unused-argument @@ -711,23 +929,20 @@ def rag_faithfulness( raise ValueError(message) - openai_api_key = lm_providers_keys.get("OPENAI_API_KEY", None) - - if not openai_api_key: - raise Exception( - "No LLM keys OpenAI key found. Please configure your OpenAI keys and try again." 
- ) - - # Initialize RAG evaluator to calculate faithfulness score - loop = asyncio.get_event_loop() - faithfulness = Faithfulness(api_key=openai_api_key) - eval_score = loop.run_until_complete( - faithfulness._run_eval_async( - output=answer_val, input=question_val, context=contexts_val + measurement = await measure_rag_consistency( + input=EvaluatorInputInterface( + **{ + "inputs": { + "question_key": question_val, + "contexts_key": contexts_val, + "answer_key": answer_val, + }, + "settings": settings_values, + "credentials": lm_providers_keys, + } ) ) - - return Result(type="number", value=eval_score.score) + return Result(type="number", value=measurement["outputs"]["score"]) except Exception: return Result( @@ -740,7 +955,26 @@ def rag_faithfulness( ) -def rag_context_relevancy( +async def measure_context_coherence( + input: EvaluatorInputInterface, +) -> EvaluatorOutputInterface: + openai_api_key = input.credentials.get("OPENAI_API_KEY", None) + if not openai_api_key: + raise Exception( + "No OpenAI key was found. RAG evaluator requires a valid OpenAI API key to function. Please configure your OpenAI API and try again." + ) + + # Initialize RAG evaluator to calculate context relevancy score + context_rel = ContextRelevancy(api_key=openai_api_key) + eval_score = await context_rel._run_eval_async( + output=input.inputs["answer_key"], + input=input.inputs["question_key"], + context=input.inputs["contexts_key"], + ) + return {"outputs": {"score": eval_score.score}} + + +async def rag_context_relevancy( inputs: Dict[str, Any], # pylint: disable=unused-argument output: Union[str, Dict[str, Any]], data_point: Dict[str, Any], # pylint: disable=unused-argument @@ -796,22 +1030,20 @@ def rag_context_relevancy( raise ValueError(message) - openai_api_key = lm_providers_keys.get("OPENAI_API_KEY", None) - - if not openai_api_key: - raise Exception( - "No LLM keys OpenAI key found. Please configure your OpenAI keys and try again." 
- ) - - # Initialize RAG evaluator to calculate context relevancy score - loop = asyncio.get_event_loop() - context_rel = ContextRelevancy(api_key=openai_api_key) - eval_score = loop.run_until_complete( - context_rel._run_eval_async( - output=answer_val, input=question_val, context=contexts_val + measurement = await measure_context_coherence( + input=EvaluatorInputInterface( + **{ + "inputs": { + "question_key": question_val, + "contexts_key": contexts_val, + "answer_key": answer_val, + }, + "settings": settings_values, + "credentials": lm_providers_keys, + } ) ) - return Result(type="number", value=eval_score.score) + return Result(type="number", value=measurement["outputs"]["score"]) except Exception: return Result( @@ -824,27 +1056,35 @@ def rag_context_relevancy( ) -def levenshtein_distance(s1, s2): - if len(s1) < len(s2): - return levenshtein_distance(s2, s1) # pylint: disable=arguments-out-of-order +async def levenshtein_distance( + input: EvaluatorInputInterface, +) -> EvaluatorOutputInterface: + prediction = input.inputs["prediction"] + ground_truth = input.inputs["ground_truth"] - if len(s2) == 0: - return len(s1) + if len(ground_truth) == 0: + return len(prediction) - previous_row = range(len(s2) + 1) - for i, c1 in enumerate(s1): + previous_row = range(len(ground_truth) + 1) + for i, c1 in enumerate(prediction): current_row = [i + 1] - for j, c2 in enumerate(s2): + for j, c2 in enumerate(ground_truth): insertions = previous_row[j + 1] + 1 deletions = current_row[j] + 1 substitutions = previous_row[j] + (c1 != c2) current_row.append(min(insertions, deletions, substitutions)) previous_row = current_row - return previous_row[-1] + distance = previous_row[-1] + if "threshold" in input.settings: + threshold = input.settings["threshold"] + is_within_threshold = distance <= threshold + return {"outputs": {"success": is_within_threshold}} + + return {"outputs": {"score": distance}} -def auto_levenshtein_distance( +async def auto_levenshtein_distance( inputs: 
Dict[str, Any], # pylint: disable=unused-argument output: Union[str, Dict[str, Any]], data_point: Dict[str, Any], @@ -852,19 +1092,21 @@ def auto_levenshtein_distance( settings_values: Dict[str, Any], lm_providers_keys: Dict[str, Any], # pylint: disable=unused-argument ) -> Result: - if not isinstance(output, str): - output = output.get("data", "") try: + output = validate_string_output("levenshtein_distance", output) correct_answer = get_correct_answer(data_point, settings_values) + response = await levenshtein_distance( + input=EvaluatorInputInterface( + **{ + "inputs": {"prediction": output, "ground_truth": correct_answer}, + "settings": settings_values, + } + ) + ) + if "success" in response["outputs"]: + return Result(type="number", value=response["outputs"]["success"]) + return Result(type="number", value=response["outputs"]["score"]) - distance = levenshtein_distance(output, correct_answer) - - if "threshold" in settings_values: - threshold = settings_values["threshold"] - is_within_threshold = distance <= threshold - return Result(type="bool", value=is_within_threshold) - - return Result(type="number", value=distance) except ValueError as e: return Result( type="error", @@ -884,7 +1126,7 @@ def auto_levenshtein_distance( ) -def auto_similarity_match( +async def auto_similarity_match( inputs: Dict[str, Any], output: Union[str, Dict[str, Any]], data_point: Dict[str, Any], @@ -892,21 +1134,18 @@ def auto_similarity_match( settings_values: Dict[str, Any], lm_providers_keys: Dict[str, Any], ) -> Result: - if not isinstance(output, str): - output = output.get("data", "") try: + output = validate_string_output("similarity_match", output) correct_answer = get_correct_answer(data_point, settings_values) - set1 = set(output.split()) - set2 = set(correct_answer.split()) - intersect = set1.intersection(set2) - union = set1.union(set2) - - similarity = len(intersect) / len(union) - - is_similar = ( - True if similarity > settings_values["similarity_threshold"] else 
False + response = await similarity_match( + input=EvaluatorInputInterface( + **{ + "inputs": {"prediction": output, "ground_truth": correct_answer}, + "settings": settings_values, + } + ) ) - result = Result(type="bool", value=is_similar) + result = Result(type="bool", value=response["outputs"]["success"]) return result except ValueError as e: return Result( @@ -927,25 +1166,39 @@ def auto_similarity_match( ) +async def similarity_match(input: EvaluatorInputInterface) -> EvaluatorOutputInterface: + set1 = set(input.inputs["prediction"].split()) + set2 = set(input.inputs["ground_truth"].split()) + intersect = set1.intersection(set2) + union = set1.union(set2) + print(set1) + print(set2) + print(union) + + similarity = len(intersect) / len(union) + is_similar = True if similarity > input.settings["similarity_threshold"] else False + return {"outputs": {"success": is_similar}} + + async def semantic_similarity( - output: Union[str, Dict[str, Any]], - correct_answer: str, - api_key: str, -) -> float: - """ - Calculate the semantic similarity score of the LLM app using OpenAI's Embeddings API. + input: EvaluatorInputInterface, +) -> EvaluatorOutputInterface: + """Calculate the semantic similarity score of the LLM app using OpenAI's Embeddings API. Args: - output (str): the output text - correct_answer (str): the correct answer text + input (EvaluatorInputInterface): the evaluator input Returns: float: the semantic similarity score """ - if not isinstance(output, str): - output = output.get("data", "") - openai = AsyncOpenAI(api_key=api_key) + openai_api_key = input.credentials.get("OPENAI_API_KEY", None) + if not openai_api_key: + raise Exception( + "No OpenAI key was found. Semantic evaluator requires a valid OpenAI API key to function. Please configure your OpenAI API and try again." 
+ ) + + openai = AsyncOpenAI(api_key=openai_api_key) async def encode(text: str): response = await openai.embeddings.create( @@ -956,13 +1209,13 @@ async def encode(text: str): def cosine_similarity(output_vector: array, correct_answer_vector: array) -> float: return np.dot(output_vector, correct_answer_vector) - output_vector = await encode(output) - correct_answer_vector = await encode(correct_answer) + output_vector = await encode(input.inputs["prediction"]) + correct_answer_vector = await encode(input.inputs["ground_truth"]) similarity_score = cosine_similarity(output_vector, correct_answer_vector) - return similarity_score + return {"outputs": {"score": similarity_score}} -def auto_semantic_similarity( +async def auto_semantic_similarity( inputs: Dict[str, Any], output: Union[str, Dict[str, Any]], data_point: Dict[str, Any], @@ -970,19 +1223,19 @@ def auto_semantic_similarity( settings_values: Dict[str, Any], lm_providers_keys: Dict[str, Any], ) -> Result: - if not isinstance(output, str): - output = output.get("data", "") try: - loop = asyncio.get_event_loop() - openai_api_key = lm_providers_keys["OPENAI_API_KEY"] + output = validate_string_output("semantic_similarity", output) correct_answer = get_correct_answer(data_point, settings_values) - - score = loop.run_until_complete( - semantic_similarity( - output=output, correct_answer=correct_answer, api_key=openai_api_key + inputs = {"prediction": output, "ground_truth": correct_answer} + response = await semantic_similarity( + input=EvaluatorInputInterface( + **{ + "inputs": inputs, + "credentials": lm_providers_keys, + } ) ) - return Result(type="number", value=score) + return Result(type="number", value=response["outputs"]["score"]) except Exception: return Result( type="error", @@ -997,7 +1250,7 @@ def auto_semantic_similarity( EVALUATOR_FUNCTIONS = { "auto_exact_match": auto_exact_match, "auto_regex_test": auto_regex_test, - "field_match_test": field_match_test, + "field_match_test": auto_field_match_test, 
"auto_webhook_test": auto_webhook_test, "auto_custom_code_run": auto_custom_code_run, "auto_ai_critique": auto_ai_critique, @@ -1015,8 +1268,29 @@ def auto_semantic_similarity( "rag_context_relevancy": rag_context_relevancy, } +RUN_EVALUATOR_FUNCTIONS = { + "auto_exact_match": exact_match, + "auto_regex_test": regex_test, + "field_match_test": field_match_test, + "auto_webhook_test": webhook_test, + "auto_custom_code_run": custom_code_run, + "auto_ai_critique": ai_critique, + "auto_starts_with": starts_with, + "auto_ends_with": ends_with, + "auto_contains": contains, + "auto_contains_any": contains_any, + "auto_contains_all": contains_all, + "auto_contains_json": contains_json, + "auto_json_diff": json_diff, + "auto_levenshtein_distance": levenshtein_distance, + "auto_similarity_match": similarity_match, + "auto_semantic_similarity": semantic_similarity, + "rag_faithfulness": measure_rag_consistency, + "rag_context_relevancy": measure_context_coherence, +} + -def evaluate( +async def evaluate( evaluator_key: str, inputs: Dict[str, Any], output: Union[str, Dict[str, Any]], @@ -1035,7 +1309,7 @@ def evaluate( ), ) try: - return evaluation_function( + return await evaluation_function( inputs, output, data_point, @@ -1052,3 +1326,14 @@ def evaluate( stacktrace=str(exc), ), ) + + +async def run( + evaluator_key: str, evaluator_input: EvaluatorInputInterface +) -> EvaluatorOutputInterface: + evaluator_function = RUN_EVALUATOR_FUNCTIONS.get(evaluator_key, None) + if not evaluator_function: + raise NotImplementedError(f"Evaluator {evaluator_key} not found") + + output = await evaluator_function(evaluator_input) + return output diff --git a/agenta-backend/agenta_backend/services/helpers.py b/agenta-backend/agenta_backend/services/helpers.py index 7b9510a0b..18951ad6f 100644 --- a/agenta-backend/agenta_backend/services/helpers.py +++ b/agenta-backend/agenta_backend/services/helpers.py @@ -1,6 +1,10 @@ import json -from typing import List, Dict, Any, Tuple, Union -from 
datetime import datetime, timedelta, timezone +from datetime import datetime, timezone +from typing import List, Dict, Any, Union, Tuple + +from agenta_backend.services import db_manager +from agenta_backend.models.api.evaluation_model import LMProvidersEnum +from agenta_backend.resources.evaluators.evaluators import get_all_evaluators def format_inputs(list_of_dictionaries: List[Dict[str, Any]]) -> Dict: @@ -76,3 +80,65 @@ def convert_to_utc_datetime(dt: Union[datetime, str, None]) -> datetime: if dt.tzinfo is None: return dt.replace(tzinfo=timezone.utc) return dt + + +def format_llm_provider_keys( + llm_provider_keys: Dict[LMProvidersEnum, str] +) -> Dict[str, str]: + """Formats a dictionary of LLM provider keys into a dictionary of strings. + + Args: + llm_provider_keys (Dict[LMProvidersEnum, str]): LLM provider keys + + Returns: + Dict[str, str]: formatted llm provided keys + + Example: + Input: {: '...', ...} + Output: {'MISTRAL_API_KEY': '...', ...} + """ + + llm_provider_keys = {key.value: value for key, value in llm_provider_keys.items()} + return llm_provider_keys + + +async def ensure_required_llm_keys_exist( + evaluator_configs: List[str], llm_provider_keys: Dict[str, str] +) -> Tuple[bool, None]: + """ + Validates if necessary LLM API keys are present when required evaluators are used. + + Args: + evaluator_configs (List[str]): List of evaluator configurations to check. + llm_provider_keys (Dict[str, str]): Dictionary of LLM provider keys (e.g., {"OPENAI_API_KEY": "your-key"}). + + Returns: + Tuple[bool, None]: Returns (True, None) if validation passes. + + Raises: + ValueError: If an evaluator requiring LLM keys is configured but no LLM API key is provided. 
+ + """ + + evaluators_requiring_llm_keys = [ + evaluator["key"] + for evaluator in get_all_evaluators() + if evaluator.get("requires_llm_api_keys", False) + or ( + evaluator.get("settings_template", {}) + .get("requires_llm_api_keys", {}) + .get("default", False) + ) + ] + evaluators_found = ( + await db_manager.check_if_evaluators_exist_in_list_of_evaluators_configs( + evaluator_configs, evaluators_requiring_llm_keys + ) + ) + + if evaluators_found and "OPENAI_API_KEY" not in llm_provider_keys: + raise ValueError( + "OpenAI API key is required to run one or more of the specified evaluators." + ) + + return True, None diff --git a/agenta-backend/agenta_backend/services/security/sandbox.py b/agenta-backend/agenta_backend/services/security/sandbox.py index b31e9fe91..95850265b 100644 --- a/agenta-backend/agenta_backend/services/security/sandbox.py +++ b/agenta-backend/agenta_backend/services/security/sandbox.py @@ -65,6 +65,7 @@ def execute_code_safely( "json", "requests", "numpy", + "typing", ] # Create a dictionary to simulate allowed imports diff --git a/agenta-backend/agenta_backend/tasks/evaluations.py b/agenta-backend/agenta_backend/tasks/evaluations.py index 9d0dd14e8..c2388477e 100644 --- a/agenta-backend/agenta_backend/tasks/evaluations.py +++ b/agenta-backend/agenta_backend/tasks/evaluations.py @@ -226,14 +226,16 @@ def evaluate( ) logger.debug(f"Evaluating with evaluator: {evaluator_config_db}") - result = evaluators_service.evaluate( - evaluator_key=evaluator_config_db.evaluator_key, - output=app_output.result.value, - data_point=data_point, - settings_values=evaluator_config_db.settings_values, - app_params=app_variant_parameters, # type: ignore - inputs=data_point, - lm_providers_keys=lm_providers_keys, + result = loop.run_until_complete( + evaluators_service.evaluate( + evaluator_key=evaluator_config_db.evaluator_key, + output=app_output.result.value, + data_point=data_point, + settings_values=evaluator_config_db.settings_values, + 
app_params=app_variant_parameters, # type: ignore + inputs=data_point, + lm_providers_keys=lm_providers_keys, + ) ) # Update evaluators aggregated data diff --git a/agenta-backend/agenta_backend/tests/unit/test_evaluators.py b/agenta-backend/agenta_backend/tests/unit/test_evaluators.py index d22b38563..0b4f65a00 100644 --- a/agenta-backend/agenta_backend/tests/unit/test_evaluators.py +++ b/agenta-backend/agenta_backend/tests/unit/test_evaluators.py @@ -1,10 +1,10 @@ import os import pytest -from test_traces import simple_rag_trace - +from agenta_backend.tests.unit.test_traces import simple_rag_trace from agenta_backend.services.evaluators_service import ( auto_levenshtein_distance, + auto_ai_critique, auto_starts_with, auto_ends_with, auto_contains, @@ -18,6 +18,53 @@ ) +@pytest.mark.parametrize( + "ground_truth, output, settings_values, openai_api_key, expected_min, expected_max", + [ + ( + {"correct_answer": "The capital of Kiribati is Tarawa."}, + "The capital of Kiribati is South Tarawa.", + { + "prompt_template": "We have an LLM App that we want to evaluate its outputs. Based on the prompt and the parameters provided below evaluate the output based on the evaluation strategy below:\nEvaluation strategy: 0 to 10 0 is very bad and 10 is very good.\nPrompt: {llm_app_prompt_template}\nInputs: country: {country}\nExpected Answer Column:{correct_answer}\nEvaluate this: {variant_output}\n\nAnswer ONLY with one of the given grading or evaluation options.", + "correct_answer_key": "correct_answer", + }, + os.environ.get("OPENAI_API_KEY"), + 0, + 10, + ), + ( + {"correct_answer": "The capital of Kiribati is Tarawa."}, + "The capital of Kiribati is South Tarawa.", + { + "prompt_template": "We have an LLM App that we want to evaluate its outputs. 
Based on the prompt and the parameters provided below evaluate the output based on the evaluation strategy below:\nEvaluation strategy: 0 to 10 0 is very bad and 10 is very good.\nPrompt: {llm_app_prompt_template}\nInputs: country: {country}\nExpected Answer Column:{correct_answer}\nEvaluate this: {variant_output}\n\nAnswer ONLY with one of the given grading or evaluation options.", + "correct_answer_key": "correct_answer", + }, + None, + None, + None, + ), + ], +) +@pytest.mark.asyncio +async def test_auto_ai_critique_evaluator( + ground_truth, output, settings_values, openai_api_key, expected_min, expected_max +): + result = await auto_ai_critique( + {}, + output, + ground_truth, + {}, + settings_values, + {"OPENAI_API_KEY": openai_api_key}, + ) + try: + assert expected_min <= round(result.value, 1) <= expected_max + except TypeError as error: + # exceptions + # - raised by evaluator (agenta) -> TypeError + assert not isinstance(result.value, float) or not isinstance(result.value, int) + + @pytest.mark.parametrize( "output, settings_values, expected", [ @@ -59,8 +106,9 @@ ), ], ) -def test_auto_starts_with(output, settings_values, expected): - result = auto_starts_with( +@pytest.mark.asyncio +async def test_auto_starts_with(output, settings_values, expected): + result = await auto_starts_with( inputs={}, output=output, data_point={}, @@ -83,8 +131,9 @@ def test_auto_starts_with(output, settings_values, expected): ("Hello world", "Hello", True, False), ], ) -def test_auto_ends_with(output, suffix, case_sensitive, expected): - result = auto_ends_with( +@pytest.mark.asyncio +async def test_auto_ends_with(output, suffix, case_sensitive, expected): + result = await auto_ends_with( {}, output, {}, @@ -106,8 +155,9 @@ def test_auto_ends_with(output, suffix, case_sensitive, expected): ("Hello world", "abc", True, False), ], ) -def test_auto_contains(output, substring, case_sensitive, expected): - result = auto_contains( +@pytest.mark.asyncio +async def 
test_auto_contains(output, substring, case_sensitive, expected): + result = await auto_contains( {}, output, {}, @@ -130,8 +180,9 @@ def test_auto_contains(output, substring, case_sensitive, expected): ("Hello world", "abc,xyz", True, False), ], ) -def test_auto_contains_any(output, substrings, case_sensitive, expected): - result = auto_contains_any( +@pytest.mark.asyncio +async def test_auto_contains_any(output, substrings, case_sensitive, expected): + result = await auto_contains_any( {}, output, {}, @@ -154,8 +205,9 @@ def test_auto_contains_any(output, substrings, case_sensitive, expected): ("Hello world", "world,universe", True, False), ], ) -def test_auto_contains_all(output, substrings, case_sensitive, expected): - result = auto_contains_all( +@pytest.mark.asyncio +async def test_auto_contains_all(output, substrings, case_sensitive, expected): + result = await auto_contains_all( {}, output, {}, @@ -174,10 +226,14 @@ def test_auto_contains_all(output, substrings, case_sensitive, expected): ("No JSON here!", False), ("{Malformed JSON, nope!}", False), ('{"valid": "json", "number": 123}', True), + ({"data": {"message": "The capital of Azerbaijan is Baku."}}, True), + ({"data": '{"message": "The capital of Azerbaijan is Baku."}'}, True), + ({"data": "The capital of Azerbaijan is Baku."}, False), ], ) -def test_auto_contains_json(output, expected): - result = auto_contains_json({}, output, {}, {}, {}, {}) +@pytest.mark.asyncio +async def test_auto_contains_json(output, expected): + result = await auto_contains_json({}, output, {}, {}, {}, {}) assert result.value == expected @@ -226,12 +282,47 @@ def test_auto_contains_json(output, expected): 0.0, 1.0, ), + ( + { + "correct_answer": '{"user": {"name": "John", "details": {"age": 30, "location": "New York"}}}' + }, + { + "data": '{"USER": {"NAME": "John", "DETAILS": {"AGE": 30, "LOCATION": "New York"}}}' + }, + { + "predict_keys": True, + "compare_schema_only": False, + "case_insensitive_keys": True, + 
"correct_answer_key": "correct_answer", + }, + 0.0, + 1.0, + ), + ( + { + "correct_answer": '{"user": {"name": "John", "details": {"age": 30, "location": "New York"}}}' + }, + { + "data": { + "output": '{"USER": {"NAME": "John", "DETAILS": {"AGE": 30, "LOCATION": "New York"}}}' + } + }, + { + "predict_keys": True, + "compare_schema_only": False, + "case_insensitive_keys": True, + "correct_answer_key": "correct_answer", + }, + 0.0, + 1.0, + ), ], ) -def test_auto_json_diff( +@pytest.mark.asyncio +async def test_auto_json_diff( ground_truth, app_output, settings_values, expected_min, expected_max ): - result = auto_json_diff({}, app_output, ground_truth, {}, settings_values, {}) + result = await auto_json_diff({}, app_output, ground_truth, {}, settings_values, {}) assert expected_min <= result.value <= expected_max @@ -265,12 +356,22 @@ def test_auto_json_diff( 0.0, 1.0, ), + ( + {"correct_answer": "The capital of Namibia is Windhoek."}, + "Windhoek is the capital of Namibia.", + { + "correct_answer_key": "correct_answer", + }, + None, + None, + ), ], ) -def test_auto_semantic_similarity_match( +@pytest.mark.asyncio +async def test_auto_semantic_similarity_match( ground_truth, app_output, settings_values, expected_min, expected_max ): - result = auto_semantic_similarity( + result = await auto_semantic_similarity( {}, app_output, ground_truth, @@ -278,7 +379,12 @@ def test_auto_semantic_similarity_match( settings_values, {"OPENAI_API_KEY": os.environ.get("OPENAI_API_KEY")}, ) - assert expected_min <= round(result.value, 3) <= expected_max + try: + assert expected_min <= round(result.value, 1) <= expected_max + except TypeError as error: + # exceptions + # - raised by evaluator (agenta) -> TypeError + assert not isinstance(result.value, float) or not isinstance(result.value, int) @pytest.mark.parametrize( @@ -322,8 +428,9 @@ def test_auto_semantic_similarity_match( ), ], ) -def test_auto_levenshtein_distance(output, data_point, settings_values, expected): - result = 
auto_levenshtein_distance( +@pytest.mark.asyncio +async def test_auto_levenshtein_distance(output, data_point, settings_values, expected): + result = await auto_levenshtein_distance( inputs={}, output=output, data_point=data_point, @@ -335,7 +442,7 @@ def test_auto_levenshtein_distance(output, data_point, settings_values, expected @pytest.mark.parametrize( - "settings_values, expected_min, expected_max", + "settings_values, expected_min, openai_api_key, expected_max", [ ( { @@ -343,27 +450,46 @@ def test_auto_levenshtein_distance(output, data_point, settings_values, expected "answer_key": "rag.reporter.outputs.report", "contexts_key": "rag.retriever.outputs.movies", }, + os.environ.get("OPENAI_API_KEY"), 0.0, 1.0, ), + ( + { + "question_key": "rag.retriever.internals.prompt", + "answer_key": "rag.reporter.outputs.report", + "contexts_key": "rag.retriever.outputs.movies", + }, + None, + None, + None, + ), # add more use cases ], ) -def test_rag_faithfulness_evaluator(settings_values, expected_min, expected_max): - result = rag_faithfulness( +@pytest.mark.asyncio +async def test_rag_faithfulness_evaluator( + settings_values, expected_min, openai_api_key, expected_max +): + result = await rag_faithfulness( {}, simple_rag_trace, {}, {}, settings_values, - {"OPENAI_API_KEY": os.environ.get("OPENAI_API_KEY")}, + {"OPENAI_API_KEY": openai_api_key}, ) - assert expected_min <= round(result.value, 1) <= expected_max + try: + assert expected_min <= round(result.value, 1) <= expected_max + except TypeError as error: + # exceptions + # - raised by evaluator (agenta) -> TypeError + assert not isinstance(result.value, float) or not isinstance(result.value, int) @pytest.mark.parametrize( - "settings_values, expected_min, expected_max", + "settings_values, expected_min, openai_api_key, expected_max", [ ( { @@ -371,20 +497,34 @@ def test_rag_faithfulness_evaluator(settings_values, expected_min, expected_max) "answer_key": "rag.reporter.outputs.report", "contexts_key": 
"rag.retriever.outputs.movies", }, + os.environ.get("OPENAI_API_KEY"), 0.0, 1.0, ), + ( + { + "question_key": "rag.retriever.internals.prompt", + "answer_key": "rag.reporter.outputs.report", + "contexts_key": "rag.retriever.outputs.movies", + }, + None, + None, + None, + ), # add more use cases ], ) -def test_rag_context_relevancy_evaluator(settings_values, expected_min, expected_max): - result = rag_context_relevancy( +@pytest.mark.asyncio +async def test_rag_context_relevancy_evaluator( + settings_values, expected_min, openai_api_key, expected_max +): + result = await rag_context_relevancy( {}, simple_rag_trace, {}, {}, settings_values, - {"OPENAI_API_KEY": os.environ.get("OPENAI_API_KEY")}, + {"OPENAI_API_KEY": openai_api_key}, ) try: diff --git a/agenta-backend/agenta_backend/tests/unit/test_traces.py b/agenta-backend/agenta_backend/tests/unit/test_traces.py index 2357ee22e..664eb4a4b 100644 --- a/agenta-backend/agenta_backend/tests/unit/test_traces.py +++ b/agenta-backend/agenta_backend/tests/unit/test_traces.py @@ -69,3 +69,71 @@ ], }, } + + +simple_finance_assisstant_trace = { + "data": {}, + "trace": { + "trace_id": "66a61777a1e481ab498bc7b5", + "cost": None, + "usage": None, + "latency": 12.372497, + "spans": [ + { + "id": "66a61777a1e481ab498bc7b4", + "name": "diversify", + "parent_span_id": None, + "start_time": "2024-07-25T17:06:46.141563Z", + "end_time": "2024-07-25T17:06:46.885700Z", + "spankind": "WORKFLOW", + "metadata": {"cost": None, "latency": 2.641, "usage": None}, + "user_id": "—", + "inputs": { + "currency": "USD", + "amount": 800000, + "stocks": [], + "real_estate_properties": "Konga KFI, Almord City, Cambridge Lounge", + "percentage_returns": "6%, 9%, 15%", + "durations": "6 months, 9 months, 15 months", + }, + "internals": None, + "outputs": { + "report": [ + "**Investment Amount:**\nUSD 800,000\n\n**Real Estate Properties:**\n1. Konga KFI: 6% return, 6 months duration\n2. Almord City: 9% return, 9 months duration\n3. 
Cambridge Lounge: 15% return, 15 months duration\n\n**Allocation Strategy:**\nTo optimize the investment by balancing risk and return potential, I will allocate a higher percentage to properties with higher returns and longer durations while still maintaining diversification.\n\n**Allocation Breakdown:**\n1. Konga KFI: 30%\n2. Almord City: 30%\n3. Cambridge Lounge: 40%\n\n**Final Allocation:**\n1. Konga KFI: USD 240,000\n2. Almord City: USD 240,000\n3. Cambridge Lounge: USD 320,000" + ] + }, + "config": { + "temperature": 0.7, + "prompt_system": "You are a financial advisor that helps users allocate their investments. Users will provide an amount of money they wish to invest along with details about stocks and real estate properties. Your goal is to diversify this amount effectively.\n\nUser Inputs: Investment Amount: The total amount the user wants to invest.\nStocks: A list of stocks the user is interested in.\nReal Estate Properties: A list of properties, including their expected returns and investment durations.", + "prompt_user": "\nMy currency is {currency}. The total amount I want to invest is {amount}.\n", + "max_tokens": 2000, + "model": "gpt-4o", + "top_p": 1.0, + "invest_in_stocks": 0, + "invest_in_realestate": 1, + "frequence_penalty": 0.0, + "presence_penalty": 0.0, + }, + }, + { + "id": "66a61777a1e481ab498bc7b6", + "name": "reporter", + "parent_span_id": "66a61777a1e481ab498bc7b4", + "start_time": "2024-07-25T17:06:46.141563Z", + "end_time": "2024-07-25T17:06:46.885700Z", + "spankind": "LLM", + "metadata": {"cost": None, "latency": 2.64, "usage": None}, + "user_id": "—", + "inputs": { + "user_prompt": "\nMy currency is USD. The total amount I want to invest is 800000.\n\nThe user wants to invest in the following stocks: [].\n\nThe user wants to invest in the following real estate properties: Konga KFI, Almord City, Cambridge Lounge. 
The percentage returns for these properties are 6%, 9%, 15%, and the investment durations are 6 months, 9 months, 15 months.\n" + }, + "internals": None, + "outputs": { + "report": [ + "**Investment Amount:**\nUSD 800,000\n\n**Real Estate Properties:**\n1. Konga KFI: 6% return, 6 months duration\n2. Almord City: 9% return, 9 months duration\n3. Cambridge Lounge: 15% return, 15 months duration\n\n**Allocation Strategy:**\nTo optimize the investment by balancing risk and return potential, I will allocate a higher percentage to properties with higher returns and longer durations while still maintaining diversification.\n\n**Allocation Breakdown:**\n1. Konga KFI: 30%\n2. Almord City: 30%\n3. Cambridge Lounge: 40%\n\n**Final Allocation:**\n1. Konga KFI: USD 240,000\n2. Almord City: USD 240,000\n3. Cambridge Lounge: USD 320,000" + ] + }, + }, + ], + }, +} diff --git a/agenta-backend/agenta_backend/tests/variants_main_router/conftest.py b/agenta-backend/agenta_backend/tests/variants_main_router/conftest.py index f4361e9c9..515be6261 100644 --- a/agenta-backend/agenta_backend/tests/variants_main_router/conftest.py +++ b/agenta-backend/agenta_backend/tests/variants_main_router/conftest.py @@ -14,6 +14,11 @@ ImageDB, AppVariantDB, ) +from agenta_backend.tests.unit.test_traces import ( + simple_rag_trace, + simple_finance_assisstant_trace, +) +from agenta_backend.resources.evaluators.evaluators import get_all_evaluators import httpx from sqlalchemy.future import select @@ -223,6 +228,21 @@ def app_variant_parameters_updated(): } +@pytest.fixture() +def evaluators_requiring_llm_keys(): + evaluators_requiring_llm_keys = [ + evaluator["key"] + for evaluator in get_all_evaluators() + if evaluator.get("requires_llm_api_keys", False) + or ( + evaluator.get("settings_template", {}) + .get("requires_llm_api_keys", {}) + .get("default", False) + ) + ] + return evaluators_requiring_llm_keys + + @pytest.fixture() def auto_exact_match_evaluator_config(): return { @@ -286,3 +306,175 @@ 
def auto_ai_critique_evaluator_config(): @pytest.fixture() def deploy_to_environment_payload(): return {"environment_name": "string", "variant_id": "string"} + + +@pytest.fixture() +def rag_experiment_data_tree(): + return simple_rag_trace + + +@pytest.fixture() +def simple_experiment_data_tree(): + return simple_finance_assisstant_trace + + +@pytest.fixture() +def mapper_to_run_auto_exact_match_evaluation(): + return { + "prediction": "diversify.reporter.outputs.report[0]", + } + + +@pytest.fixture() +def mapper_to_run_rag_faithfulness_evaluation(): + return { + "question": "rag.retriever.internals.prompt", + "contexts": "rag.retriever.outputs.movies", + "answer": "rag.reporter.outputs.report", + } + + +@pytest.fixture() +def rag_faithfulness_evaluator_run_inputs(): + return { + "question_key": "List 6 movies about witches in the genre of fiction.", + "contexts_key": [ + "The Craft (1996) in ['Drama', 'Fantasy', 'Horror']: A newcomer to a Catholic prep high school falls in with a trio of outcast teenage girls who practice witchcraft and they all soon conjure up various spells and curses against those who even slightly anger them.", + "Oz the Great and Powerful (2013) in ['Adventure', 'Family', 'Fantasy']: A small-time magician is swept away to an enchanted land and is forced into a power struggle between three witches.", + "Snow White: A Tale of Terror (1997) in ['Fantasy', 'Horror']: In this dark take on the fairy tale, the growing hatred of a noblewoman, secretly a practitioner of the dark arts, for her stepdaughter, and the witch's horrifying attempts to kill her.", + "Into the Woods (2014) in ['Adventure', 'Fantasy', 'Musical']: A witch tasks a childless baker and his wife with procuring magical items from classic fairy tales to reverse the curse put on their family tree.", + "Wicked Stepmother (1989) in ['Comedy', 'Fantasy']: A mother/daughter pair of witches descend on a yuppie family's home and cause havoc, one at a time since they share one body & the 
other must live in a cat the rest of the time. Now it's up...", + "Hocus Pocus (1993) in ['Comedy', 'Family', 'Fantasy']: After three centuries, three witch sisters are resurrected in Salem Massachusetts on Halloween night, and it is up to two teen-agers, a young girl, and an immortal cat to put an end to the witches' reign of terror once and for all.", + "Warlock (1989) in ['Action', 'Fantasy', 'Horror']: A warlock flees from the 17th to the 20th century, with a witch-hunter in hot pursuit.", + "The Hexer (2001) in ['Adventure', 'Fantasy']: The adventures of Geralt of Rivea, \"The Witcher\".", + "Heavy Metal (1981) in ['Animation', 'Adventure', 'Fantasy']: A glowing orb terrorizes a young girl with a collection of stories of dark fantasy, eroticism and horror.", + ], + "answer_key": 'Witches in fiction are depicted through a mix of horror, fantasy, and dark comedy. \n\n"The Craft" (1996) delves into the complexities of teenage witchcraft, showcasing both empowerment and the darker repercussions of their actions. \n"Snow White: A Tale of Terror" (1997) offers a sinister twist on the classic story, highlighting the witch\'s envy and vengeful nature. \n"Hocus Pocus" (1993) delivers a comedic and adventurous take on witchcraft, as three resurrected witches wreak havoc in contemporary Salem', + } + + +@pytest.fixture() +def custom_code_snippet(): + return "from typing import Dict\nfrom random import uniform\n\ndef evaluate(\n app_params: Dict[str, str],\n inputs: Dict[str, str],\n output: str, # output of the llm app\n datapoint: Dict[str, str] # contains the testset row\n) -> float:\n return uniform(0.1, 0.9)" + + +@pytest.fixture() +def evaluators_payload_data(custom_code_snippet): + prompt_template = "We have an LLM App that we want to evaluate its outputs. 
Based on the prompt and the parameters provided below evaluate the output based on the evaluation strategy below:\nEvaluation strategy: 0 to 10 0 is very bad and 10 is very good.\nPrompt: {llm_app_prompt_template}\nInputs: country: {country}\nExpected Answer Column:{correct_answer}\nEvaluate this: {variant_output}\n\nAnswer ONLY with one of the given grading or evaluation options." + return { + "auto_regex_test": { + "inputs": { + "ground_truth": "The correct answer is 42", + "prediction": "The answer is 42", + }, + "settings": { + "regex_pattern": r"The\s+answer\s+is\s+42[.,]?", + "regex_should_match": True, + }, + }, + "field_match_test": { + "inputs": { + "ground_truth": {"message": "The correct answer is 42"}, + "prediction": '{"message": "The correct answer is 42"}', + }, + "settings": {"json_field": "ground_truth"}, + }, + "auto_custom_code_run": { + "inputs": { + "ground_truth": "The correct answer is 42", + "prediction": "The answer is 42", + "app_config": {}, + }, + "settings": { + "code": custom_code_snippet, + "correct_answer_key": "correct_answer", + }, + }, + "auto_ai_critique": { + "inputs": { + "ground_truth": "The correct answer is 42", + "prediction": "The answer is 42", + }, + "settings": { + "prompt_template": prompt_template, + "correct_answer_key": "correct_answer", + }, + "credentials": {"OPENAI_API_KEY": os.environ["OPENAI_API_KEY"]}, + }, + "auto_starts_with": { + "inputs": { + "ground_truth": "The correct answer is 42", + "prediction": "The answer is 42", + }, + "settings": {"prefix": "The", "case_sensitive": False}, + }, + "auto_ends_with": { + "inputs": { + "ground_truth": "The correct answer is 42", + "prediction": "The answer is 42", + }, + "settings": {"suffix": "42", "case_sensitive": False}, + }, + "auto_contains": { + "inputs": { + "ground_truth": "The correct answer is 42", + "prediction": "The answer is 42", + }, + "settings": {"substring": "answer is", "case_sensitive": False}, + }, + "auto_contains_any": { + "inputs": { + 
"ground_truth": "The correct answer is 42", + "prediction": "The answer is 42", + }, + "settings": {"substrings": "The,answer,42", "case_sensitive": False}, + }, + "auto_contains_all": { + "inputs": { + "ground_truth": "The correct answer is 42", + "prediction": "The answer is 42", + }, + "settings": {"substrings": "The,answer,is,42", "case_sensitive": False}, + }, + "auto_contains_json": { + "inputs": { + "ground_truth": "The correct answer is 42", + "prediction": '{"message": "The answer is 42"}', + }, + }, + "auto_json_diff": { + "inputs": { + "ground_truth": '{"message": "The correct answer is 42"}', + "prediction": '{"message": "The answer is 42"}', + }, + "settings": { + "compare_schema_only": True, + "predict_keys": True, + "case_insensitive_keys": False, + }, + }, + "auto_levenshtein_distance": { + "inputs": { + "ground_truth": "The correct answer is 42", + "prediction": "The answer is 42", + }, + "settings": {"threshold": 0.4}, + }, + "auto_similarity_match": { + "inputs": { + "ground_truth": "The correct answer is 42", + "prediction": "The answer is 42", + }, + "settings": { + "similarity_threshold": 0.4, + "correct_answer_key": "correct_answer", + }, + }, + "auto_semantic_similarity": { + "inputs": { + "ground_truth": "The correct answer is 42", + "prediction": "The answer is 42", + }, + "credentials": {"OPENAI_API_KEY": os.environ["OPENAI_API_KEY"]}, + }, + } diff --git a/agenta-backend/agenta_backend/tests/variants_main_router/test_variant_evaluators_router.py b/agenta-backend/agenta_backend/tests/variants_main_router/test_variant_evaluators_router.py index ecfd8e333..e8fd22c5e 100644 --- a/agenta-backend/agenta_backend/tests/variants_main_router/test_variant_evaluators_router.py +++ b/agenta-backend/agenta_backend/tests/variants_main_router/test_variant_evaluators_router.py @@ -176,7 +176,6 @@ async def fetch_evaluation_results(evaluation_id): f"{BACKEND_API_HOST}/evaluations/{evaluation_id}/results/", timeout=timeout ) response_data = response.json() 
- print("Response Data: ", response_data) assert response.status_code == 200 assert response_data["evaluation_id"] == evaluation_id @@ -269,6 +268,63 @@ async def create_evaluation_with_evaluator(evaluator_config_name): await wait_for_evaluation_to_finish(evaluation_id) +@pytest.mark.asyncio +async def test_create_evaluation_with_no_llm_keys(evaluators_requiring_llm_keys): + async with db_engine.get_session() as session: + app_result = await session.execute(select(AppDB).filter_by(app_name=APP_NAME)) + app = app_result.scalars().first() + + app_variant_result = await session.execute( + select(AppVariantDB).filter_by(app_id=app.id) + ) + app_variant = app_variant_result.scalars().first() + + testset_result = await session.execute( + select(TestSetDB).filter_by(project_id=app.project_id) + ) + testset = testset_result.scalars().first() + + # Prepare payload + payload = { + "app_id": str(app.id), + "variant_ids": [str(app_variant.id)], + "evaluators_configs": [], + "testset_id": str(testset.id), + "lm_providers_keys": {"MISTRAL_API_KEY": OPEN_AI_KEY}, + "rate_limit": { + "batch_size": 10, + "max_retries": 3, + "retry_delay": 3, + "delay_between_batches": 5, + }, + } + + # Fetch evaluator configs + response = await test_client.get( + f"{BACKEND_API_HOST}/evaluators/configs/?app_id={payload['app_id']}", + timeout=timeout, + ) + list_of_configs_ids = [] + evaluator_configs = response.json() + for evaluator_config in evaluator_configs: + if evaluator_config["evaluator_key"] in evaluators_requiring_llm_keys: + list_of_configs_ids.append(evaluator_config["id"]) + + # Update payload with list of configs ids + payload["evaluators_configs"] = list_of_configs_ids + + # Make request to create evaluation + response = await test_client.post( + f"{BACKEND_API_HOST}/evaluations/", json=payload, timeout=timeout + ) + + assert response.status_code == 500 + assert ( + response.json()["detail"] + == "OpenAI API key is required to run one or more of the specified evaluators." 
+ ) + + @pytest.mark.asyncio async def test_create_evaluation_auto_exact_match(): await create_evaluation_with_evaluator("auto_exact_match_evaluator_config") @@ -358,3 +414,103 @@ async def test_remove_running_template_app_container(): assert True except: assert False + + +@pytest.mark.asyncio +async def test_rag_experiment_tree_maps_correctly( + rag_experiment_data_tree, mapper_to_run_rag_faithfulness_evaluation +): + payload = { + "inputs": rag_experiment_data_tree, + "mapping": mapper_to_run_rag_faithfulness_evaluation, + } + response = await test_client.post( + f"{BACKEND_API_HOST}/evaluators/map/", + json=payload, + timeout=timeout, + ) + response_data = response.json() + assert response.status_code == 200 + assert ( + "question" in response_data["outputs"] + and "contexts" in response_data["outputs"] + and "answer" in response_data["outputs"] + ) == True + + +@pytest.mark.asyncio +async def test_simple_experiment_tree_maps_correctly( + simple_experiment_data_tree, mapper_to_run_auto_exact_match_evaluation +): + payload = { + "inputs": simple_experiment_data_tree, + "mapping": mapper_to_run_auto_exact_match_evaluation, + } + response = await test_client.post( + f"{BACKEND_API_HOST}/evaluators/map/", + json=payload, + timeout=timeout, + ) + response_data = response.json() + assert response.status_code == 200 + assert ( + "prediction" in response_data["outputs"] + and isinstance(response_data["outputs"]["prediction"], str) + ) == True + + +@pytest.mark.asyncio +async def test_rag_faithfulness_evaluator_run( + rag_faithfulness_evaluator_run_inputs, +): + payload = { + "inputs": rag_faithfulness_evaluator_run_inputs, + "credentials": {"OPENAI_API_KEY": os.environ["OPENAI_API_KEY"]}, + } + response = await test_client.post( + f"{BACKEND_API_HOST}/evaluators/rag_faithfulness/run/", + json=payload, + timeout=timeout, + ) + assert response.status_code == 200 + assert 0.0 <= response.json()["outputs"]["score"] <= 1.0 + assert 
isinstance(response.json()["outputs"]["score"], float) + + +@pytest.mark.asyncio +async def test_custom_code_evaluator_run(custom_code_snippet): + payload = { + "inputs": { + "ground_truth": "The correct answer is 42", + "prediction": "The answer is 42", + "app_config": {}, + }, + "settings": { + "code": custom_code_snippet, + "correct_answer_key": "correct_answer", + }, + } + response = await test_client.post( + f"{BACKEND_API_HOST}/evaluators/auto_custom_code_run/run/", + json=payload, + timeout=timeout, + ) + assert response.status_code == 200 + assert 0.0 <= response.json()["outputs"]["score"] <= 1.0 + assert isinstance(response.json()["outputs"]["score"], float) + + +@pytest.mark.asyncio +async def test_run_evaluators_via_api( + evaluators_payload_data, +): + evaluators_response_status_code = [] + for evaluator_key, evaluator_payload in evaluators_payload_data.items(): + response = await test_client.post( + f"{BACKEND_API_HOST}/evaluators/{evaluator_key}/run/", + json=evaluator_payload, + timeout=timeout, + ) + evaluators_response_status_code.append(response.status_code) + + assert evaluators_response_status_code.count(200) == 14 diff --git a/agenta-backend/poetry.lock b/agenta-backend/poetry.lock index 209a8f29d..1d2628780 100644 --- a/agenta-backend/poetry.lock +++ b/agenta-backend/poetry.lock @@ -181,18 +181,18 @@ frozenlist = ">=1.1.0" [[package]] name = "aiosmtplib" -version = "1.1.6" +version = "3.0.2" description = "asyncio SMTP client" optional = false -python-versions = ">=3.5.2,<4.0.0" +python-versions = ">=3.8" files = [ - {file = "aiosmtplib-1.1.6-py3-none-any.whl", hash = "sha256:84174765778b2c5e0e207fbce0a769202fcf0c3de81faa87cc03551a6333bfa9"}, - {file = "aiosmtplib-1.1.6.tar.gz", hash = "sha256:d138fe6ffecbc9e6320269690b9ac0b75e540ef96e8f5c77d4a306760014dce2"}, + {file = "aiosmtplib-3.0.2-py3-none-any.whl", hash = "sha256:8783059603a34834c7c90ca51103c3aa129d5922003b5ce98dbaa6d4440f10fc"}, + {file = "aiosmtplib-3.0.2.tar.gz", hash = 
"sha256:08fd840f9dbc23258025dca229e8a8f04d2ccf3ecb1319585615bfc7933f7f47"}, ] [package.extras] -docs = ["sphinx (>=2,<4)", "sphinx_autodoc_typehints (>=1.7.0,<2.0.0)"] -uvloop = ["uvloop (>=0.13,<0.15)"] +docs = ["furo (>=2023.9.10)", "sphinx (>=7.0.0)", "sphinx-autodoc-typehints (>=1.24.0)", "sphinx-copybutton (>=0.5.0)"] +uvloop = ["uvloop (>=0.18)"] [[package]] name = "alembic" @@ -579,63 +579,78 @@ files = [ [[package]] name = "cffi" -version = "1.16.0" +version = "1.17.1" description = "Foreign Function Interface for Python calling C code." optional = false python-versions = ">=3.8" files = [ - {file = "cffi-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088"}, - {file = "cffi-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ac0f5edd2360eea2f1daa9e26a41db02dd4b0451b48f7c318e217ee092a213e9"}, - {file = "cffi-1.16.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7e61e3e4fa664a8588aa25c883eab612a188c725755afff6289454d6362b9673"}, - {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a72e8961a86d19bdb45851d8f1f08b041ea37d2bd8d4fd19903bc3083d80c896"}, - {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5b50bf3f55561dac5438f8e70bfcdfd74543fd60df5fa5f62d94e5867deca684"}, - {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7651c50c8c5ef7bdb41108b7b8c5a83013bfaa8a935590c5d74627c047a583c7"}, - {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4108df7fe9b707191e55f33efbcb2d81928e10cea45527879a4749cbe472614"}, - {file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:32c68ef735dbe5857c810328cb2481e24722a59a2003018885514d4c09af9743"}, - {file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:673739cb539f8cdaa07d92d02efa93c9ccf87e345b9a0b556e3ecc666718468d"}, - {file = "cffi-1.16.0-cp310-cp310-win32.whl", hash = "sha256:9f90389693731ff1f659e55c7d1640e2ec43ff725cc61b04b2f9c6d8d017df6a"}, - {file = "cffi-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:e6024675e67af929088fda399b2094574609396b1decb609c55fa58b028a32a1"}, - {file = "cffi-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b84834d0cf97e7d27dd5b7f3aca7b6e9263c56308ab9dc8aae9784abb774d404"}, - {file = "cffi-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1b8ebc27c014c59692bb2664c7d13ce7a6e9a629be20e54e7271fa696ff2b417"}, - {file = "cffi-1.16.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ee07e47c12890ef248766a6e55bd38ebfb2bb8edd4142d56db91b21ea68b7627"}, - {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8a9d3ebe49f084ad71f9269834ceccbf398253c9fac910c4fd7053ff1386936"}, - {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e70f54f1796669ef691ca07d046cd81a29cb4deb1e5f942003f401c0c4a2695d"}, - {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5bf44d66cdf9e893637896c7faa22298baebcd18d1ddb6d2626a6e39793a1d56"}, - {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7b78010e7b97fef4bee1e896df8a4bbb6712b7f05b7ef630f9d1da00f6444d2e"}, - {file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c6a164aa47843fb1b01e941d385aab7215563bb8816d80ff3a363a9f8448a8dc"}, - {file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e09f3ff613345df5e8c3667da1d918f9149bd623cd9070c983c013792a9a62eb"}, - {file = "cffi-1.16.0-cp311-cp311-win32.whl", hash = "sha256:2c56b361916f390cd758a57f2e16233eb4f64bcbeee88a4881ea90fca14dc6ab"}, - {file = "cffi-1.16.0-cp311-cp311-win_amd64.whl", hash = 
"sha256:db8e577c19c0fda0beb7e0d4e09e0ba74b1e4c092e0e40bfa12fe05b6f6d75ba"}, - {file = "cffi-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:fa3a0128b152627161ce47201262d3140edb5a5c3da88d73a1b790a959126956"}, - {file = "cffi-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:68e7c44931cc171c54ccb702482e9fc723192e88d25a0e133edd7aff8fcd1f6e"}, - {file = "cffi-1.16.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:abd808f9c129ba2beda4cfc53bde801e5bcf9d6e0f22f095e45327c038bfe68e"}, - {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88e2b3c14bdb32e440be531ade29d3c50a1a59cd4e51b1dd8b0865c54ea5d2e2"}, - {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcc8eb6d5902bb1cf6dc4f187ee3ea80a1eba0a89aba40a5cb20a5087d961357"}, - {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b7be2d771cdba2942e13215c4e340bfd76398e9227ad10402a8767ab1865d2e6"}, - {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e715596e683d2ce000574bae5d07bd522c781a822866c20495e52520564f0969"}, - {file = "cffi-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2d92b25dbf6cae33f65005baf472d2c245c050b1ce709cc4588cdcdd5495b520"}, - {file = "cffi-1.16.0-cp312-cp312-win32.whl", hash = "sha256:b2ca4e77f9f47c55c194982e10f058db063937845bb2b7a86c84a6cfe0aefa8b"}, - {file = "cffi-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:68678abf380b42ce21a5f2abde8efee05c114c2fdb2e9eef2efdb0257fba1235"}, - {file = "cffi-1.16.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0c9ef6ff37e974b73c25eecc13952c55bceed9112be2d9d938ded8e856138bcc"}, - {file = "cffi-1.16.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a09582f178759ee8128d9270cd1344154fd473bb77d94ce0aeb2a93ebf0feaf0"}, - {file = 
"cffi-1.16.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e760191dd42581e023a68b758769e2da259b5d52e3103c6060ddc02c9edb8d7b"}, - {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:80876338e19c951fdfed6198e70bc88f1c9758b94578d5a7c4c91a87af3cf31c"}, - {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a6a14b17d7e17fa0d207ac08642c8820f84f25ce17a442fd15e27ea18d67c59b"}, - {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6602bc8dc6f3a9e02b6c22c4fc1e47aa50f8f8e6d3f78a5e16ac33ef5fefa324"}, - {file = "cffi-1.16.0-cp38-cp38-win32.whl", hash = "sha256:131fd094d1065b19540c3d72594260f118b231090295d8c34e19a7bbcf2e860a"}, - {file = "cffi-1.16.0-cp38-cp38-win_amd64.whl", hash = "sha256:31d13b0f99e0836b7ff893d37af07366ebc90b678b6664c955b54561fc36ef36"}, - {file = "cffi-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:582215a0e9adbe0e379761260553ba11c58943e4bbe9c36430c4ca6ac74b15ed"}, - {file = "cffi-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b29ebffcf550f9da55bec9e02ad430c992a87e5f512cd63388abb76f1036d8d2"}, - {file = "cffi-1.16.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dc9b18bf40cc75f66f40a7379f6a9513244fe33c0e8aa72e2d56b0196a7ef872"}, - {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cb4a35b3642fc5c005a6755a5d17c6c8b6bcb6981baf81cea8bfbc8903e8ba8"}, - {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b86851a328eedc692acf81fb05444bdf1891747c25af7529e39ddafaf68a4f3f"}, - {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c0f31130ebc2d37cdd8e44605fb5fa7ad59049298b3f745c74fa74c62fbfcfc4"}, - {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:8f8e709127c6c77446a8c0a8c8bf3c8ee706a06cd44b1e827c3e6a2ee6b8c098"}, - {file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:748dcd1e3d3d7cd5443ef03ce8685043294ad6bd7c02a38d1bd367cfd968e000"}, - {file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8895613bcc094d4a1b2dbe179d88d7fb4a15cee43c052e8885783fac397d91fe"}, - {file = "cffi-1.16.0-cp39-cp39-win32.whl", hash = "sha256:ed86a35631f7bfbb28e108dd96773b9d5a6ce4811cf6ea468bb6a359b256b1e4"}, - {file = "cffi-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:3686dffb02459559c74dd3d81748269ffb0eb027c39a6fc99502de37d501faa8"}, - {file = "cffi-1.16.0.tar.gz", hash = "sha256:bcb3ef43e58665bbda2fb198698fcae6776483e0c4a631aa5647806c25e02cc0"}, + {file = "cffi-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14"}, + {file = "cffi-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67"}, + {file = "cffi-1.17.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:edae79245293e15384b51f88b00613ba9f7198016a5948b5dddf4917d4d26382"}, + {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45398b671ac6d70e67da8e4224a065cec6a93541bb7aebe1b198a61b58c7b702"}, + {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ad9413ccdeda48c5afdae7e4fa2192157e991ff761e7ab8fdd8926f40b160cc3"}, + {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5da5719280082ac6bd9aa7becb3938dc9f9cbd57fac7d2871717b1feb0902ab6"}, + {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bb1a08b8008b281856e5971307cc386a8e9c5b625ac297e853d36da6efe9c17"}, + {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = 
"sha256:045d61c734659cc045141be4bae381a41d89b741f795af1dd018bfb532fd0df8"}, + {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6883e737d7d9e4899a8a695e00ec36bd4e5e4f18fabe0aca0efe0a4b44cdb13e"}, + {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6b8b4a92e1c65048ff98cfe1f735ef8f1ceb72e3d5f0c25fdb12087a23da22be"}, + {file = "cffi-1.17.1-cp310-cp310-win32.whl", hash = "sha256:c9c3d058ebabb74db66e431095118094d06abf53284d9c81f27300d0e0d8bc7c"}, + {file = "cffi-1.17.1-cp310-cp310-win_amd64.whl", hash = "sha256:0f048dcf80db46f0098ccac01132761580d28e28bc0f78ae0d58048063317e15"}, + {file = "cffi-1.17.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a45e3c6913c5b87b3ff120dcdc03f6131fa0065027d0ed7ee6190736a74cd401"}, + {file = "cffi-1.17.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:30c5e0cb5ae493c04c8b42916e52ca38079f1b235c2f8ae5f4527b963c401caf"}, + {file = "cffi-1.17.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f75c7ab1f9e4aca5414ed4d8e5c0e303a34f4421f8a0d47a4d019ceff0ab6af4"}, + {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1ed2dd2972641495a3ec98445e09766f077aee98a1c896dcb4ad0d303628e41"}, + {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:46bf43160c1a35f7ec506d254e5c890f3c03648a4dbac12d624e4490a7046cd1"}, + {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a24ed04c8ffd54b0729c07cee15a81d964e6fee0e3d4d342a27b020d22959dc6"}, + {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:610faea79c43e44c71e1ec53a554553fa22321b65fae24889706c0a84d4ad86d"}, + {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a9b15d491f3ad5d692e11f6b71f7857e7835eb677955c00cc0aefcd0669adaf6"}, + {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_i686.whl", hash = 
"sha256:de2ea4b5833625383e464549fec1bc395c1bdeeb5f25c4a3a82b5a8c756ec22f"}, + {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fc48c783f9c87e60831201f2cce7f3b2e4846bf4d8728eabe54d60700b318a0b"}, + {file = "cffi-1.17.1-cp311-cp311-win32.whl", hash = "sha256:85a950a4ac9c359340d5963966e3e0a94a676bd6245a4b55bc43949eee26a655"}, + {file = "cffi-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:caaf0640ef5f5517f49bc275eca1406b0ffa6aa184892812030f04c2abf589a0"}, + {file = "cffi-1.17.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4"}, + {file = "cffi-1.17.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c"}, + {file = "cffi-1.17.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36"}, + {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da95af8214998d77a98cc14e3a3bd00aa191526343078b530ceb0bd710fb48a5"}, + {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d63afe322132c194cf832bfec0dc69a99fb9bb6bbd550f161a49e9e855cc78ff"}, + {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99"}, + {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b62ce867176a75d03a665bad002af8e6d54644fad99a3c70905c543130e39d93"}, + {file = "cffi-1.17.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:386c8bf53c502fff58903061338ce4f4950cbdcb23e2902d86c0f722b786bbe3"}, + {file = "cffi-1.17.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ceb10419a9adf4460ea14cfd6bc43d08701f0835e979bf821052f1805850fe8"}, + {file = "cffi-1.17.1-cp312-cp312-win32.whl", hash = 
"sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65"}, + {file = "cffi-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903"}, + {file = "cffi-1.17.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f3a2b4222ce6b60e2e8b337bb9596923045681d71e5a082783484d845390938e"}, + {file = "cffi-1.17.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2"}, + {file = "cffi-1.17.1-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d01b12eeeb4427d3110de311e1774046ad344f5b1a7403101878976ecd7a10f3"}, + {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:706510fe141c86a69c8ddc029c7910003a17353970cff3b904ff0686a5927683"}, + {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de55b766c7aa2e2a3092c51e0483d700341182f08e67c63630d5b6f200bb28e5"}, + {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c59d6e989d07460165cc5ad3c61f9fd8f1b4796eacbd81cee78957842b834af4"}, + {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd398dbc6773384a17fe0d3e7eeb8d1a21c2200473ee6806bb5e6a8e62bb73dd"}, + {file = "cffi-1.17.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:3edc8d958eb099c634dace3c7e16560ae474aa3803a5df240542b305d14e14ed"}, + {file = "cffi-1.17.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9"}, + {file = "cffi-1.17.1-cp313-cp313-win32.whl", hash = "sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d"}, + {file = "cffi-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a"}, + {file = "cffi-1.17.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = 
"sha256:636062ea65bd0195bc012fea9321aca499c0504409f413dc88af450b57ffd03b"}, + {file = "cffi-1.17.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c7eac2ef9b63c79431bc4b25f1cd649d7f061a28808cbc6c47b534bd789ef964"}, + {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e221cf152cff04059d011ee126477f0d9588303eb57e88923578ace7baad17f9"}, + {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:31000ec67d4221a71bd3f67df918b1f88f676f1c3b535a7eb473255fdc0b83fc"}, + {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6f17be4345073b0a7b8ea599688f692ac3ef23ce28e5df79c04de519dbc4912c"}, + {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e2b1fac190ae3ebfe37b979cc1ce69c81f4e4fe5746bb401dca63a9062cdaf1"}, + {file = "cffi-1.17.1-cp38-cp38-win32.whl", hash = "sha256:7596d6620d3fa590f677e9ee430df2958d2d6d6de2feeae5b20e82c00b76fbf8"}, + {file = "cffi-1.17.1-cp38-cp38-win_amd64.whl", hash = "sha256:78122be759c3f8a014ce010908ae03364d00a1f81ab5c7f4a7a5120607ea56e1"}, + {file = "cffi-1.17.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b2ab587605f4ba0bf81dc0cb08a41bd1c0a5906bd59243d56bad7668a6fc6c16"}, + {file = "cffi-1.17.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:28b16024becceed8c6dfbc75629e27788d8a3f9030691a1dbf9821a128b22c36"}, + {file = "cffi-1.17.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d599671f396c4723d016dbddb72fe8e0397082b0a77a4fab8028923bec050e8"}, + {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca74b8dbe6e8e8263c0ffd60277de77dcee6c837a3d0881d8c1ead7268c9e576"}, + {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:f7f5baafcc48261359e14bcd6d9bff6d4b28d9103847c9e136694cb0501aef87"}, + {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:98e3969bcff97cae1b2def8ba499ea3d6f31ddfdb7635374834cf89a1a08ecf0"}, + {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cdf5ce3acdfd1661132f2a9c19cac174758dc2352bfe37d98aa7512c6b7178b3"}, + {file = "cffi-1.17.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9755e4345d1ec879e3849e62222a18c7174d65a6a92d5b346b1863912168b595"}, + {file = "cffi-1.17.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f1e22e8c4419538cb197e4dd60acc919d7696e5ef98ee4da4e01d3f8cfa4cc5a"}, + {file = "cffi-1.17.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c03e868a0b3bc35839ba98e74211ed2b05d2119be4e8a0f224fba9384f1fe02e"}, + {file = "cffi-1.17.1-cp39-cp39-win32.whl", hash = "sha256:e31ae45bc2e29f6b2abd0de1cc3b9d5205aa847cafaecb8af1476a609a2f6eb7"}, + {file = "cffi-1.17.1-cp39-cp39-win_amd64.whl", hash = "sha256:d016c76bdd850f3c626af19b0542c9677ba156e4ee4fccfdd7848803533ef662"}, + {file = "cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824"}, ] [package.dependencies] @@ -827,43 +842,38 @@ files = [ [[package]] name = "cryptography" -version = "42.0.7" +version = "43.0.1" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 
optional = false python-versions = ">=3.7" files = [ - {file = "cryptography-42.0.7-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:a987f840718078212fdf4504d0fd4c6effe34a7e4740378e59d47696e8dfb477"}, - {file = "cryptography-42.0.7-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:bd13b5e9b543532453de08bcdc3cc7cebec6f9883e886fd20a92f26940fd3e7a"}, - {file = "cryptography-42.0.7-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a79165431551042cc9d1d90e6145d5d0d3ab0f2d66326c201d9b0e7f5bf43604"}, - {file = "cryptography-42.0.7-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a47787a5e3649008a1102d3df55424e86606c9bae6fb77ac59afe06d234605f8"}, - {file = "cryptography-42.0.7-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:02c0eee2d7133bdbbc5e24441258d5d2244beb31da5ed19fbb80315f4bbbff55"}, - {file = "cryptography-42.0.7-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:5e44507bf8d14b36b8389b226665d597bc0f18ea035d75b4e53c7b1ea84583cc"}, - {file = "cryptography-42.0.7-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:7f8b25fa616d8b846aef64b15c606bb0828dbc35faf90566eb139aa9cff67af2"}, - {file = "cryptography-42.0.7-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:93a3209f6bb2b33e725ed08ee0991b92976dfdcf4e8b38646540674fc7508e13"}, - {file = "cryptography-42.0.7-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:e6b8f1881dac458c34778d0a424ae5769de30544fc678eac51c1c8bb2183e9da"}, - {file = "cryptography-42.0.7-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:3de9a45d3b2b7d8088c3fbf1ed4395dfeff79d07842217b38df14ef09ce1d8d7"}, - {file = "cryptography-42.0.7-cp37-abi3-win32.whl", hash = "sha256:789caea816c6704f63f6241a519bfa347f72fbd67ba28d04636b7c6b7da94b0b"}, - {file = "cryptography-42.0.7-cp37-abi3-win_amd64.whl", hash = "sha256:8cb8ce7c3347fcf9446f201dc30e2d5a3c898d009126010cbd1f443f28b52678"}, - {file = "cryptography-42.0.7-cp39-abi3-macosx_10_12_universal2.whl", hash = 
"sha256:a3a5ac8b56fe37f3125e5b72b61dcde43283e5370827f5233893d461b7360cd4"}, - {file = "cryptography-42.0.7-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:779245e13b9a6638df14641d029add5dc17edbef6ec915688f3acb9e720a5858"}, - {file = "cryptography-42.0.7-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d563795db98b4cd57742a78a288cdbdc9daedac29f2239793071fe114f13785"}, - {file = "cryptography-42.0.7-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:31adb7d06fe4383226c3e963471f6837742889b3c4caa55aac20ad951bc8ffda"}, - {file = "cryptography-42.0.7-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:efd0bf5205240182e0f13bcaea41be4fdf5c22c5129fc7ced4a0282ac86998c9"}, - {file = "cryptography-42.0.7-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:a9bc127cdc4ecf87a5ea22a2556cab6c7eda2923f84e4f3cc588e8470ce4e42e"}, - {file = "cryptography-42.0.7-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:3577d029bc3f4827dd5bf8bf7710cac13527b470bbf1820a3f394adb38ed7d5f"}, - {file = "cryptography-42.0.7-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:2e47577f9b18723fa294b0ea9a17d5e53a227867a0a4904a1a076d1646d45ca1"}, - {file = "cryptography-42.0.7-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:1a58839984d9cb34c855197043eaae2c187d930ca6d644612843b4fe8513c886"}, - {file = "cryptography-42.0.7-cp39-abi3-win32.whl", hash = "sha256:e6b79d0adb01aae87e8a44c2b64bc3f3fe59515280e00fb6d57a7267a2583cda"}, - {file = "cryptography-42.0.7-cp39-abi3-win_amd64.whl", hash = "sha256:16268d46086bb8ad5bf0a2b5544d8a9ed87a0e33f5e77dd3c3301e63d941a83b"}, - {file = "cryptography-42.0.7-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:2954fccea107026512b15afb4aa664a5640cd0af630e2ee3962f2602693f0c82"}, - {file = "cryptography-42.0.7-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:362e7197754c231797ec45ee081f3088a27a47c6c01eff2ac83f60f85a50fe60"}, - {file = 
"cryptography-42.0.7-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:4f698edacf9c9e0371112792558d2f705b5645076cc0aaae02f816a0171770fd"}, - {file = "cryptography-42.0.7-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:5482e789294854c28237bba77c4c83be698be740e31a3ae5e879ee5444166582"}, - {file = "cryptography-42.0.7-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:e9b2a6309f14c0497f348d08a065d52f3020656f675819fc405fb63bbcd26562"}, - {file = "cryptography-42.0.7-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:d8e3098721b84392ee45af2dd554c947c32cc52f862b6a3ae982dbb90f577f14"}, - {file = "cryptography-42.0.7-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c65f96dad14f8528a447414125e1fc8feb2ad5a272b8f68477abbcc1ea7d94b9"}, - {file = "cryptography-42.0.7-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:36017400817987670037fbb0324d71489b6ead6231c9604f8fc1f7d008087c68"}, - {file = "cryptography-42.0.7.tar.gz", hash = "sha256:ecbfbc00bf55888edda9868a4cf927205de8499e7fabe6c050322298382953f2"}, + {file = "cryptography-43.0.1-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:8385d98f6a3bf8bb2d65a73e17ed87a3ba84f6991c155691c51112075f9ffc5d"}, + {file = "cryptography-43.0.1-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27e613d7077ac613e399270253259d9d53872aaf657471473ebfc9a52935c062"}, + {file = "cryptography-43.0.1-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:68aaecc4178e90719e95298515979814bda0cbada1256a4485414860bd7ab962"}, + {file = "cryptography-43.0.1-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:de41fd81a41e53267cb020bb3a7212861da53a7d39f863585d13ea11049cf277"}, + {file = "cryptography-43.0.1-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:f98bf604c82c416bc829e490c700ca1553eafdf2912a91e23a79d97d9801372a"}, + {file = "cryptography-43.0.1-cp37-abi3-musllinux_1_2_aarch64.whl", hash = 
"sha256:61ec41068b7b74268fa86e3e9e12b9f0c21fcf65434571dbb13d954bceb08042"}, + {file = "cryptography-43.0.1-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:014f58110f53237ace6a408b5beb6c427b64e084eb451ef25a28308270086494"}, + {file = "cryptography-43.0.1-cp37-abi3-win32.whl", hash = "sha256:2bd51274dcd59f09dd952afb696bf9c61a7a49dfc764c04dd33ef7a6b502a1e2"}, + {file = "cryptography-43.0.1-cp37-abi3-win_amd64.whl", hash = "sha256:666ae11966643886c2987b3b721899d250855718d6d9ce41b521252a17985f4d"}, + {file = "cryptography-43.0.1-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:ac119bb76b9faa00f48128b7f5679e1d8d437365c5d26f1c2c3f0da4ce1b553d"}, + {file = "cryptography-43.0.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1bbcce1a551e262dfbafb6e6252f1ae36a248e615ca44ba302df077a846a8806"}, + {file = "cryptography-43.0.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58d4e9129985185a06d849aa6df265bdd5a74ca6e1b736a77959b498e0505b85"}, + {file = "cryptography-43.0.1-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:d03a475165f3134f773d1388aeb19c2d25ba88b6a9733c5c590b9ff7bbfa2e0c"}, + {file = "cryptography-43.0.1-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:511f4273808ab590912a93ddb4e3914dfd8a388fed883361b02dea3791f292e1"}, + {file = "cryptography-43.0.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:80eda8b3e173f0f247f711eef62be51b599b5d425c429b5d4ca6a05e9e856baa"}, + {file = "cryptography-43.0.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:38926c50cff6f533f8a2dae3d7f19541432610d114a70808f0926d5aaa7121e4"}, + {file = "cryptography-43.0.1-cp39-abi3-win32.whl", hash = "sha256:a575913fb06e05e6b4b814d7f7468c2c660e8bb16d8d5a1faf9b33ccc569dd47"}, + {file = "cryptography-43.0.1-cp39-abi3-win_amd64.whl", hash = "sha256:d75601ad10b059ec832e78823b348bfa1a59f6b8d545db3a24fd44362a1564cb"}, + {file = "cryptography-43.0.1-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = 
"sha256:ea25acb556320250756e53f9e20a4177515f012c9eaea17eb7587a8c4d8ae034"}, + {file = "cryptography-43.0.1-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c1332724be35d23a854994ff0b66530119500b6053d0bd3363265f7e5e77288d"}, + {file = "cryptography-43.0.1-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:fba1007b3ef89946dbbb515aeeb41e30203b004f0b4b00e5e16078b518563289"}, + {file = "cryptography-43.0.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:5b43d1ea6b378b54a1dc99dd8a2b5be47658fe9a7ce0a58ff0b55f4b43ef2b84"}, + {file = "cryptography-43.0.1-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:88cce104c36870d70c49c7c8fd22885875d950d9ee6ab54df2745f83ba0dc365"}, + {file = "cryptography-43.0.1-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:9d3cdb25fa98afdd3d0892d132b8d7139e2c087da1712041f6b762e4f807cc96"}, + {file = "cryptography-43.0.1-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:e710bf40870f4db63c3d7d929aa9e09e4e7ee219e703f949ec4073b4294f6172"}, + {file = "cryptography-43.0.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7c05650fe8023c5ed0d46793d4b7d7e6cd9c04e68eabe5b0aeea836e37bdcec2"}, + {file = "cryptography-43.0.1.tar.gz", hash = "sha256:203e92a75716d8cfb491dc47c79e17d0d9207ccffcbcb35f598fbe463ae3444d"}, ] [package.dependencies] @@ -876,7 +886,7 @@ nox = ["nox"] pep8test = ["check-sdist", "click", "mypy", "ruff"] sdist = ["build"] ssh = ["bcrypt (>=3.1.5)"] -test = ["certifi", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] +test = ["certifi", "cryptography-vectors (==43.0.1)", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] test-randomorder = ["pytest-randomly"] [[package]] @@ -998,19 +1008,19 @@ all = ["email-validator (>=2.0.0)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)" [[package]] name = "filelock" -version = "3.14.0" +version = "3.16.1" description = "A platform independent file lock." 
optional = false python-versions = ">=3.8" files = [ - {file = "filelock-3.14.0-py3-none-any.whl", hash = "sha256:43339835842f110ca7ae60f1e1c160714c5a6afd15a2873419ab185334975c0f"}, - {file = "filelock-3.14.0.tar.gz", hash = "sha256:6ea72da3be9b8c82afd3edcf99f2fffbb5076335a5ae4d03248bb5b6c3eae78a"}, + {file = "filelock-3.16.1-py3-none-any.whl", hash = "sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0"}, + {file = "filelock-3.16.1.tar.gz", hash = "sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435"}, ] [package.extras] -docs = ["furo (>=2023.9.10)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"] -testing = ["covdefaults (>=2.3)", "coverage (>=7.3.2)", "diff-cover (>=8.0.1)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)", "pytest-timeout (>=2.2)"] -typing = ["typing-extensions (>=4.8)"] +docs = ["furo (>=2024.8.6)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4.1)"] +testing = ["covdefaults (>=2.3)", "coverage (>=7.6.1)", "diff-cover (>=9.2)", "pytest (>=8.3.3)", "pytest-asyncio (>=0.24)", "pytest-cov (>=5)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.26.4)"] +typing = ["typing-extensions (>=4.12.2)"] [[package]] name = "frozenlist" @@ -2176,13 +2186,13 @@ typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" [[package]] name = "pyjwt" -version = "2.8.0" +version = "2.9.0" description = "JSON Web Token implementation in Python" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "PyJWT-2.8.0-py3-none-any.whl", hash = "sha256:59127c392cc44c2da5bb3192169a91f429924e17aff6534d70fdc02ab3e04320"}, - {file = "PyJWT-2.8.0.tar.gz", hash = "sha256:57e28d156e3d5c10088e0c68abb90bfac3df82b40a71bd0daa20c65ccd5c23de"}, + {file = "PyJWT-2.9.0-py3-none-any.whl", hash = "sha256:3b02fb0f44517787776cf48f2ae25d8e14f300e6d7545a4315cee571a415e850"}, + {file = "pyjwt-2.9.0.tar.gz", hash = 
"sha256:7e1e5b56cc735432a7369cbfa0efe50fa113ebecdc04ae6922deba8b84582d0c"}, ] [package.dependencies] @@ -2190,8 +2200,8 @@ cryptography = {version = ">=3.4.0", optional = true, markers = "extra == \"cryp [package.extras] crypto = ["cryptography (>=3.4.0)"] -dev = ["coverage[toml] (==5.0.4)", "cryptography (>=3.4.0)", "pre-commit", "pytest (>=6.0.0,<7.0.0)", "sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"] -docs = ["sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"] +dev = ["coverage[toml] (==5.0.4)", "cryptography (>=3.4.0)", "pre-commit", "pytest (>=6.0.0,<7.0.0)", "sphinx", "sphinx-rtd-theme", "zope.interface"] +docs = ["sphinx", "sphinx-rtd-theme", "zope.interface"] tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"] [[package]] @@ -2373,13 +2383,13 @@ dev = ["atomicwrites (==1.2.1)", "attrs (==19.2.0)", "coverage (==6.5.0)", "hatc [[package]] name = "pytz" -version = "2024.1" +version = "2024.2" description = "World timezone definitions, modern and historical" optional = false python-versions = "*" files = [ - {file = "pytz-2024.1-py2.py3-none-any.whl", hash = "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319"}, - {file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"}, + {file = "pytz-2024.2-py2.py3-none-any.whl", hash = "sha256:31c7c1817eb7fae7ca4b8c7ee50c72f93aa2dd863de768e1ef4245d426aa0725"}, + {file = "pytz-2024.2.tar.gz", hash = "sha256:2aa355083c50a0f93fa581709deac0c9ad65cca8a9e9beac660adcbd493c798a"}, ] [[package]] @@ -3033,24 +3043,24 @@ full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart (>=0.0.7 [[package]] name = "supertokens-python" -version = "0.15.3" +version = "0.24.2" description = "SuperTokens SDK for Python" optional = false python-versions = ">=3.7" files = [ - {file = "supertokens_python-0.15.3-py3-none-any.whl", hash = "sha256:153173fd93b12a023f96f547208e27692fd08471bfc92dc94c6e23be5f98e629"}, - {file 
= "supertokens_python-0.15.3.tar.gz", hash = "sha256:233e774f09e7470af025a22fa124c09bcddf4b59aeab86d87211e5f3afb8dac3"}, + {file = "supertokens_python-0.24.2-py3-none-any.whl", hash = "sha256:a0454806e69006a1aeb8f7436a70671f81cebb82762bebb7923442bddc507bd4"}, + {file = "supertokens_python-0.24.2.tar.gz", hash = "sha256:5873f6624ff69914124d3b092cb21fea76cc5a58fb3f3476721d1d06c62e9bf2"}, ] [package.dependencies] -aiosmtplib = "1.1.6" +aiosmtplib = ">=1.1.6,<4.0.0" asgiref = ">=3.4.1,<4" Deprecated = "1.2.13" -httpx = ">=0.15.0,<0.25.0" +httpx = ">=0.15.0,<=0.26.0" phonenumbers = "8.12.48" pkce = "1.0.3" pycryptodome = "==3.10.*" -PyJWT = {version = ">=2.6.0,<3.0.0", extras = ["crypto"]} +PyJWT = {version = ">=2.5.0,<3.0.0", extras = ["crypto"]} tldextract = "3.1.0" twilio = "7.9.1" typing-extensions = ">=4.1.1,<5.0.0" @@ -3058,6 +3068,7 @@ typing-extensions = ">=4.1.1,<5.0.0" [package.extras] django = ["django (>=3)", "django-cors-headers (==3.11.0)", "django-stubs (==1.9.0)", "python-dotenv (==0.19.2)", "uvicorn (==0.18.2)"] django2x = ["django (>=2,<3)", "django-cors-headers (==3.11.0)", "django-stubs (==1.9.0)", "gunicorn (==20.1.0)", "python-dotenv (==0.19.2)"] +drf = ["adrf", "django (>=4)", "django-cors-headers (==3.11.0)", "django-stubs (==1.9.0)", "djangorestframework", "gunicorn (==20.1.0)", "python-dotenv (==0.19.2)", "tzdata (==2021.5)", "uvicorn (==0.18.2)"] fastapi = ["Fastapi", "python-dotenv (==0.19.2)", "respx (==0.19.2)", "uvicorn (==0.18.2)"] flask = ["Flask", "flask-cors", "python-dotenv (==0.19.2)"] @@ -3549,4 +3560,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = ">=3.9,<4.0" -content-hash = "8af5a2fcfe8b30f6c8b36f6ef1e10e22ddca741515d817af923f51aca6affa83" +content-hash = "8af5a2fcfe8b30f6c8b36f6ef1e10e22ddca741515d817af923f51aca6affa83" \ No newline at end of file diff --git a/agenta-backend/pyproject.toml b/agenta-backend/pyproject.toml index 53a825406..89ec7371d 100644 --- a/agenta-backend/pyproject.toml +++ 
b/agenta-backend/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "agenta_backend" -version = "0.24.4" +version = "0.25.2" description = "" authors = ["Mahmoud Mabrouk "] readme = "README.md" @@ -18,7 +18,6 @@ python-multipart = "^0.0.7" backoff = "^2.2.1" redis = "^4.6.0" aiodocker = "^0.21.0" -supertokens-python = "^0.15.1" sendgrid = "^6.10.0" restrictedpython = { version = "^6.2", python = ">=3.9,<3.12" } pytest-mock = "^3.11.1" @@ -42,6 +41,7 @@ tqdm = "^4.66.4" alembic = "^1.13.2" numpy = "1.26.3" autoevals = "^0.0.83" +supertokens-python = "^0.24.2" protobuf = "^4.25.4" diff --git a/agenta-cli/agenta/sdk/decorators/routing.py b/agenta-cli/agenta/sdk/decorators/routing.py index 608562cc1..bfb1f403b 100644 --- a/agenta-cli/agenta/sdk/decorators/routing.py +++ b/agenta-cli/agenta/sdk/decorators/routing.py @@ -392,7 +392,6 @@ async def execute_function( if inspect.iscoroutinefunction(func) else func(*args, **func_params["params"]) ) - data = self.patch_result(result) except Exception as e: self.handle_exception(e) diff --git a/agenta-web/cypress/e2e/ab-testing-evaluation.cy.ts b/agenta-web/cypress/e2e/ab-testing-evaluation.cy.ts index a46112b76..6e85f00a9 100644 --- a/agenta-web/cypress/e2e/ab-testing-evaluation.cy.ts +++ b/agenta-web/cypress/e2e/ab-testing-evaluation.cy.ts @@ -43,9 +43,9 @@ describe("A/B Testing Evaluation workflow", () => { context("When executing the evaluation", () => { it("Should successfully execute the evaluation process", () => { - cy.visit(`/apps/${app_id}/annotations/human_a_b_testing`) - cy.url().should("include", "/annotations/human_a_b_testing") - cy.clickLinkAndWait('[data-cy="new-annotation-modal-button"]') + cy.visit(`/apps/${app_id}/evaluations?selectedEvaluation=human_ab_testing`) + cy.url().should("include", "/evaluations?selectedEvaluation=human_ab_testing") + cy.clickLinkAndWait('[data-cy="new-human-eval-modal-button"]') cy.get(".ant-modal-content").should("exist") 
cy.get('[data-cy="variants-dropdown-0"]').trigger("mouseover") diff --git a/agenta-web/cypress/e2e/app-navigation.cy.ts b/agenta-web/cypress/e2e/app-navigation.cy.ts index a5d312750..b4ebb9f58 100644 --- a/agenta-web/cypress/e2e/app-navigation.cy.ts +++ b/agenta-web/cypress/e2e/app-navigation.cy.ts @@ -28,28 +28,19 @@ describe("App Navigation without errors", () => { cy.get('[data-cy="app-testset-list"]').should("exist") }) - it("should navigate successfully to Automatic Evaluation results evaluators page", () => { - cy.clickLinkAndWait('[data-cy="app-auto-evaluations-link"]') - cy.clickLinkAndWait('[data-cy="app-evaluators-link"]') - cy.url().should("include", "/evaluations/new-evaluator") - }) + it("should navigate successfully to Evaluations page", () => { + cy.clickLinkAndWait('[data-cy="app-evaluations-link"]') + cy.url().should("include", "/evaluations") + cy.contains(/evaluations/i) - it("should navigate successfully to Automatic Evaluation results page", () => { - cy.clickLinkAndWait('[data-cy="app-auto-evaluations-link"]') - cy.clickLinkAndWait('[data-cy="app-evaluations-results-link"]') - cy.url().should("include", "/evaluations/results") - }) + cy.get(".ant-tabs-tab").eq(1).click() + cy.url().should("include", "/evaluations?selectedEvaluation=human_annotation") - it("should navigate successfully to A/B Test page", () => { - cy.clickLinkAndWait('[data-cy="app-human-evaluations-link"]') - cy.clickLinkAndWait('[data-cy="app-human-ab-testing-link"]') - cy.location("pathname").should("include", "/annotations/human_a_b_testing") - }) + cy.get(".ant-tabs-tab").eq(2).click() + cy.url().should("include", "/evaluations?selectedEvaluation=human_ab_testing") - it("should navigate successfully to Single Model Test page", () => { - cy.clickLinkAndWait('[data-cy="app-human-evaluations-link"]') - cy.clickLinkAndWait('[data-cy="app-single-model-test-link"]') - cy.location("pathname").should("include", "/annotations/single_model_test") + 
cy.get(".ant-tabs-tab").eq(0).click() + cy.url().should("include", "/evaluations?selectedEvaluation=auto_evaluation") }) if (isDemo()) { diff --git a/agenta-web/cypress/e2e/eval.comparison.cy.ts b/agenta-web/cypress/e2e/eval.comparison.cy.ts index cf97725ac..a8203ae67 100644 --- a/agenta-web/cypress/e2e/eval.comparison.cy.ts +++ b/agenta-web/cypress/e2e/eval.comparison.cy.ts @@ -39,8 +39,8 @@ describe("Evaluation Comparison Test", function () { context("Executing Evaluation Comparison Workflow", () => { beforeEach(() => { - cy.visit(`/apps/${app_id}/evaluations/results`) - cy.location("pathname").should("include", "/evaluations/results") + cy.visit(`/apps/${app_id}/evaluations`) + cy.location("pathname").should("include", "/evaluations") }) it("Should create 2 new Evaluations", () => { @@ -48,11 +48,7 @@ describe("Evaluation Comparison Test", function () { url: `${Cypress.env().baseApiURL}/evaluations/?app_id=${app_id}`, method: "GET", }).then((resp) => { - if (resp.body.length) { - cy.get('[data-cy="new-evaluation-button"]').click() - } else { - cy.get('[data-cy="new-evaluation-button__no_variants"]').click() - } + cy.get('[data-cy="new-evaluation-button"]').click() }) cy.get(".ant-modal-content").should("exist") @@ -73,19 +69,19 @@ describe("Evaluation Comparison Test", function () { }) it("Should verify that there are completed evaluations in the list", () => { - cy.get('.ag-row[row-index="0"]').should("exist") - cy.get('.ag-row[row-index="1"]').should("exist") - cy.get('.ag-cell[col-id="status"]', {timeout: 60000}) + cy.get(".ant-table-row").eq(0).should("exist") + cy.get(".ant-table-row").eq(1).should("exist") + cy.get('[data-cy="evaluation-status-cell"]', {timeout: 60000}) .eq(0) .should("contain.text", "Completed") - cy.get('.ag-cell[col-id="status"]', {timeout: 60000}) + cy.get('[data-cy="evaluation-status-cell"]', {timeout: 60000}) .eq(1) .should("contain.text", "Completed") }) it("Should select 2 evaluations, click on the compare button, and successfully 
navigate to the comparison page", () => { - cy.get("div.ag-selection-checkbox input").eq(0).check() - cy.get("div.ag-selection-checkbox input").eq(1).check() + cy.get(".ant-checkbox-input").eq(0).check() + cy.get('[data-cy="evaluation-results-compare-button"]').should("not.be.disabled") cy.get('[data-cy="evaluation-results-compare-button"]').click() cy.location("pathname").should("include", "/evaluations/results/compare") diff --git a/agenta-web/cypress/e2e/eval.evaluations.cy.ts b/agenta-web/cypress/e2e/eval.evaluations.cy.ts index 79fb6662e..3265b716c 100644 --- a/agenta-web/cypress/e2e/eval.evaluations.cy.ts +++ b/agenta-web/cypress/e2e/eval.evaluations.cy.ts @@ -12,8 +12,8 @@ describe("Evaluations CRUD Operations Test", function () { context("Executing Evaluations CRUD operations", () => { beforeEach(() => { - cy.visit(`/apps/${app_id}/evaluations/results`) - cy.location("pathname").should("include", "/evaluations/results") + cy.visit(`/apps/${app_id}/evaluations`) + cy.location("pathname").should("include", "/evaluations") }) it("Should successfully create an Evaluation", () => { @@ -29,15 +29,17 @@ describe("Evaluations CRUD Operations Test", function () { }) it("Should verify the successful creation and completion of the evaluation", () => { - cy.get('.ag-row[row-index="0"]').should("exist") - cy.get('.ag-cell[col-id="status"]').should("contain.text", "Completed") + cy.get(".ant-table-row").eq(0).should("exist") + cy.get('[data-cy="evaluation-status-cell"]').should("contain.text", "Completed") }) it("Should select evaluation and successfully delete it", () => { - cy.get(".ag-root-wrapper").should("exist") - cy.get("div.ag-selection-checkbox input").eq(0).check() - cy.get(":nth-child(1) > .ant-btn > .ant-btn-icon > .anticon > svg").click() - cy.get(".ant-modal-confirm-btns > :nth-child(2) > span").click() + cy.get(".ant-checkbox-wrapper").should("exist") + cy.get(".ant-checkbox-input").eq(0).check() + cy.get('[data-cy="delete-evaluation-button"]').click() + + 
cy.get(".ant-modal-content").should("exist") + cy.get(".ant-modal-footer > .ant-btn-primary").click() }) }) @@ -56,35 +58,32 @@ describe("Evaluations CRUD Operations Test", function () { }) it("Should successfully create an Evaluator", () => { - cy.visit(`/apps/${app_id}/evaluations/new-evaluator`) - cy.location("pathname").should("include", "/evaluations/new-evaluator") - cy.get('[data-cy="evaluator-card"]').should("exist") - cy.get(".ant-space > :nth-child(2) > .ant-btn").click() - cy.get('[data-cy="new-evaluator-modal"]').should("exist") - cy.get('[data-cy^="select-new-evaluator"]').eq(0).click() - cy.get('[data-cy="configure-new-evaluator-modal"]').should("exist") - cy.get('[data-cy="configure-new-evaluator-modal-input"]').type(newEvalName, { - force: true, - }) + cy.visit(`/apps/${app_id}/evaluations?configureEvaluatorModal=open`) + cy.url().should("include", "/evaluations?configureEvaluatorModal=open") + cy.get(".ant-modal-content").should("exist") + cy.get('[data-cy="create-new-evaluator-button"]').click() + cy.get('[data-cy="new-evaluator-list"]').eq(2).click() + cy.contains(/configure new evaluator/i) + cy.get('[data-cy="configure-new-evaluator-modal-input"]').type(newEvalName) + cy.get('[data-cy="new-evaluator-advance-settings"]').click() - cy.get('[data-cy="new-evaluator-column-name"]').clear() - cy.get('[data-cy="new-evaluator-column-name"]').type("answer") + cy.get('[data-cy="new-evaluator-advance-settings-input"]').clear() + cy.get('[data-cy="new-evaluator-advance-settings-input"]').type("answer") cy.get('[data-cy="configure-new-evaluator-modal-save-btn"]').click() - cy.get('[data-cy="evaluator-card"]').should("have.length", 2) - cy.wait(1000) + cy.get('[data-cy="evaluator-list"]').should("have.length.gt", 2) }) it("Should successfully create an Evaluation", () => { - cy.visit(`/apps/${app_id}/evaluations/results`) - cy.location("pathname").should("include", "/evaluations/results") + cy.visit(`/apps/${app_id}/evaluations`) + 
cy.location("pathname").should("include", "/evaluations") cy.createNewEvaluation(newEvalName) }) it("Should verify the successful creation and completion of the evaluation", () => { - cy.visit(`/apps/${app_id}/evaluations/results`) - cy.location("pathname").should("include", "/evaluations/results") - cy.get('.ag-row[row-index="0"]').should("exist") - cy.get('.ag-cell[col-id="status"]').should("contain.text", "Completed") + cy.visit(`/apps/${app_id}/evaluations`) + cy.location("pathname").should("include", "/evaluations") + cy.get(".ant-table-row").eq(0).should("exist") + cy.get('[data-cy="evaluation-status-cell"]').should("contain.text", "Completed") }) }) diff --git a/agenta-web/cypress/e2e/eval.evaluators.cy.ts b/agenta-web/cypress/e2e/eval.evaluators.cy.ts index 0708d157d..426ea6f11 100644 --- a/agenta-web/cypress/e2e/eval.evaluators.cy.ts +++ b/agenta-web/cypress/e2e/eval.evaluators.cy.ts @@ -2,6 +2,7 @@ import {randString} from "../../src/lib/helpers/utils" describe("Evaluators CRUD Operations Test", function () { let newEvalName = randString(5) + let editedEvalName = randString(5) let app_id before(() => { cy.createVariant() @@ -12,30 +13,38 @@ describe("Evaluators CRUD Operations Test", function () { context("Executing Evaluators CRUD operations", () => { beforeEach(() => { - cy.visit(`/apps/${app_id}/evaluations/new-evaluator`) - cy.location("pathname").should("include", "/evaluations/new-evaluator") + cy.visit(`/apps/${app_id}/evaluations?configureEvaluatorModal=open`) + cy.url().should("include", "/evaluations?configureEvaluatorModal=open") }) - it("Should successfully create an Evaluator", () => { - cy.get('[data-cy="evaluator-card"]').should("exist") - cy.get(".ant-space > :nth-child(2) > .ant-btn").click() - cy.get('[data-cy="new-evaluator-modal"]').should("exist") - cy.get('[data-cy^="select-new-evaluator"]').eq(0).click() - cy.get('[data-cy="configure-new-evaluator-modal"]').should("exist") + it("Should successfully create an evaluator", () => { + 
cy.get(".ant-modal-content").should("exist") + cy.get('[data-cy="create-new-evaluator-button"]').click() + cy.get('[data-cy="new-evaluator-list"]').eq(0).click() + cy.contains(/configure new evaluator/i) cy.get('[data-cy="configure-new-evaluator-modal-input"]').type(newEvalName) cy.get('[data-cy="configure-new-evaluator-modal-save-btn"]').click() - cy.get('[data-cy="evaluator-card"]').should("have.length", 2) + cy.get('[data-cy="evaluator-list"]').should("have.length.gt", 2) }) - it("Should click on the edit button and successfully edit an evaluator", () => { - cy.get('[data-cy^="evaluator-card-edit-button"]').eq(0).click() - cy.get('[data-cy="configure-new-evaluator-modal-input"]').type("edit") + it("Should successfully edit an evaluator", () => { + cy.get(".ant-modal-content").should("exist") + cy.get('[data-cy="evaluator-menu-button"]').eq(0).click() + cy.get(".ant-dropdown-menu").should("be.visible") + cy.get(".ant-dropdown-menu-item").eq(0).click() + cy.get('[data-cy="configure-new-evaluator-modal-input"]').clear() + cy.get('[data-cy="configure-new-evaluator-modal-input"]').type(editedEvalName) cy.get('[data-cy="configure-new-evaluator-modal-save-btn"]').click() }) - it("Should click on the delete button and successfully delete an evaluator", () => { - cy.get('[data-cy^="evaluator-card-delete-button"]').eq(0).click() - cy.get(".ant-modal-confirm-btns > :nth-child(2) > span").click() + it("Should successfully delete an evaluator", () => { + cy.get(".ant-modal-content").should("exist") + cy.get('[data-cy="evaluator-menu-button"]').eq(0).click() + cy.get(".ant-dropdown-menu").should("be.visible") + cy.get(".ant-dropdown-menu-item") + .contains(/delete/i) + .click() + cy.get(".ant-modal-footer > .ant-btn-primary").click() }) }) diff --git a/agenta-web/cypress/e2e/eval.scenarios.cy.ts b/agenta-web/cypress/e2e/eval.scenarios.cy.ts index 51d9bf371..9478c51f3 100644 --- a/agenta-web/cypress/e2e/eval.scenarios.cy.ts +++ b/agenta-web/cypress/e2e/eval.scenarios.cy.ts @@ 
-9,8 +9,8 @@ describe("Evaluation Scenarios Test", function () { context("Executing Evaluation Scenarios Workflow", () => { beforeEach(() => { - cy.visit(`/apps/${app_id}/evaluations/results`) - cy.location("pathname").should("include", "/evaluations/results") + cy.visit(`/apps/${app_id}/evaluations`) + cy.location("pathname").should("include", "/evaluations") }) it("Should successfully create an Evaluation", () => { @@ -18,15 +18,14 @@ describe("Evaluation Scenarios Test", function () { }) it("Should verify that evalaution was created and completed successfully", () => { - cy.get('.ag-row[row-index="0"]').should("exist") - cy.get('.ag-cell[col-id="status"]').should("contain.text", "Completed") + cy.get(".ant-table-row").eq(0).should("exist") + cy.get('[data-cy="evaluation-status-cell"]').should("contain.text", "Completed") }) it("Should double click on the Evaluation and successfully navigate to the evalaution results page", () => { - cy.get(".ag-root-wrapper").should("exist") - cy.get('.ag-row-first > [col-id="aggregated_results"]').click() + cy.get(".ant-table-row").eq(0).should("exist") + cy.get(".ant-table-row").click({force: true}) cy.wait(1000) - cy.get(".ag-cell-focus").dblclick() cy.contains(/Evaluation Results/i) cy.get('[data-cy="evalaution-scenarios-table"]').should("exist") }) diff --git a/agenta-web/cypress/e2e/single-model-test-evaluation.cy.ts b/agenta-web/cypress/e2e/single-model-test-evaluation.cy.ts index f6a9a6070..68b3d048b 100644 --- a/agenta-web/cypress/e2e/single-model-test-evaluation.cy.ts +++ b/agenta-web/cypress/e2e/single-model-test-evaluation.cy.ts @@ -16,9 +16,9 @@ describe("Single Model Test workflow", () => { context("When executing the evaluation", () => { it("Should successfully execute the evaluation process", () => { - cy.visit(`/apps/${app_id}/annotations/single_model_test`) - cy.url().should("include", "/annotations/single_model_test") - cy.clickLinkAndWait('[data-cy="new-annotation-modal-button"]') + 
cy.visit(`/apps/${app_id}/evaluations?selectedEvaluation=human_annotation`) + cy.url().should("include", "/evaluations?selectedEvaluation=human_annotation") + cy.clickLinkAndWait('[data-cy="new-human-eval-modal-button"]') cy.get(".ant-modal-content").should("exist") @@ -49,10 +49,10 @@ describe("Single Model Test workflow", () => { }) it("Should modify the evaluation vote scores", () => { - cy.visit(`/apps/${app_id}/annotations/single_model_test`) - cy.url().should("include", "/annotations/single_model_test") + cy.visit(`/apps/${app_id}/evaluations?selectedEvaluation=human_annotation`) + cy.url().should("include", "/evaluations?selectedEvaluation=human_annotation") cy.wait(1000) - cy.clickLinkAndWait('[data-cy="single-model-view-evaluation-button"]') + cy.clickLinkAndWait(".ant-table-row").eq(0) cy.get('[data-cy="evalInstructionsShown-ok-btn"]').click() cy.get('[data-cy="evaluation-vote-panel-numeric-vote-input"]').clear() cy.get('[data-cy="evaluation-vote-panel-numeric-vote-input"]').type("85") @@ -67,7 +67,6 @@ describe("Single Model Test workflow", () => { cy.visit(`/apps/${app_id}/testsets`) cy.url().should("include", "/testsets") cy.get('[data-cy="app-testset-list"]').as("table") - cy.get("@table").get(".ant-table-pagination li a").last().click() cy.get("@table").contains(saved_testset_name).as("tempTestSet").should("be.visible") }) }) diff --git a/agenta-web/cypress/e2e/testset.cy.ts b/agenta-web/cypress/e2e/testset.cy.ts index 0fda57278..2c725482f 100644 --- a/agenta-web/cypress/e2e/testset.cy.ts +++ b/agenta-web/cypress/e2e/testset.cy.ts @@ -18,21 +18,19 @@ describe("Testsets crud and UI functionality", () => { context("Testing creation process of testset", () => { beforeEach(() => { // navigate to the new testset page - cy.visit(`/apps/${app_id}/testsets/new/manual`) - }) - - it("Should navigates successfully to the new testset page", () => { - cy.url().should("include", "/testsets/new/manual") - }) - - it("Should not allow creation of a testset without a 
name", () => { - cy.get('[data-cy="testset-save-button"]').click() - cy.get('[data-cy="testset-name-reqd-error"]').should("be.visible") + cy.visit(`/apps/${app_id}/testsets`) }) it("Should successfully creates the testset and navigates to the list", () => { + cy.url().should("include", "/testsets") + cy.get('[data-cy="create-testset-modal-button"]').click() + cy.get(".ant-modal-content").should("exist") + cy.get('[data-cy="create-testset-from-scratch"]').click() + const testsetName = randString(8) cy.get('[data-cy="testset-name-input"]').type(testsetName) + cy.clickLinkAndWait('[data-cy="create-new-testset-button"]') + cy.get(".ag-row").should("have.length", 3) countries.forEach((country, index) => { cy.get(`.ag-center-cols-container .ag-row[row-index="${index}"]`).within(() => { @@ -51,7 +49,6 @@ describe("Testsets crud and UI functionality", () => { // validate that the new testset is in the list cy.get('[data-cy="app-testset-list"]').as("table") - cy.get("@table").get(".ant-table-pagination li a").last().click() cy.get("@table").contains(testsetName).as("tempTestSet").should("be.visible") }) }) @@ -64,19 +61,21 @@ describe("Testsets crud and UI functionality", () => { it("Should successfully upload a testset", () => { cy.url().should("include", "/testsets") - cy.clickLinkAndWait('[data-cy="testset-new-upload-link"]') - cy.url().should("include", "/testsets/new/upload") + + cy.get('[data-cy="create-testset-modal-button"]').click() + cy.get(".ant-modal-content").should("exist") + cy.get('[data-cy="upload-testset"]').click() + cy.get('[data-cy="upload-testset-file-name"]').type(testset_name) cy.get('[type="file"]').selectFile("cypress/data/countries-genders.csv", {force: true}) cy.wait(1000) cy.contains("countries-genders.csv").should("be.visible") - cy.get('[data-cy="testset-upload-button"]').click() + cy.clickLinkAndWait('[data-cy="testset-upload-button"]') }) it("Should check the uploaded testset is present", () => { cy.url().should("include", "/testsets") 
cy.get('[data-cy="app-testset-list"]').as("table") - cy.get("@table").get(".ant-table-pagination li a").last().click() cy.get("@table").contains(testset_name).as("tempTestSet").should("be.visible") }) }) diff --git a/agenta-web/cypress/support/commands/evaluations.ts b/agenta-web/cypress/support/commands/evaluations.ts index a690763e4..298e0a3c9 100644 --- a/agenta-web/cypress/support/commands/evaluations.ts +++ b/agenta-web/cypress/support/commands/evaluations.ts @@ -58,11 +58,13 @@ Cypress.Commands.add("createVariantsAndTestsets", () => { cy.createVariant() cy.clickLinkAndWait('[data-cy="app-testsets-link"]') - cy.get('[data-cy="app-testsets-link"]').trigger("mouseout") - cy.clickLinkAndWait('[data-cy="testset-new-manual-link"]') - const testsetName = randString(5) + cy.get('[data-cy="create-testset-modal-button"]').click() + cy.get(".ant-modal-content").should("exist") + cy.get('[data-cy="create-testset-from-scratch"]').click() + const testsetName = randString(5) cy.get('[data-cy="testset-name-input"]').type(testsetName) + cy.clickLinkAndWait('[data-cy="create-new-testset-button"]') cy.wrap(testsetName).as("testsetName") cy.get(".ag-row").should("have.length", 3) @@ -105,11 +107,7 @@ Cypress.Commands.add("createNewEvaluation", (evaluatorName = "Exact Match") => { url: `${Cypress.env().baseApiURL}/evaluations/?app_id=${app_id}`, method: "GET", }).then((resp) => { - if (resp.body.length) { - cy.get('[data-cy="new-evaluation-button"]').click() - } else { - cy.get('[data-cy="new-evaluation-button__no_variants"]').click() - } + cy.get('[data-cy="new-evaluation-button"]').click() }) cy.get(".ant-modal-content").should("exist") diff --git a/agenta-web/package-lock.json b/agenta-web/package-lock.json index 6a6a1e3b1..06ade596c 100644 --- a/agenta-web/package-lock.json +++ b/agenta-web/package-lock.json @@ -1,12 +1,12 @@ { "name": "agenta", - "version": "0.24.4", + "version": "0.25.2", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "agenta", - 
"version": "0.24.4", + "version": "0.25.2", "dependencies": { "@ant-design/colors": "^7.0.0", "@ant-design/icons": "^5.3.7", @@ -39,7 +39,7 @@ "@types/uuid": "^9.0.7", "ag-grid-community": "^31.2.0", "ag-grid-react": "^31.2.0", - "antd": "^5.4.7", + "antd": "^5.20.6", "autoprefixer": "10.4.14", "axios": "^1.4.0", "classnames": "^2.3.2", @@ -69,8 +69,8 @@ "react-resizable": "^3.0.5", "react-syntax-highlighter": "^15.5.0", "react-youtube": "^10.1.0", - "supertokens-auth-react": "^0.34.0", - "supertokens-node": "^15.0.4", + "supertokens-auth-react": "^0.47.0", + "supertokens-node": "^20.1.2", "swr": "^2.1.5", "tailwindcss": "^3.4.4", "typescript": "5.0.4", @@ -81,7 +81,7 @@ "@swc/cli": "^0.3.12", "@swc/core": "^1.4.15", "@types/node": "^20.8.10", - "cypress": "^13.4.0", + "cypress": "^13.15.0", "node-mocks-http": "^1.12.2", "prettier": "^3.2.5" }, @@ -121,17 +121,17 @@ } }, "node_modules/@ant-design/colors": { - "version": "7.0.2", - "resolved": "https://registry.npmjs.org/@ant-design/colors/-/colors-7.0.2.tgz", - "integrity": "sha512-7KJkhTiPiLHSu+LmMJnehfJ6242OCxSlR3xHVBecYxnMW8MS/878NXct1GqYARyL59fyeFdKRxXTfvR9SnDgJg==", + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/@ant-design/colors/-/colors-7.1.0.tgz", + "integrity": "sha512-MMoDGWn1y9LdQJQSHiCC20x3uZ3CwQnv9QMz6pCmJOrqdgM9YxsoVVY0wtrdXbmfSgnV0KNk6zi09NAhMR2jvg==", "dependencies": { "@ctrl/tinycolor": "^3.6.1" } }, "node_modules/@ant-design/cssinjs": { - "version": "1.18.4", - "resolved": "https://registry.npmjs.org/@ant-design/cssinjs/-/cssinjs-1.18.4.tgz", - "integrity": "sha512-IrUAOj5TYuMG556C9gdbFuOrigyhzhU5ZYpWb3gYTxAwymVqRbvLzFCZg6OsjLBR6GhzcxYF3AhxKmjB+rA2xA==", + "version": "1.21.1", + "resolved": "https://registry.npmjs.org/@ant-design/cssinjs/-/cssinjs-1.21.1.tgz", + "integrity": "sha512-tyWnlK+XH7Bumd0byfbCiZNK43HEubMoCcu9VxwsAwiHdHTgWa+tMN0/yvxa+e8EzuFP1WdUNNPclRpVtD33lg==", "dependencies": { "@babel/runtime": "^7.11.1", "@emotion/hash": "^0.8.0", @@ -139,22 +139,46 @@ "classnames": 
"^2.3.1", "csstype": "^3.1.3", "rc-util": "^5.35.0", - "stylis": "^4.0.13" + "stylis": "^4.3.3" }, "peerDependencies": { "react": ">=16.0.0", "react-dom": ">=16.0.0" } }, + "node_modules/@ant-design/cssinjs-utils": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@ant-design/cssinjs-utils/-/cssinjs-utils-1.1.0.tgz", + "integrity": "sha512-E9nOWObXx7Dy7hdyuYlOFaer/LtPO7oyZVxZphh0CYEslr5EmhJPM3WI0Q2RBHRtYg6dSNqeSK73kvZjPN3IMQ==", + "dependencies": { + "@ant-design/cssinjs": "^1.21.0", + "@babel/runtime": "^7.23.2", + "rc-util": "^5.38.0" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, + "node_modules/@ant-design/fast-color": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/@ant-design/fast-color/-/fast-color-2.0.6.tgz", + "integrity": "sha512-y2217gk4NqL35giHl72o6Zzqji9O7vHh9YmhUVkPtAOpoTCH4uWxo/pr4VE8t0+ChEPs0qo4eJRC5Q1eXWo3vA==", + "dependencies": { + "@babel/runtime": "^7.24.7" + }, + "engines": { + "node": ">=8.x" + } + }, "node_modules/@ant-design/icons": { - "version": "5.3.7", - "resolved": "https://registry.npmjs.org/@ant-design/icons/-/icons-5.3.7.tgz", - "integrity": "sha512-bCPXTAg66f5bdccM4TT21SQBDO1Ek2gho9h3nO9DAKXJP4sq+5VBjrQMSxMVXSB3HyEz+cUbHQ5+6ogxCOpaew==", - "license": "MIT", + "version": "5.4.0", + "resolved": "https://registry.npmjs.org/@ant-design/icons/-/icons-5.4.0.tgz", + "integrity": "sha512-QZbWC5xQYexCI5q4/fehSEkchJr5UGtvAJweT743qKUQQGs9IH2DehNLP49DJ3Ii9m9CijD2HN6fNy3WKhIFdA==", "dependencies": { "@ant-design/colors": "^7.0.0", "@ant-design/icons-svg": "^4.4.0", - "@babel/runtime": "^7.11.2", + "@babel/runtime": "^7.24.8", "classnames": "^2.2.6", "rc-util": "^5.31.1" }, @@ -172,9 +196,9 @@ "integrity": "sha512-vHbT+zJEVzllwP+CM+ul7reTEfBR0vgxFe7+lREAsAA7YGsYpboiq2sQNeQeRvh09GfQgs/GyFEvZpJ9cLXpXA==" }, "node_modules/@ant-design/react-slick": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/@ant-design/react-slick/-/react-slick-1.0.2.tgz", - "integrity": 
"sha512-Wj8onxL/T8KQLFFiCA4t8eIRGpRR+UPgOdac2sYzonv+i0n3kXHmvHLLiOYL655DQx2Umii9Y9nNgL7ssu5haQ==", + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@ant-design/react-slick/-/react-slick-1.1.2.tgz", + "integrity": "sha512-EzlvzE6xQUBrZuuhSAFTdsr4P2bBBHGZwKFemEfq8gIGyIQCxalYfZW/T2ORbtQx5rU69o+WycP3exY/7T1hGA==", "dependencies": { "@babel/runtime": "^7.10.4", "classnames": "^2.2.5", @@ -522,9 +546,9 @@ } }, "node_modules/@babel/runtime": { - "version": "7.24.4", - "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.24.4.tgz", - "integrity": "sha512-dkxf7+hn8mFBwKjs9bvBlArzLVxVbS8usaPUDd5p2a9JCL9tB8OaOVN1isD4+Xyk4ns89/xeOmbQvgdK7IIVdA==", + "version": "7.25.6", + "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.25.6.tgz", + "integrity": "sha512-VBj9MYyDb9tuLq7yzqjgzt6Q+IBQLrGZfdjOekyEirZPHxXWoTSGUTMrpsfi58Up73d13NfYLv8HT9vmznjzhQ==", "dependencies": { "regenerator-runtime": "^0.14.0" }, @@ -605,9 +629,9 @@ } }, "node_modules/@cypress/request": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/@cypress/request/-/request-3.0.1.tgz", - "integrity": "sha512-TWivJlJi8ZDx2wGOw1dbLuHJKUYX7bWySw377nlnGOW3hP9/MUKIsEdXT/YngWxVdgNCHRBmFlBipE+5/2ZZlQ==", + "version": "3.0.5", + "resolved": "https://registry.npmjs.org/@cypress/request/-/request-3.0.5.tgz", + "integrity": "sha512-v+XHd9XmWbufxF1/bTaVm2yhbxY+TB4YtWRqF2zaXBlDNMkls34KiATz0AVDLavL3iB6bQk9/7n3oY1EoLSWGA==", "dev": true, "dependencies": { "aws-sign2": "~0.7.0", @@ -616,14 +640,14 @@ "combined-stream": "~1.0.6", "extend": "~3.0.2", "forever-agent": "~0.6.1", - "form-data": "~2.3.2", - "http-signature": "~1.3.6", + "form-data": "~4.0.0", + "http-signature": "~1.4.0", "is-typedarray": "~1.0.0", "isstream": "~0.1.2", "json-stringify-safe": "~5.0.1", "mime-types": "~2.1.19", "performance-now": "^2.1.0", - "qs": "6.10.4", + "qs": "6.13.0", "safe-buffer": "^5.1.2", "tough-cookie": "^4.1.3", "tunnel-agent": "^0.6.0", @@ -633,20 +657,6 @@ "node": ">= 6" } }, - 
"node_modules/@cypress/request/node_modules/form-data": { - "version": "2.3.3", - "resolved": "https://registry.npmjs.org/form-data/-/form-data-2.3.3.tgz", - "integrity": "sha512-1lLKB2Mu3aGP1Q/2eCOx0fNbRMe7XdwktwOruhfqqd0rIJWwN4Dh+E3hrPSlDCXnSR7UtZ1N38rVXm+6+MEhJQ==", - "dev": true, - "dependencies": { - "asynckit": "^0.4.0", - "combined-stream": "^1.0.6", - "mime-types": "^2.1.12" - }, - "engines": { - "node": ">= 0.12" - } - }, "node_modules/@cypress/request/node_modules/uuid": { "version": "8.3.2", "resolved": "https://registry.npmjs.org/uuid/-/uuid-8.3.2.tgz", @@ -1957,13 +1967,24 @@ "@opentelemetry/sdk-trace-base": "^1.22" } }, + "node_modules/@rc-component/async-validator": { + "version": "5.0.4", + "resolved": "https://registry.npmjs.org/@rc-component/async-validator/-/async-validator-5.0.4.tgz", + "integrity": "sha512-qgGdcVIF604M9EqjNF0hbUTz42bz/RDtxWdWuU5EQe3hi7M8ob54B6B35rOsvX5eSvIHIzT9iH1R3n+hk3CGfg==", + "dependencies": { + "@babel/runtime": "^7.24.4" + }, + "engines": { + "node": ">=14.x" + } + }, "node_modules/@rc-component/color-picker": { - "version": "1.5.2", - "resolved": "https://registry.npmjs.org/@rc-component/color-picker/-/color-picker-1.5.2.tgz", - "integrity": "sha512-YJXujYzYFAEtlXJXy0yJUhwzUWPTcniBZto+wZ/vnACmFnUTNR7dH+NOeqSwMMsssh74e9H5Jfpr5LAH2PYqUw==", + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/@rc-component/color-picker/-/color-picker-2.0.1.tgz", + "integrity": "sha512-WcZYwAThV/b2GISQ8F+7650r5ZZJ043E57aVBFkQ+kSY4C6wdofXgB0hBx+GPGpIU0Z81eETNoDUJMr7oy/P8Q==", "dependencies": { + "@ant-design/fast-color": "^2.0.6", "@babel/runtime": "^7.23.6", - "@ctrl/tinycolor": "^3.6.1", "classnames": "^2.2.6", "rc-util": "^5.38.1" }, @@ -2030,14 +2051,31 @@ "react-dom": ">=16.9.0" } }, + "node_modules/@rc-component/qrcode": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/@rc-component/qrcode/-/qrcode-1.0.0.tgz", + "integrity": 
"sha512-L+rZ4HXP2sJ1gHMGHjsg9jlYBX/SLN2D6OxP9Zn3qgtpMWtO2vUfxVFwiogHpAIqs54FnALxraUy/BCO1yRIgg==", + "dependencies": { + "@babel/runtime": "^7.24.7", + "classnames": "^2.3.2", + "rc-util": "^5.38.0" + }, + "engines": { + "node": ">=8.x" + }, + "peerDependencies": { + "react": ">=16.9.0", + "react-dom": ">=16.9.0" + } + }, "node_modules/@rc-component/tour": { - "version": "1.12.3", - "resolved": "https://registry.npmjs.org/@rc-component/tour/-/tour-1.12.3.tgz", - "integrity": "sha512-U4mf1FiUxGCwrX4ed8op77Y8VKur+8Y/61ylxtqGbcSoh1EBC7bWd/DkLu0ClTUrKZInqEi1FL7YgFtnT90vHA==", + "version": "1.15.1", + "resolved": "https://registry.npmjs.org/@rc-component/tour/-/tour-1.15.1.tgz", + "integrity": "sha512-Tr2t7J1DKZUpfJuDZWHxyxWpfmj8EZrqSgyMZ+BCdvKZ6r1UDsfU46M/iWAAFBy961Ssfom2kv5f3UcjIL2CmQ==", "dependencies": { "@babel/runtime": "^7.18.0", "@rc-component/portal": "^1.0.0-9", - "@rc-component/trigger": "^1.3.6", + "@rc-component/trigger": "^2.0.0", "classnames": "^2.3.2", "rc-util": "^5.24.4" }, @@ -2050,9 +2088,9 @@ } }, "node_modules/@rc-component/trigger": { - "version": "1.18.3", - "resolved": "https://registry.npmjs.org/@rc-component/trigger/-/trigger-1.18.3.tgz", - "integrity": "sha512-Ksr25pXreYe1gX6ayZ1jLrOrl9OAUHUqnuhEx6MeHnNa1zVM5Y2Aj3Q35UrER0ns8D2cJYtmJtVli+i+4eKrvA==", + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/@rc-component/trigger/-/trigger-2.2.3.tgz", + "integrity": "sha512-X1oFIpKoXAMXNDYCviOmTfuNuYxE4h5laBsyCqVAVMjNHxoF3/uiyA7XdegK1XbCvBbCZ6P6byWrEoDRpKL8+A==", "dependencies": { "@babel/runtime": "^7.23.2", "@rc-component/portal": "^1.1.0", @@ -3753,57 +3791,59 @@ } }, "node_modules/antd": { - "version": "5.14.1", - "resolved": "https://registry.npmjs.org/antd/-/antd-5.14.1.tgz", - "integrity": "sha512-P0Bwt9NKSZqnEJ0QAyAb13ay34FjOKsz+KEp/ts+feYsynhUxF7/Ay6d1jS6ZcNpcs+JWTlLKO59YFZ3tX07wQ==", - "dependencies": { - "@ant-design/colors": "^7.0.2", - "@ant-design/cssinjs": "^1.18.4", - "@ant-design/icons": "^5.3.0", - 
"@ant-design/react-slick": "~1.0.2", + "version": "5.20.6", + "resolved": "https://registry.npmjs.org/antd/-/antd-5.20.6.tgz", + "integrity": "sha512-TZFmNenHlh26DelHCJbkB+x1OVulIKsN1f/CnAd2NxZLysXqRvSuLUeHcgccqAnxTy7B03GZ6i1tocGxPCNjgA==", + "dependencies": { + "@ant-design/colors": "^7.1.0", + "@ant-design/cssinjs": "^1.21.0", + "@ant-design/cssinjs-utils": "^1.0.3", + "@ant-design/icons": "^5.4.0", + "@ant-design/react-slick": "~1.1.2", + "@babel/runtime": "^7.24.8", "@ctrl/tinycolor": "^3.6.1", - "@rc-component/color-picker": "~1.5.1", + "@rc-component/color-picker": "~2.0.1", "@rc-component/mutate-observer": "^1.1.0", - "@rc-component/tour": "~1.12.3", - "@rc-component/trigger": "^1.18.3", + "@rc-component/qrcode": "~1.0.0", + "@rc-component/tour": "~1.15.1", + "@rc-component/trigger": "^2.2.2", "classnames": "^2.5.1", "copy-to-clipboard": "^3.3.3", - "dayjs": "^1.11.10", - "qrcode.react": "^3.1.0", - "rc-cascader": "~3.21.2", - "rc-checkbox": "~3.1.0", - "rc-collapse": "~3.7.2", - "rc-dialog": "~9.3.4", - "rc-drawer": "~7.0.0", - "rc-dropdown": "~4.1.0", - "rc-field-form": "~1.41.0", - "rc-image": "~7.5.1", - "rc-input": "~1.4.3", - "rc-input-number": "~9.0.0", - "rc-mentions": "~2.10.1", - "rc-menu": "~9.12.4", - "rc-motion": "^2.9.0", - "rc-notification": "~5.3.0", - "rc-pagination": "~4.0.4", - "rc-picker": "~4.1.1", - "rc-progress": "~3.5.1", - "rc-rate": "~2.12.0", + "dayjs": "^1.11.11", + "rc-cascader": "~3.28.1", + "rc-checkbox": "~3.3.0", + "rc-collapse": "~3.7.3", + "rc-dialog": "~9.5.2", + "rc-drawer": "~7.2.0", + "rc-dropdown": "~4.2.0", + "rc-field-form": "~2.4.0", + "rc-image": "~7.9.0", + "rc-input": "~1.6.3", + "rc-input-number": "~9.2.0", + "rc-mentions": "~2.15.0", + "rc-menu": "~9.14.1", + "rc-motion": "^2.9.2", + "rc-notification": "~5.6.0", + "rc-pagination": "~4.2.0", + "rc-picker": "~4.6.14", + "rc-progress": "~4.0.0", + "rc-rate": "~2.13.0", "rc-resize-observer": "^1.4.0", "rc-segmented": "~2.3.0", - "rc-select": "~14.11.0", - 
"rc-slider": "~10.5.0", + "rc-select": "~14.15.2", + "rc-slider": "~11.1.5", "rc-steps": "~6.0.1", "rc-switch": "~4.1.0", - "rc-table": "~7.39.0", - "rc-tabs": "~14.0.0", - "rc-textarea": "~1.6.3", - "rc-tooltip": "~6.1.3", - "rc-tree": "~5.8.5", - "rc-tree-select": "~5.17.0", - "rc-upload": "~4.5.2", - "rc-util": "^5.38.1", + "rc-table": "~7.45.7", + "rc-tabs": "~15.1.1", + "rc-textarea": "~1.8.1", + "rc-tooltip": "~6.2.0", + "rc-tree": "~5.9.0", + "rc-tree-select": "~5.23.0", + "rc-upload": "~4.7.0", + "rc-util": "^5.43.0", "scroll-into-view-if-needed": "^3.1.0", - "throttle-debounce": "^5.0.0" + "throttle-debounce": "^5.0.2" }, "funding": { "type": "opencollective", @@ -4048,16 +4088,11 @@ } }, "node_modules/async": { - "version": "3.2.5", - "resolved": "https://registry.npmjs.org/async/-/async-3.2.5.tgz", - "integrity": "sha512-baNZyqaaLhyLVKm/DlvdW051MSgO6b8eVfIezl9E5PqWxFgzLm/wQntEW4zOytVburDEr0JlALEpdOFwvErLsg==", + "version": "3.2.6", + "resolved": "https://registry.npmjs.org/async/-/async-3.2.6.tgz", + "integrity": "sha512-htCUDlxyyCLMgaM3xXg0C0LW2xqfuQ6p05pCEIsXuyQ+a1koYKTuBMzRNwmybfLgvJDMd0r1LTn4+E0Ti6C2AA==", "dev": true }, - "node_modules/async-validator": { - "version": "4.2.5", - "resolved": "https://registry.npmjs.org/async-validator/-/async-validator-4.2.5.tgz", - "integrity": "sha512-7HhHjtERjqlNbZtqNqy2rckN/SpOOlmDliet+lP7k+eKZEjPk3DgyeU9lIXLdeLz0uBbbVp+9Qdow9wJWgwwfg==" - }, "node_modules/asynciterator.prototype": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/asynciterator.prototype/-/asynciterator.prototype-1.0.0.tgz", @@ -4133,9 +4168,9 @@ } }, "node_modules/aws4": { - "version": "1.12.0", - "resolved": "https://registry.npmjs.org/aws4/-/aws4-1.12.0.tgz", - "integrity": "sha512-NmWvPnx0F1SfrQbYwOi7OeaNGokp9XhzNioJ/CSBs8Qa4vxug81mhJEAVZwxXuBmYB5KDRfMq/F3RR0BIU7sWg==", + "version": "1.13.2", + "resolved": "https://registry.npmjs.org/aws4/-/aws4-1.13.2.tgz", + "integrity": 
"sha512-lHe62zvbTB5eEABUVi/AwVh0ZKY9rMMDhmm+eeyuuUQbQ3+J+fONVQOZyj+DdrvD4BY33uYniyRJ4UJIaSKAfw==", "dev": true }, "node_modules/axe-core": { @@ -4210,7 +4245,6 @@ "version": "1.5.1", "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", - "dev": true, "funding": [ { "type": "github", @@ -4615,13 +4649,18 @@ } }, "node_modules/call-bind": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.5.tgz", - "integrity": "sha512-C3nQxfFZxFRVoJoGKKI8y3MOEo129NQ+FgQ08iye+Mk4zNZZGdjfs06bVTr+DBSlA66Q2VEcMki/cUCP4SercQ==", + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.7.tgz", + "integrity": "sha512-GHTSNSYICQ7scH7sZ+M2rFopRoLh8t2bLSW6BbgrtLsahOIB5iyAVJf9GjWK3cYTDaMj4XdBpM1cA6pIS0Kv2w==", "dependencies": { + "es-define-property": "^1.0.0", + "es-errors": "^1.3.0", "function-bind": "^1.1.2", - "get-intrinsic": "^1.2.1", - "set-function-length": "^1.1.1" + "get-intrinsic": "^1.2.4", + "set-function-length": "^1.2.1" + }, + "engines": { + "node": ">= 0.4" }, "funding": { "url": "https://github.com/sponsors/ljharb" @@ -4824,9 +4863,9 @@ } }, "node_modules/cli-table3": { - "version": "0.6.3", - "resolved": "https://registry.npmjs.org/cli-table3/-/cli-table3-0.6.3.tgz", - "integrity": "sha512-w5Jac5SykAeZJKntOxJCrm63Eg5/4dhMWIcuTbo9rpE+brgaSZo0RuNJZeOyMgsUdhDeojvgyQLmjI+K50ZGyg==", + "version": "0.6.5", + "resolved": "https://registry.npmjs.org/cli-table3/-/cli-table3-0.6.5.tgz", + "integrity": "sha512-+W/5efTR7y5HRD7gACw9yQjqMVvEMLBHmboM/kPWam+H+Hmyrgjh6YncVKK122YZkXrLudzTuAukUw9FnMf7IQ==", "dev": true, "dependencies": { "string-width": "^4.2.0" @@ -5119,21 +5158,20 @@ "integrity": "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==" }, "node_modules/cypress": { - "version": "13.6.1", - "resolved": 
"https://registry.npmjs.org/cypress/-/cypress-13.6.1.tgz", - "integrity": "sha512-k1Wl5PQcA/4UoTffYKKaxA0FJKwg8yenYNYRzLt11CUR0Kln+h7Udne6mdU1cUIdXBDTVZWtmiUjzqGs7/pEpw==", + "version": "13.15.0", + "resolved": "https://registry.npmjs.org/cypress/-/cypress-13.15.0.tgz", + "integrity": "sha512-53aO7PwOfi604qzOkCSzNlWquCynLlKE/rmmpSPcziRH6LNfaDUAklQT6WJIsD8ywxlIy+uVZsnTMCCQVd2kTw==", "dev": true, "hasInstallScript": true, "dependencies": { - "@cypress/request": "^3.0.0", + "@cypress/request": "^3.0.4", "@cypress/xvfb": "^1.2.4", - "@types/node": "^18.17.5", "@types/sinonjs__fake-timers": "8.1.1", "@types/sizzle": "^2.3.2", "arch": "^2.2.0", "blob-util": "^2.0.2", "bluebird": "^3.7.2", - "buffer": "^5.6.0", + "buffer": "^5.7.1", "cachedir": "^2.3.0", "chalk": "^4.1.0", "check-more-types": "^2.24.0", @@ -5151,7 +5189,7 @@ "figures": "^3.2.0", "fs-extra": "^9.1.0", "getos": "^3.2.1", - "is-ci": "^3.0.0", + "is-ci": "^3.0.1", "is-installed-globally": "~0.4.0", "lazy-ass": "^1.6.0", "listr2": "^3.8.3", @@ -5165,7 +5203,7 @@ "request-progress": "^3.0.0", "semver": "^7.5.3", "supports-color": "^8.1.1", - "tmp": "~0.2.1", + "tmp": "~0.2.3", "untildify": "^4.0.0", "yauzl": "^2.10.0" }, @@ -5176,15 +5214,6 @@ "node": "^16.0.0 || ^18.0.0 || >=20.0.0" } }, - "node_modules/cypress/node_modules/@types/node": { - "version": "18.19.3", - "resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.3.tgz", - "integrity": "sha512-k5fggr14DwAytoA/t8rPrIz++lXK7/DqckthCmoZOKNsEbJkId4Z//BqgApXBUGrGddrigYa1oqheo/7YmW4rg==", - "dev": true, - "dependencies": { - "undici-types": "~5.26.4" - } - }, "node_modules/cypress/node_modules/chalk": { "version": "4.1.2", "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", @@ -5377,9 +5406,9 @@ } }, "node_modules/dayjs": { - "version": "1.11.10", - "resolved": "https://registry.npmjs.org/dayjs/-/dayjs-1.11.10.tgz", - "integrity": "sha512-vjAczensTgRcqDERK0SR2XMwsF/tSvnvlv6VcF2GIhg6Sx4yOIt/irsr1RDJsKiIyBzJDpCoXiWWq28MqH2cnQ==" + 
"version": "1.11.13", + "resolved": "https://registry.npmjs.org/dayjs/-/dayjs-1.11.13.tgz", + "integrity": "sha512-oaMBel6gjolK862uaPQOVTA7q3TZhuSvuMQAAglQDOWYO9A91IrAOUJEyKVlqJlHE0vq5p5UXxzdPfMH/x6xNg==" }, "node_modules/debug": { "version": "4.3.4", @@ -5456,16 +5485,19 @@ } }, "node_modules/define-data-property": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/define-data-property/-/define-data-property-1.1.1.tgz", - "integrity": "sha512-E7uGkTzkk1d0ByLeSc6ZsFS79Axg+m1P/VsgYsxHgiuc3tFSj+MjMIwe90FC4lOAZzNBdY7kkO2P2wKdsQ1vgQ==", + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/define-data-property/-/define-data-property-1.1.4.tgz", + "integrity": "sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A==", "dependencies": { - "get-intrinsic": "^1.2.1", - "gopd": "^1.0.1", - "has-property-descriptors": "^1.0.0" + "es-define-property": "^1.0.0", + "es-errors": "^1.3.0", + "gopd": "^1.0.1" }, "engines": { "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" } }, "node_modules/define-properties": { @@ -5748,6 +5780,25 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/es-define-property": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.0.tgz", + "integrity": "sha512-jxayLKShrEqqzJ0eumQbVhTYQM27CfT1T35+gCgDFoL82JLsXqTJ76zv6A0YLOgEnLUMvLzsDsGIrl8NFpT2gQ==", + "dependencies": { + "get-intrinsic": "^1.2.4" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-errors": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", + "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", + "engines": { + "node": ">= 0.4" + } + }, "node_modules/es-iterator-helpers": { "version": "1.0.15", "resolved": "https://registry.npmjs.org/es-iterator-helpers/-/es-iterator-helpers-1.0.15.tgz", @@ -6826,15 
+6877,19 @@ } }, "node_modules/get-intrinsic": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.2.2.tgz", - "integrity": "sha512-0gSo4ml/0j98Y3lngkFEot/zhiCeWsbYIlZ+uZOVgzLyLaUw7wxUL+nCTP0XJvJg1AXulJRI3UJi8GsbDuxdGA==", + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.2.4.tgz", + "integrity": "sha512-5uYhsJH8VJBTv7oslg4BznJYhDoRI6waYCxMmCdnTrcCrHA/fCFKoTFz2JKKE0HdDFUF7/oQuhzumXJK7paBRQ==", "dependencies": { + "es-errors": "^1.3.0", "function-bind": "^1.1.2", "has-proto": "^1.0.1", "has-symbols": "^1.0.3", "hasown": "^2.0.0" }, + "engines": { + "node": ">= 0.4" + }, "funding": { "url": "https://github.com/sponsors/ljharb" } @@ -7103,11 +7158,11 @@ } }, "node_modules/has-property-descriptors": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/has-property-descriptors/-/has-property-descriptors-1.0.1.tgz", - "integrity": "sha512-VsX8eaIewvas0xnvinAe9bw4WfIeODpGYikiWYLH+dma0Jw6KHYqWiWfhQlgOVK8D6PvjubK5Uc4P0iIhIcNVg==", + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/has-property-descriptors/-/has-property-descriptors-1.0.2.tgz", + "integrity": "sha512-55JNKuIW+vq4Ke1BjOTjM2YctQIvCT7GFzHwmfZPGo5wnrgkid0YQtnAleFSqumZm4az3n2BS+erby5ipJdgrg==", "dependencies": { - "get-intrinsic": "^1.2.2" + "es-define-property": "^1.0.0" }, "funding": { "url": "https://github.com/sponsors/ljharb" @@ -7278,14 +7333,14 @@ "dev": true }, "node_modules/http-signature": { - "version": "1.3.6", - "resolved": "https://registry.npmjs.org/http-signature/-/http-signature-1.3.6.tgz", - "integrity": "sha512-3adrsD6zqo4GsTqtO7FyrejHNv+NgiIfAfv68+jVlFmSr9OGy7zrxONceFRLKvnnZA5jbxQBX1u9PpB6Wi32Gw==", + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/http-signature/-/http-signature-1.4.0.tgz", + "integrity": "sha512-G5akfn7eKbpDN+8nPS/cb57YeA1jLTVxjpCj7tmm3QKPdyDy7T+qSC40e9ptydSWvkwjSXw1VbkpyEm39ukeAg==", "dev": true, "dependencies": { "assert-plus": "^1.0.0", 
"jsprim": "^2.0.2", - "sshpk": "^1.14.1" + "sshpk": "^1.18.0" }, "engines": { "node": ">=0.10" @@ -7334,7 +7389,6 @@ "version": "1.2.1", "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz", "integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==", - "dev": true, "funding": [ { "type": "github", @@ -7401,14 +7455,6 @@ "node": ">=8" } }, - "node_modules/inflation": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/inflation/-/inflation-2.1.0.tgz", - "integrity": "sha512-t54PPJHG1Pp7VQvxyVCJ9mBbjG3Hqryges9bXoOO6GExCPa+//i/d5GSuFtpx3ALLd7lgIAur6zrIlBQyJuMlQ==", - "engines": { - "node": ">= 0.8.0" - } - }, "node_modules/inflight": { "version": "1.0.6", "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", @@ -8003,9 +8049,9 @@ } }, "node_modules/jose": { - "version": "4.15.5", - "resolved": "https://registry.npmjs.org/jose/-/jose-4.15.5.tgz", - "integrity": "sha512-jc7BFxgKPKi94uOvEmzlSWFFe2+vASyXaKUpdQKatWAESU2MWjDfFf0fdfc83CDKcA5QecabZeNLyfhe3yKNkg==", + "version": "4.15.9", + "resolved": "https://registry.npmjs.org/jose/-/jose-4.15.9.tgz", + "integrity": "sha512-1vUQX+IdDMVPj4k8kOxgUqlcK518yluMuGZwqlr44FS1ppZB/5GWh4rZG89erpOBOJjU/OBsnCVFfapsRz6nEA==", "funding": { "url": "https://github.com/sponsors/panva" } @@ -8429,9 +8475,9 @@ } }, "node_modules/libphonenumber-js": { - "version": "1.10.51", - "resolved": "https://registry.npmjs.org/libphonenumber-js/-/libphonenumber-js-1.10.51.tgz", - "integrity": "sha512-vY2I+rQwrDQzoPds0JeTEpeWzbUJgqoV0O4v31PauHBb/e+1KCXKylHcDnBMgJZ9fH9mErsEbROJY3Z3JtqEmg==" + "version": "1.11.8", + "resolved": "https://registry.npmjs.org/libphonenumber-js/-/libphonenumber-js-1.11.8.tgz", + "integrity": "sha512-0fv/YKpJBAgXKy0kaS3fnqoUVN8901vUYAKIGD/MWZaDfhJt1nZjPL3ZzdZBt/G8G8Hw2J1xOIrXWdNHFHPAvg==" }, "node_modules/lilconfig": { "version": "2.1.0", @@ -9893,9 +9939,9 @@ "integrity": 
"sha512-y10wOWt8yZpqXmOgRo77WaHEmhYQYGNA6y421PKsKYWEK8aW+cqAphborZDhqfyKrbZEN92CN1X2KbafY2s7Yw==" }, "node_modules/nodemailer": { - "version": "6.9.9", - "resolved": "https://registry.npmjs.org/nodemailer/-/nodemailer-6.9.9.tgz", - "integrity": "sha512-dexTll8zqQoVJEZPwQAKzxxtFn0qTnjdQTchoU6Re9BUUGBJiOy3YMn/0ShTW6J5M0dfQ1NeDeRTTl4oIWgQMA==", + "version": "6.9.15", + "resolved": "https://registry.npmjs.org/nodemailer/-/nodemailer-6.9.15.tgz", + "integrity": "sha512-AHf04ySLC6CIfuRtRiEYtGEXgRfa6INgWGluDhnxTZhHSKvrBu7lc1VVchQ0d8nPc4cFaZoPq8vkyNoZr0TpGQ==", "engines": { "node": ">=6.0.0" } @@ -10241,6 +10287,11 @@ "resolved": "https://registry.npmjs.org/package-json-from-dist/-/package-json-from-dist-1.0.0.tgz", "integrity": "sha512-dATvCeZN/8wQsGywez1mzHtTlP22H8OEfPrVMLNr4/eGa+ijtLn/6M5f0dY8UKNrC2O9UCU6SSoG3qRKnt7STw==" }, + "node_modules/pako": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/pako/-/pako-2.1.0.tgz", + "integrity": "sha512-w+eufiZ1WuJYgPXbV/PO3NCMEc3xqylkKHzp8bxp1uW4qaSNQUkwmLLEc3kKsfz8lpV1F8Ht3U1Cm+9Srog2ug==" + }, "node_modules/papaparse": { "version": "5.4.1", "resolved": "https://registry.npmjs.org/papaparse/-/papaparse-5.4.1.tgz", @@ -10739,7 +10790,6 @@ "version": "0.11.10", "resolved": "https://registry.npmjs.org/process/-/process-0.11.10.tgz", "integrity": "sha512-cdGef/drWFoydD1JsMzuFf8100nZl+GT+yacc2bEced5f9Rjk4z+WtFUTBu9PhOi9j/jfmBPu0mMEY4wIdAF8A==", - "dev": true, "engines": { "node": ">= 0.6.0" } @@ -10822,20 +10872,17 @@ "node": ">=6" } }, - "node_modules/qrcode.react": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/qrcode.react/-/qrcode.react-3.1.0.tgz", - "integrity": "sha512-oyF+Urr3oAMUG/OiOuONL3HXM+53wvuH3mtIWQrYmsXoAq0DkvZp2RYUWFSMFtbdOpuS++9v+WAkzNVkMlNW6Q==", - "peerDependencies": { - "react": "^16.8.0 || ^17.0.0 || ^18.0.0" - } + "node_modules/qr.js": { + "version": "0.0.0", + "resolved": "https://registry.npmjs.org/qr.js/-/qr.js-0.0.0.tgz", + "integrity": 
"sha512-c4iYnWb+k2E+vYpRimHqSu575b1/wKl4XFeJGpFmrJQz5I88v9aY2czh7s0w36srfCM1sXgC/xpoJz5dJfq+OQ==" }, "node_modules/qs": { - "version": "6.10.4", - "resolved": "https://registry.npmjs.org/qs/-/qs-6.10.4.tgz", - "integrity": "sha512-OQiU+C+Ds5qiH91qh/mg0w+8nwQuLjM4F4M/PbmhDOoYehPh+Fb0bDjtR1sOvy7YKxvj28Y/M0PhP5uVX0kB+g==", + "version": "6.13.0", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.13.0.tgz", + "integrity": "sha512-+38qI9SOr8tfZ4QmJNplMUxqjbe7LKvvZgWdExBOmd+egZTtjLB67Gu0HRX3u/XOq7UU2Nx6nsjvS16Z9uwfpg==", "dependencies": { - "side-channel": "^1.0.4" + "side-channel": "^1.0.6" }, "engines": { "node": ">=0.6" @@ -10899,15 +10946,15 @@ } }, "node_modules/rc-cascader": { - "version": "3.21.2", - "resolved": "https://registry.npmjs.org/rc-cascader/-/rc-cascader-3.21.2.tgz", - "integrity": "sha512-J7GozpgsLaOtzfIHFJFuh4oFY0ePb1w10twqK6is3pAkqHkca/PsokbDr822KIRZ8/CK8CqevxohuPDVZ1RO/A==", + "version": "3.28.1", + "resolved": "https://registry.npmjs.org/rc-cascader/-/rc-cascader-3.28.1.tgz", + "integrity": "sha512-9+8oHIMWVLHxuaapDiqFNmD9KSyKN/P4bo9x/MBuDbyTqP8f2/POmmZxdXWBO3yq/uE3pKyQCXYNUxrNfHRv2A==", "dependencies": { "@babel/runtime": "^7.12.5", "array-tree-filter": "^2.1.0", "classnames": "^2.3.1", - "rc-select": "~14.11.0", - "rc-tree": "~5.8.1", + "rc-select": "~14.15.0", + "rc-tree": "~5.9.0", "rc-util": "^5.37.0" }, "peerDependencies": { @@ -10916,9 +10963,9 @@ } }, "node_modules/rc-checkbox": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/rc-checkbox/-/rc-checkbox-3.1.0.tgz", - "integrity": "sha512-PAwpJFnBa3Ei+5pyqMMXdcKYKNBMS+TvSDiLdDnARnMJHC8ESxwPfm4Ao1gJiKtWLdmGfigascnCpwrHFgoOBQ==", + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/rc-checkbox/-/rc-checkbox-3.3.0.tgz", + "integrity": "sha512-Ih3ZaAcoAiFKJjifzwsGiT/f/quIkxJoklW4yKGho14Olulwn8gN7hOBve0/WGDg5o/l/5mL0w7ff7/YGvefVw==", "dependencies": { "@babel/runtime": "^7.10.1", "classnames": "^2.3.2", @@ -10930,9 +10977,9 @@ } }, "node_modules/rc-collapse": { - 
"version": "3.7.2", - "resolved": "https://registry.npmjs.org/rc-collapse/-/rc-collapse-3.7.2.tgz", - "integrity": "sha512-ZRw6ipDyOnfLFySxAiCMdbHtb5ePAsB9mT17PA6y1mRD/W6KHRaZeb5qK/X9xDV1CqgyxMpzw0VdS74PCcUk4A==", + "version": "3.7.3", + "resolved": "https://registry.npmjs.org/rc-collapse/-/rc-collapse-3.7.3.tgz", + "integrity": "sha512-60FJcdTRn0X5sELF18TANwtVi7FtModq649H11mYF1jh83DniMoM4MqY627sEKRCTm4+WXfGDcB7hY5oW6xhyw==", "dependencies": { "@babel/runtime": "^7.10.1", "classnames": "2.x", @@ -10945,9 +10992,9 @@ } }, "node_modules/rc-dialog": { - "version": "9.3.4", - "resolved": "https://registry.npmjs.org/rc-dialog/-/rc-dialog-9.3.4.tgz", - "integrity": "sha512-975X3018GhR+EjZFbxA2Z57SX5rnu0G0/OxFgMMvZK4/hQWEm3MHaNvP4wXpxYDoJsp+xUvVW+GB9CMMCm81jA==", + "version": "9.5.2", + "resolved": "https://registry.npmjs.org/rc-dialog/-/rc-dialog-9.5.2.tgz", + "integrity": "sha512-qVUjc8JukG+j/pNaHVSRa2GO2/KbV2thm7yO4hepQ902eGdYK913sGkwg/fh9yhKYV1ql3BKIN2xnud3rEXAPw==", "dependencies": { "@babel/runtime": "^7.10.1", "@rc-component/portal": "^1.0.0-8", @@ -10961,15 +11008,15 @@ } }, "node_modules/rc-drawer": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/rc-drawer/-/rc-drawer-7.0.0.tgz", - "integrity": "sha512-ePcS4KtQnn57bCbVXazHN2iC8nTPCXlWEIA/Pft87Pd9U7ZeDkdRzG47jWG2/TAFXFlFltRAMcslqmUM8NPCGA==", + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/rc-drawer/-/rc-drawer-7.2.0.tgz", + "integrity": "sha512-9lOQ7kBekEJRdEpScHvtmEtXnAsy+NGDXiRWc2ZVC7QXAazNVbeT4EraQKYwCME8BJLa8Bxqxvs5swwyOepRwg==", "dependencies": { - "@babel/runtime": "^7.10.1", + "@babel/runtime": "^7.23.9", "@rc-component/portal": "^1.1.1", "classnames": "^2.2.6", "rc-motion": "^2.6.1", - "rc-util": "^5.36.0" + "rc-util": "^5.38.1" }, "peerDependencies": { "react": ">=16.9.0", @@ -10977,12 +11024,12 @@ } }, "node_modules/rc-dropdown": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/rc-dropdown/-/rc-dropdown-4.1.0.tgz", - "integrity": 
"sha512-VZjMunpBdlVzYpEdJSaV7WM7O0jf8uyDjirxXLZRNZ+tAC+NzD3PXPEtliFwGzVwBBdCmGuSqiS9DWcOLxQ9tw==", + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/rc-dropdown/-/rc-dropdown-4.2.0.tgz", + "integrity": "sha512-odM8Ove+gSh0zU27DUj5cG1gNKg7mLWBYzB5E4nNLrLwBmYEgYP43vHKDGOVZcJSVElQBI0+jTQgjnq0NfLjng==", "dependencies": { "@babel/runtime": "^7.18.3", - "@rc-component/trigger": "^1.7.0", + "@rc-component/trigger": "^2.0.0", "classnames": "^2.2.6", "rc-util": "^5.17.0" }, @@ -10992,12 +11039,12 @@ } }, "node_modules/rc-field-form": { - "version": "1.41.0", - "resolved": "https://registry.npmjs.org/rc-field-form/-/rc-field-form-1.41.0.tgz", - "integrity": "sha512-k9AS0wmxfJfusWDP/YXWTpteDNaQ4isJx9UKxx4/e8Dub4spFeZ54/EuN2sYrMRID/+hUznPgVZeg+Gf7XSYCw==", + "version": "2.4.0", + "resolved": "https://registry.npmjs.org/rc-field-form/-/rc-field-form-2.4.0.tgz", + "integrity": "sha512-XZ/lF9iqf9HXApIHQHqzJK5v2w4mkUMsVqAzOyWVzoiwwXEavY6Tpuw7HavgzIoD+huVff4JghSGcgEfX6eycg==", "dependencies": { "@babel/runtime": "^7.18.0", - "async-validator": "^4.1.0", + "@rc-component/async-validator": "^5.0.3", "rc-util": "^5.32.2" }, "engines": { @@ -11009,14 +11056,14 @@ } }, "node_modules/rc-image": { - "version": "7.5.1", - "resolved": "https://registry.npmjs.org/rc-image/-/rc-image-7.5.1.tgz", - "integrity": "sha512-Z9loECh92SQp0nSipc0MBuf5+yVC05H/pzC+Nf8xw1BKDFUJzUeehYBjaWlxly8VGBZJcTHYri61Fz9ng1G3Ag==", + "version": "7.9.0", + "resolved": "https://registry.npmjs.org/rc-image/-/rc-image-7.9.0.tgz", + "integrity": "sha512-l4zqO5E0quuLMCtdKfBgj4Suv8tIS011F5k1zBBlK25iMjjiNHxA0VeTzGFtUZERSA45gvpXDg8/P6qNLjR25g==", "dependencies": { "@babel/runtime": "^7.11.2", "@rc-component/portal": "^1.0.2", "classnames": "^2.2.6", - "rc-dialog": "~9.3.4", + "rc-dialog": "~9.5.2", "rc-motion": "^2.6.2", "rc-util": "^5.34.1" }, @@ -11026,9 +11073,9 @@ } }, "node_modules/rc-input": { - "version": "1.4.3", - "resolved": "https://registry.npmjs.org/rc-input/-/rc-input-1.4.3.tgz", - "integrity": 
"sha512-aHyQUAIRmTlOnvk5EcNqEpJ+XMtfMpYRAJayIlJfsvvH9cAKUWboh4egm23vgMA7E+c/qm4BZcnrDcA960GC1w==", + "version": "1.6.3", + "resolved": "https://registry.npmjs.org/rc-input/-/rc-input-1.6.3.tgz", + "integrity": "sha512-wI4NzuqBS8vvKr8cljsvnTUqItMfG1QbJoxovCgL+DX4eVUcHIjVwharwevIxyy7H/jbLryh+K7ysnJr23aWIA==", "dependencies": { "@babel/runtime": "^7.11.1", "classnames": "^2.2.1", @@ -11040,15 +11087,15 @@ } }, "node_modules/rc-input-number": { - "version": "9.0.0", - "resolved": "https://registry.npmjs.org/rc-input-number/-/rc-input-number-9.0.0.tgz", - "integrity": "sha512-RfcDBDdWFFetouWFXBA+WPEC8LzBXyngr9b+yTLVIygfFu7HiLRGn/s/v9wwno94X7KFvnb28FNynMGj9XJlDQ==", + "version": "9.2.0", + "resolved": "https://registry.npmjs.org/rc-input-number/-/rc-input-number-9.2.0.tgz", + "integrity": "sha512-5XZFhBCV5f9UQ62AZ2hFbEY8iZT/dm23Q1kAg0H8EvOgD3UDbYYJAayoVIkM3lQaCqYAW5gV0yV3vjw1XtzWHg==", "dependencies": { "@babel/runtime": "^7.10.1", "@rc-component/mini-decimal": "^1.0.1", "classnames": "^2.2.5", - "rc-input": "~1.4.0", - "rc-util": "^5.28.0" + "rc-input": "~1.6.0", + "rc-util": "^5.40.1" }, "peerDependencies": { "react": ">=16.9.0", @@ -11056,16 +11103,16 @@ } }, "node_modules/rc-mentions": { - "version": "2.10.1", - "resolved": "https://registry.npmjs.org/rc-mentions/-/rc-mentions-2.10.1.tgz", - "integrity": "sha512-72qsEcr/7su+a07ndJ1j8rI9n0Ka/ngWOLYnWMMv0p2mi/5zPwPrEDTt6Uqpe8FWjWhueDJx/vzunL6IdKDYMg==", + "version": "2.15.0", + "resolved": "https://registry.npmjs.org/rc-mentions/-/rc-mentions-2.15.0.tgz", + "integrity": "sha512-f5v5i7VdqvBDXbphoqcQWmXDif2Msd2arritVoWybrVDuHE6nQ7XCYsybHbV//WylooK52BFDouFvyaRDtXZEw==", "dependencies": { "@babel/runtime": "^7.22.5", - "@rc-component/trigger": "^1.5.0", + "@rc-component/trigger": "^2.0.0", "classnames": "^2.2.6", - "rc-input": "~1.4.0", - "rc-menu": "~9.12.0", - "rc-textarea": "~1.6.1", + "rc-input": "~1.6.0", + "rc-menu": "~9.14.0", + "rc-textarea": "~1.8.0", "rc-util": "^5.34.1" }, "peerDependencies": { @@ -11074,12 
+11121,12 @@ } }, "node_modules/rc-menu": { - "version": "9.12.4", - "resolved": "https://registry.npmjs.org/rc-menu/-/rc-menu-9.12.4.tgz", - "integrity": "sha512-t2NcvPLV1mFJzw4F21ojOoRVofK2rWhpKPx69q2raUsiHPDP6DDevsBILEYdsIegqBeSXoWs2bf6CueBKg3BFg==", + "version": "9.14.1", + "resolved": "https://registry.npmjs.org/rc-menu/-/rc-menu-9.14.1.tgz", + "integrity": "sha512-5wlRb3M8S4yGlWhSoEYJ7ZVRElyScdcpUHxgiLxkeig1tEdyKrnED3B2fhpN0Rrpdp9jyhnmZR/Lwq2fH5VvDQ==", "dependencies": { "@babel/runtime": "^7.10.1", - "@rc-component/trigger": "^1.17.0", + "@rc-component/trigger": "^2.0.0", "classnames": "2.x", "rc-motion": "^2.4.3", "rc-overflow": "^1.3.1", @@ -11091,13 +11138,13 @@ } }, "node_modules/rc-motion": { - "version": "2.9.0", - "resolved": "https://registry.npmjs.org/rc-motion/-/rc-motion-2.9.0.tgz", - "integrity": "sha512-XIU2+xLkdIr1/h6ohPZXyPBMvOmuyFZQ/T0xnawz+Rh+gh4FINcnZmMT5UTIj6hgI0VLDjTaPeRd+smJeSPqiQ==", + "version": "2.9.2", + "resolved": "https://registry.npmjs.org/rc-motion/-/rc-motion-2.9.2.tgz", + "integrity": "sha512-fUAhHKLDdkAXIDLH0GYwof3raS58dtNUmzLF2MeiR8o6n4thNpSDQhOqQzWE4WfFZDCi9VEN8n7tiB7czREcyw==", "dependencies": { "@babel/runtime": "^7.11.1", "classnames": "^2.2.1", - "rc-util": "^5.21.0" + "rc-util": "^5.43.0" }, "peerDependencies": { "react": ">=16.9.0", @@ -11105,9 +11152,9 @@ } }, "node_modules/rc-notification": { - "version": "5.3.0", - "resolved": "https://registry.npmjs.org/rc-notification/-/rc-notification-5.3.0.tgz", - "integrity": "sha512-WCf0uCOkZ3HGfF0p1H4Sgt7aWfipxORWTPp7o6prA3vxwtWhtug3GfpYls1pnBp4WA+j8vGIi5c2/hQRpGzPcQ==", + "version": "5.6.1", + "resolved": "https://registry.npmjs.org/rc-notification/-/rc-notification-5.6.1.tgz", + "integrity": "sha512-Q4ZKES3IBxWmpNnlDiMFYoH6D7MJ1L3n3gp59pnpaMI8gm9Vj+gVRxdInvoYjBoZvEOenxb9MbbKvnFhzJpgvA==", "dependencies": { "@babel/runtime": "^7.10.1", "classnames": "2.x", @@ -11138,9 +11185,9 @@ } }, "node_modules/rc-pagination": { - "version": "4.0.4", - "resolved": 
"https://registry.npmjs.org/rc-pagination/-/rc-pagination-4.0.4.tgz", - "integrity": "sha512-GGrLT4NgG6wgJpT/hHIpL9nELv27A1XbSZzECIuQBQTVSf4xGKxWr6I/jhpRPauYEWEbWVw22ObG6tJQqwJqWQ==", + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/rc-pagination/-/rc-pagination-4.2.0.tgz", + "integrity": "sha512-V6qeANJsT6tmOcZ4XiUmj8JXjRLbkusuufpuoBw2GiAn94fIixYjFLmbruD1Sbhn8fPLDnWawPp4CN37zQorvw==", "dependencies": { "@babel/runtime": "^7.10.1", "classnames": "^2.3.2", @@ -11152,16 +11199,16 @@ } }, "node_modules/rc-picker": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/rc-picker/-/rc-picker-4.1.1.tgz", - "integrity": "sha512-H99qaHUepHjHnAqMLiftJEATXRuHJZcUyFoRkyIqUvTHVGnx/uHxFFNm7QIu1valCpfwdsGWQxiWgn9CAxvlvA==", + "version": "4.6.14", + "resolved": "https://registry.npmjs.org/rc-picker/-/rc-picker-4.6.14.tgz", + "integrity": "sha512-7DuTfUFdkxmsNpWQ0TWv6FPGna5e6KKC4nxtx3x9xhumLz7jb3fhlDdWQvqEL6tpt9DOb1+N5j+wB+lDOSS9kg==", "dependencies": { - "@babel/runtime": "^7.10.1", - "@rc-component/trigger": "^1.5.0", + "@babel/runtime": "^7.24.7", + "@rc-component/trigger": "^2.0.0", "classnames": "^2.2.1", "rc-overflow": "^1.3.2", "rc-resize-observer": "^1.4.0", - "rc-util": "^5.38.1" + "rc-util": "^5.43.0" }, "engines": { "node": ">=8.x" @@ -11190,9 +11237,9 @@ } }, "node_modules/rc-progress": { - "version": "3.5.1", - "resolved": "https://registry.npmjs.org/rc-progress/-/rc-progress-3.5.1.tgz", - "integrity": "sha512-V6Amx6SbLRwPin/oD+k1vbPrO8+9Qf8zW1T8A7o83HdNafEVvAxPV5YsgtKFP+Ud5HghLj33zKOcEHrcrUGkfw==", + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/rc-progress/-/rc-progress-4.0.0.tgz", + "integrity": "sha512-oofVMMafOCokIUIBnZLNcOZFsABaUw8PPrf1/y0ZBvKZNpOiu5h4AO9vv11Sw0p4Hb3D0yGWuEattcQGtNJ/aw==", "dependencies": { "@babel/runtime": "^7.10.1", "classnames": "^2.2.6", @@ -11204,9 +11251,9 @@ } }, "node_modules/rc-rate": { - "version": "2.12.0", - "resolved": "https://registry.npmjs.org/rc-rate/-/rc-rate-2.12.0.tgz", - 
"integrity": "sha512-g092v5iZCdVzbjdn28FzvWebK2IutoVoiTeqoLTj9WM7SjA/gOJIw5/JFZMRyJYYVe1jLAU2UhAfstIpCNRozg==", + "version": "2.13.0", + "resolved": "https://registry.npmjs.org/rc-rate/-/rc-rate-2.13.0.tgz", + "integrity": "sha512-oxvx1Q5k5wD30sjN5tqAyWTvJfLNNJn7Oq3IeS4HxWfAiC4BOXMITNAsw7u/fzdtO4MS8Ki8uRLOzcnEuoQiAw==", "dependencies": { "@babel/runtime": "^7.10.1", "classnames": "^2.2.5", @@ -11251,12 +11298,12 @@ } }, "node_modules/rc-select": { - "version": "14.11.0", - "resolved": "https://registry.npmjs.org/rc-select/-/rc-select-14.11.0.tgz", - "integrity": "sha512-8J8G/7duaGjFiTXCBLWfh5P+KDWyA3KTlZDfV3xj/asMPqB2cmxfM+lH50wRiPIRsCQ6EbkCFBccPuaje3DHIg==", + "version": "14.15.2", + "resolved": "https://registry.npmjs.org/rc-select/-/rc-select-14.15.2.tgz", + "integrity": "sha512-oNoXlaFmpqXYcQDzcPVLrEqS2J9c+/+oJuGrlXeVVX/gVgrbHa5YcyiRUXRydFjyuA7GP3elRuLF7Y3Tfwltlw==", "dependencies": { "@babel/runtime": "^7.10.1", - "@rc-component/trigger": "^1.5.0", + "@rc-component/trigger": "^2.1.1", "classnames": "2.x", "rc-motion": "^2.0.1", "rc-overflow": "^1.3.1", @@ -11272,13 +11319,13 @@ } }, "node_modules/rc-slider": { - "version": "10.5.0", - "resolved": "https://registry.npmjs.org/rc-slider/-/rc-slider-10.5.0.tgz", - "integrity": "sha512-xiYght50cvoODZYI43v3Ylsqiw14+D7ELsgzR40boDZaya1HFa1Etnv9MDkQE8X/UrXAffwv2AcNAhslgYuDTw==", + "version": "11.1.6", + "resolved": "https://registry.npmjs.org/rc-slider/-/rc-slider-11.1.6.tgz", + "integrity": "sha512-LACAaXM0hi+4x4ErDGZLy7weIQwmBIVbIgPE+eDHiHkyzMvKjWHraCG8/B22Y/tCQUPAsP02wBhKhth7mH2PIw==", "dependencies": { "@babel/runtime": "^7.10.1", "classnames": "^2.2.5", - "rc-util": "^5.27.0" + "rc-util": "^5.36.0" }, "engines": { "node": ">=8.x" @@ -11320,16 +11367,16 @@ } }, "node_modules/rc-table": { - "version": "7.39.0", - "resolved": "https://registry.npmjs.org/rc-table/-/rc-table-7.39.0.tgz", - "integrity": "sha512-7fHLMNsm/2DlGwyIMkdH2xIeRzb5I69bLsFaEVtX+gqmGhByy0wtOAgHkiOew3PtXozSJyh+iXifjLgQzWdczw==", + "version": 
"7.45.7", + "resolved": "https://registry.npmjs.org/rc-table/-/rc-table-7.45.7.tgz", + "integrity": "sha512-wi9LetBL1t1csxyGkMB2p3mCiMt+NDexMlPbXHvQFmBBAsMxrgNSAPwUci2zDLUq9m8QdWc1Nh8suvrpy9mXrg==", "dependencies": { "@babel/runtime": "^7.10.1", "@rc-component/context": "^1.4.0", "classnames": "^2.2.5", "rc-resize-observer": "^1.1.0", "rc-util": "^5.37.0", - "rc-virtual-list": "^3.11.1" + "rc-virtual-list": "^3.14.2" }, "engines": { "node": ">=8.x" @@ -11340,14 +11387,14 @@ } }, "node_modules/rc-tabs": { - "version": "14.0.0", - "resolved": "https://registry.npmjs.org/rc-tabs/-/rc-tabs-14.0.0.tgz", - "integrity": "sha512-lp1YWkaPnjlyhOZCPrAWxK6/P6nMGX/BAZcAC3nuVwKz0Byfp+vNnQKK8BRCP2g/fzu+SeB5dm9aUigRu3tRkQ==", + "version": "15.1.1", + "resolved": "https://registry.npmjs.org/rc-tabs/-/rc-tabs-15.1.1.tgz", + "integrity": "sha512-Tc7bJvpEdkWIVCUL7yQrMNBJY3j44NcyWS48jF/UKMXuUlzaXK+Z/pEL5LjGcTadtPvVmNqA40yv7hmr+tCOAw==", "dependencies": { "@babel/runtime": "^7.11.2", "classnames": "2.x", - "rc-dropdown": "~4.1.0", - "rc-menu": "~9.12.0", + "rc-dropdown": "~4.2.0", + "rc-menu": "~9.14.0", "rc-motion": "^2.6.2", "rc-resize-observer": "^1.0.0", "rc-util": "^5.34.1" @@ -11361,13 +11408,13 @@ } }, "node_modules/rc-textarea": { - "version": "1.6.3", - "resolved": "https://registry.npmjs.org/rc-textarea/-/rc-textarea-1.6.3.tgz", - "integrity": "sha512-8k7+8Y2GJ/cQLiClFMg8kUXOOdvcFQrnGeSchOvI2ZMIVvX5a3zQpLxoODL0HTrvU63fPkRmMuqaEcOF9dQemA==", + "version": "1.8.1", + "resolved": "https://registry.npmjs.org/rc-textarea/-/rc-textarea-1.8.1.tgz", + "integrity": "sha512-bm36N2ZqwZAP60ZQg2OY9mPdqWC+m6UTjHc+CqEZOxb3Ia29BGHazY/s5bI8M4113CkqTzhtFUDNA078ZiOx3Q==", "dependencies": { "@babel/runtime": "^7.10.1", "classnames": "^2.2.1", - "rc-input": "~1.4.0", + "rc-input": "~1.6.0", "rc-resize-observer": "^1.0.0", "rc-util": "^5.27.0" }, @@ -11377,12 +11424,12 @@ } }, "node_modules/rc-tooltip": { - "version": "6.1.3", - "resolved": 
"https://registry.npmjs.org/rc-tooltip/-/rc-tooltip-6.1.3.tgz", - "integrity": "sha512-HMSbSs5oieZ7XddtINUddBLSVgsnlaSb3bZrzzGWjXa7/B7nNedmsuz72s7EWFEro9mNa7RyF3gOXKYqvJiTcQ==", + "version": "6.2.0", + "resolved": "https://registry.npmjs.org/rc-tooltip/-/rc-tooltip-6.2.0.tgz", + "integrity": "sha512-iS/3iOAvtDh9GIx1ulY7EFUXUtktFccNLsARo3NPgLf0QW9oT0w3dA9cYWlhqAKmD+uriEwdWz1kH0Qs4zk2Aw==", "dependencies": { "@babel/runtime": "^7.11.2", - "@rc-component/trigger": "^1.18.0", + "@rc-component/trigger": "^2.0.0", "classnames": "^2.3.1" }, "peerDependencies": { @@ -11391,9 +11438,9 @@ } }, "node_modules/rc-tree": { - "version": "5.8.5", - "resolved": "https://registry.npmjs.org/rc-tree/-/rc-tree-5.8.5.tgz", - "integrity": "sha512-PRfcZtVDNkR7oh26RuNe1hpw11c1wfgzwmPFL0lnxGnYefe9lDAO6cg5wJKIAwyXFVt5zHgpjYmaz0CPy1ZtKg==", + "version": "5.9.0", + "resolved": "https://registry.npmjs.org/rc-tree/-/rc-tree-5.9.0.tgz", + "integrity": "sha512-CPrgOvm9d/9E+izTONKSngNzQdIEjMox2PBufWjS1wf7vxtvmCWzK1SlpHbRY6IaBfJIeZ+88RkcIevf729cRg==", "dependencies": { "@babel/runtime": "^7.10.1", "classnames": "2.x", @@ -11410,14 +11457,14 @@ } }, "node_modules/rc-tree-select": { - "version": "5.17.0", - "resolved": "https://registry.npmjs.org/rc-tree-select/-/rc-tree-select-5.17.0.tgz", - "integrity": "sha512-7sRGafswBhf7n6IuHyCEFCildwQIgyKiV8zfYyUoWfZEFdhuk7lCH+DN0aHt+oJrdiY9+6Io/LDXloGe01O8XQ==", + "version": "5.23.0", + "resolved": "https://registry.npmjs.org/rc-tree-select/-/rc-tree-select-5.23.0.tgz", + "integrity": "sha512-aQGi2tFSRw1WbXv0UVXPzHm09E0cSvUVZMLxQtMv3rnZZpNmdRXWrnd9QkLNlVH31F+X5rgghmdSFF3yZW0N9A==", "dependencies": { "@babel/runtime": "^7.10.1", "classnames": "2.x", - "rc-select": "~14.11.0-0", - "rc-tree": "~5.8.1", + "rc-select": "~14.15.0", + "rc-tree": "~5.9.0", "rc-util": "^5.16.1" }, "peerDependencies": { @@ -11426,9 +11473,9 @@ } }, "node_modules/rc-upload": { - "version": "4.5.2", - "resolved": "https://registry.npmjs.org/rc-upload/-/rc-upload-4.5.2.tgz", - "integrity": 
"sha512-QO3ne77DwnAPKFn0bA5qJM81QBjQi0e0NHdkvpFyY73Bea2NfITiotqJqVjHgeYPOJu5lLVR32TNGP084aSoXA==", + "version": "4.7.0", + "resolved": "https://registry.npmjs.org/rc-upload/-/rc-upload-4.7.0.tgz", + "integrity": "sha512-eUwxYNHlsYe5vYhKFAUGrQG95JrnPzY+BmPi1Daq39fWNl/eOc7v4UODuWrVp2LFkQBuV3cMCG/I68iub6oBrg==", "dependencies": { "@babel/runtime": "^7.18.3", "classnames": "^2.2.5", @@ -11440,9 +11487,9 @@ } }, "node_modules/rc-util": { - "version": "5.38.1", - "resolved": "https://registry.npmjs.org/rc-util/-/rc-util-5.38.1.tgz", - "integrity": "sha512-e4ZMs7q9XqwTuhIK7zBIVFltUtMSjphuPPQXHoHlzRzNdOwUxDejo0Zls5HYaJfRKNURcsS/ceKVULlhjBrxng==", + "version": "5.43.0", + "resolved": "https://registry.npmjs.org/rc-util/-/rc-util-5.43.0.tgz", + "integrity": "sha512-AzC7KKOXFqAdIBqdGWepL9Xn7cm3vnAmjlHqUnoQaTMZYhM4VlXGLkkHHxj/BZ7Td0+SOPKB4RGPboBVKT9htw==", "dependencies": { "@babel/runtime": "^7.18.3", "react-is": "^18.2.0" @@ -11458,9 +11505,9 @@ "integrity": "sha512-xWGDIW6x921xtzPkhiULtthJHoJvBbF3q26fzloPCK0hsvxtPVelvftw3zjbHWSkR2km9Z+4uxbDDK/6Zw9B8w==" }, "node_modules/rc-virtual-list": { - "version": "3.11.4", - "resolved": "https://registry.npmjs.org/rc-virtual-list/-/rc-virtual-list-3.11.4.tgz", - "integrity": "sha512-NbBi0fvyIu26gP69nQBiWgUMTPX3mr4FcuBQiVqagU0BnuX8WQkiivnMs105JROeuUIFczLrlgUhLQwTWV1XDA==", + "version": "3.14.5", + "resolved": "https://registry.npmjs.org/rc-virtual-list/-/rc-virtual-list-3.14.5.tgz", + "integrity": "sha512-ZMOnkCLv2wUN8Jz7yI4XiSLa9THlYvf00LuMhb1JlsQCewuU7ydPuHw1rGVPhe9VZYl/5UqODtNd7QKJ2DMGfg==", "dependencies": { "@babel/runtime": "^7.20.0", "classnames": "^2.2.6", @@ -11645,6 +11692,18 @@ "resolved": "https://registry.npmjs.org/react-is/-/react-is-18.2.0.tgz", "integrity": "sha512-xWGDIW6x921xtzPkhiULtthJHoJvBbF3q26fzloPCK0hsvxtPVelvftw3zjbHWSkR2km9Z+4uxbDDK/6Zw9B8w==" }, + "node_modules/react-qr-code": { + "version": "2.0.15", + "resolved": "https://registry.npmjs.org/react-qr-code/-/react-qr-code-2.0.15.tgz", + "integrity": 
"sha512-MkZcjEXqVKqXEIMVE0mbcGgDpkfSdd8zhuzXEl9QzYeNcw8Hq2oVIzDLWuZN2PQBwM5PWjc2S31K8Q1UbcFMfw==", + "dependencies": { + "prop-types": "^15.8.1", + "qr.js": "0.0.0" + }, + "peerDependencies": { + "react": "*" + } + }, "node_modules/react-resizable": { "version": "3.0.5", "resolved": "https://registry.npmjs.org/react-resizable/-/react-resizable-3.0.5.tgz", @@ -12030,9 +12089,9 @@ } }, "node_modules/rfdc": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/rfdc/-/rfdc-1.3.0.tgz", - "integrity": "sha512-V2hovdzFbOi77/WajaSMXk2OLm+xNIeQdMMuB7icj7bk6zi2F8GGAxigcnDFpJHbNyNcgyJDiP+8nOrY5cZGrA==", + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/rfdc/-/rfdc-1.4.1.tgz", + "integrity": "sha512-q1b3N5QkRUWUl7iyylaaj3kOpIT0N2i9MqIEQXP73GVsN9cw3fdx8X63cEmWhJGi2PPCF23Ijp7ktmd39rawIA==", "dev": true }, "node_modules/rimraf": { @@ -12271,14 +12330,16 @@ } }, "node_modules/set-function-length": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/set-function-length/-/set-function-length-1.1.1.tgz", - "integrity": "sha512-VoaqjbBJKiWtg4yRcKBQ7g7wnGnLV3M8oLvVWwOk2PdYY6PEFegR1vezXR0tw6fZGF9csVakIRjrJiy2veSBFQ==", + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/set-function-length/-/set-function-length-1.2.2.tgz", + "integrity": "sha512-pgRc4hJ4/sNjWCSS9AmnS40x3bNMDTknHgL5UaMBTMyJnU90EgWh1Rz+MC9eFu4BuN/UwZjKQuY/1v3rM7HMfg==", "dependencies": { - "define-data-property": "^1.1.1", - "get-intrinsic": "^1.2.1", + "define-data-property": "^1.1.4", + "es-errors": "^1.3.0", + "function-bind": "^1.1.2", + "get-intrinsic": "^1.2.4", "gopd": "^1.0.1", - "has-property-descriptors": "^1.0.0" + "has-property-descriptors": "^1.0.2" }, "engines": { "node": ">= 0.4" @@ -12327,13 +12388,17 @@ "integrity": "sha512-sQTKC1Re/rM6XyFM6fIAGHRPVGvyXfgzIDvzoq608vM+jeyVD0Tu1E6Np0Kc2zAIFWIj963V2800iF/9LPieQw==" }, "node_modules/side-channel": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.0.4.tgz", - 
"integrity": "sha512-q5XPytqFEIKHkGdiMIrY10mvLRvnQh42/+GoBlFW3b2LXLE2xxJpZFdm94we0BaoV3RwJyGqg5wS7epxTv0Zvw==", + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.0.6.tgz", + "integrity": "sha512-fDW/EZ6Q9RiO8eFG8Hj+7u/oW+XrPTIChwCOM2+th2A6OblDtYYIpve9m+KvI9Z4C9qSEXlaGR6bTEYHReuglA==", "dependencies": { - "call-bind": "^1.0.0", - "get-intrinsic": "^1.0.2", - "object-inspect": "^1.9.0" + "call-bind": "^1.0.7", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.4", + "object-inspect": "^1.13.1" + }, + "engines": { + "node": ">= 0.4" }, "funding": { "url": "https://github.com/sponsors/ljharb" @@ -12760,9 +12825,9 @@ } }, "node_modules/stylis": { - "version": "4.3.1", - "resolved": "https://registry.npmjs.org/stylis/-/stylis-4.3.1.tgz", - "integrity": "sha512-EQepAV+wMsIaGVGX1RECzgrcqRRU/0sYOHkeLsZ3fzHaHXZy4DaOOX0vOlGQdlsjkh3mFHAIlVimpwAs4dslyQ==" + "version": "4.3.4", + "resolved": "https://registry.npmjs.org/stylis/-/stylis-4.3.4.tgz", + "integrity": "sha512-osIBl6BGUmSfDkyH2mB7EFvCJntXDrLhKjHTRj/rK6xLH0yuPrHULDRQzKokSOD4VoorhtKpfcfW1GAntu8now==" }, "node_modules/sucrase": { "version": "3.35.0", @@ -12794,12 +12859,13 @@ } }, "node_modules/supertokens-auth-react": { - "version": "0.34.2", - "resolved": "https://registry.npmjs.org/supertokens-auth-react/-/supertokens-auth-react-0.34.2.tgz", - "integrity": "sha512-0Gaqb7SWL5+UXd9Ft87db3CK0i0CSzb9Ch3Lf+ZMcMfqeokWLZewhu3yqoZYvX035owjhMc72PYF1fxd6TwIQQ==", + "version": "0.47.0", + "resolved": "https://registry.npmjs.org/supertokens-auth-react/-/supertokens-auth-react-0.47.0.tgz", + "integrity": "sha512-m8+yyQAdQ9x6VkejMCY3+MwgQrV8beSPDw+nK2Wo2CmGOkFwGqkDADTrB+174YD0rD+NBL0HPkr1JwaSnZ/SHQ==", "dependencies": { "intl-tel-input": "^17.0.19", "prop-types": "*", + "react-qr-code": "^2.0.12", "supertokens-js-override": "^0.0.4" }, "engines": { @@ -12809,7 +12875,7 @@ "peerDependencies": { "react": ">=16.8.0", "react-dom": ">=16.8.0", - "supertokens-web-js": "^0.7.2" + 
"supertokens-web-js": "^0.13.0" } }, "node_modules/supertokens-js-override": { @@ -12818,38 +12884,63 @@ "integrity": "sha512-r0JFBjkMIdep3Lbk3JA+MpnpuOtw4RSyrlRAbrzMcxwiYco3GFWl/daimQZ5b1forOiUODpOlXbSOljP/oyurg==" }, "node_modules/supertokens-node": { - "version": "15.2.1", - "resolved": "https://registry.npmjs.org/supertokens-node/-/supertokens-node-15.2.1.tgz", - "integrity": "sha512-3zJ2EsiHYJHnYwAzDQI5Alp+4x/KcwEOBgeoPN5bWglZY0Xw0AzcZvd8S3N71vjLGvab0J3XxWmHeEHqSz5dbg==", + "version": "20.1.2", + "resolved": "https://registry.npmjs.org/supertokens-node/-/supertokens-node-20.1.2.tgz", + "integrity": "sha512-Ypbm5h537VykYFGbVBtnDr0cea0VWhFszrN05tMZKKvc7h40lVV+uwF+1wh03RWuAFsIdb81DcoFV1cwgmz4sg==", "dependencies": { + "buffer": "^6.0.3", "content-type": "^1.0.5", "cookie": "0.4.0", "cross-fetch": "^3.1.6", "debug": "^4.3.3", - "inflation": "^2.0.0", "jose": "^4.13.1", "libphonenumber-js": "^1.9.44", "nodemailer": "^6.7.2", + "pako": "^2.1.0", "pkce-challenge": "^3.0.0", + "process": "^0.11.10", "psl": "1.8.0", "supertokens-js-override": "^0.0.4", - "twilio": "^4.7.2" + "twilio": "^4.19.3" + } + }, + "node_modules/supertokens-node/node_modules/buffer": { + "version": "6.0.3", + "resolved": "https://registry.npmjs.org/buffer/-/buffer-6.0.3.tgz", + "integrity": "sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "dependencies": { + "base64-js": "^1.3.1", + "ieee754": "^1.2.1" } }, "node_modules/supertokens-web-js": { - "version": "0.7.3", - "resolved": "https://registry.npmjs.org/supertokens-web-js/-/supertokens-web-js-0.7.3.tgz", - "integrity": "sha512-HkpIwQ7KCcD8gn3D9u9hfLV/rAf5sr2M6ICH16R3JV2TOB7XmFgX+jzVn3R7DuSRSXu39dT0nn0Vcdc30EzBbA==", + "version": "0.13.0", + "resolved": 
"https://registry.npmjs.org/supertokens-web-js/-/supertokens-web-js-0.13.0.tgz", + "integrity": "sha512-I0o8Pblu4G1Bopm6c0YI2bZQ/22k5qrjdPFV7etNJ+ydOmNoQPBCo1iY9QbmLANcokyLOaWS5k8jEuwoVh/FYQ==", "peer": true, "dependencies": { "supertokens-js-override": "0.0.4", - "supertokens-website": "^17.0.1" + "supertokens-website": "^20.0.1" } }, "node_modules/supertokens-website": { - "version": "17.0.4", - "resolved": "https://registry.npmjs.org/supertokens-website/-/supertokens-website-17.0.4.tgz", - "integrity": "sha512-ayWhEFvspUe26YhM1bq11ssEpnFCZIsoHZtJwJHgHsoflfMUKdgrzOix/bboI0PWJeNTUphHyZebw0ApctaS1Q==", + "version": "20.1.4", + "resolved": "https://registry.npmjs.org/supertokens-website/-/supertokens-website-20.1.4.tgz", + "integrity": "sha512-2hbB/MWJew4W8bq5q0M75tjUnniahLzXVOnu+535j3HB4wworwMJ1ZGfg25OL9q1fm4g+ITNKVjJeJgJqEDlmA==", "peer": true, "dependencies": { "browser-tabs-lock": "^1.3.0", @@ -13057,9 +13148,9 @@ } }, "node_modules/throttle-debounce": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/throttle-debounce/-/throttle-debounce-5.0.0.tgz", - "integrity": "sha512-2iQTSgkkc1Zyk0MeVrt/3BvuOXYPl/R8Z0U2xxo9rjwNciaHDG3R+Lm6dh4EeUci49DanvBnuqI6jshoQQRGEg==", + "version": "5.0.2", + "resolved": "https://registry.npmjs.org/throttle-debounce/-/throttle-debounce-5.0.2.tgz", + "integrity": "sha512-B71/4oyj61iNH0KeCamLuE2rmKuTO5byTOSVwECM5FA7TiAiAW+UqTKZ9ERueC4qvgSttUhdmq1mXC3kJqGX7A==", "engines": { "node": ">=12.22" } @@ -13090,15 +13181,12 @@ "integrity": "sha512-lBN9zLN/oAf68o3zNXYrdCt1kP8WsiGW8Oo2ka41b2IM5JL/S1CTyX1rW0mb/zSuJun0ZUrDxx4sqvYS2FWzPA==" }, "node_modules/tmp": { - "version": "0.2.1", - "resolved": "https://registry.npmjs.org/tmp/-/tmp-0.2.1.tgz", - "integrity": "sha512-76SUhtfqR2Ijn+xllcI5P1oyannHNHByD80W1q447gU3mp9G9PSpGdWmjUOHRDPiHYacIk66W7ubDTuPF3BEtQ==", + "version": "0.2.3", + "resolved": "https://registry.npmjs.org/tmp/-/tmp-0.2.3.tgz", + "integrity": 
"sha512-nZD7m9iCPC5g0pYmcaxogYKggSfLsdxl8of3Q/oIbqCqLLIO9IAF0GWjX1z9NZRHPiXv8Wex4yDCaZsgEw0Y8w==", "dev": true, - "dependencies": { - "rimraf": "^3.0.0" - }, "engines": { - "node": ">=8.17.0" + "node": ">=14.14" } }, "node_modules/to-fast-properties": { @@ -13143,9 +13231,9 @@ } }, "node_modules/tough-cookie": { - "version": "4.1.3", - "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-4.1.3.tgz", - "integrity": "sha512-aX/y5pVRkfRnfmuX+OdbSdXvPe6ieKX/G2s7e98f4poJHnqH3281gDPm/metm6E/WRamfx7WC4HUqkWHfQHprw==", + "version": "4.1.4", + "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-4.1.4.tgz", + "integrity": "sha512-Loo5UUvLD9ScZ6jh8beX1T6sO1w2/MpCRpEP7V280GKMVUQ0Jzar2U3UJPsrdbziLEMMhu3Ujnq//rhiFuIeag==", "dev": true, "dependencies": { "psl": "^1.1.33", @@ -13272,9 +13360,9 @@ "dev": true }, "node_modules/twilio": { - "version": "4.19.3", - "resolved": "https://registry.npmjs.org/twilio/-/twilio-4.19.3.tgz", - "integrity": "sha512-3X5Czl9Vg4QFl+2pnfMQ+H8YfEDQ4WeuAmqjUpbK65x0DfmxTCHuPEFWUKVZCJZew6iltJB/1whhVvIKETe54A==", + "version": "4.23.0", + "resolved": "https://registry.npmjs.org/twilio/-/twilio-4.23.0.tgz", + "integrity": "sha512-LdNBQfOe0dY2oJH2sAsrxazpgfFQo5yXGxe96QA8UWB5uu+433PrUbkv8gQ5RmrRCqUTPQ0aOrIyAdBr1aB03Q==", "dependencies": { "axios": "^1.6.0", "dayjs": "^1.11.9", diff --git a/agenta-web/package.json b/agenta-web/package.json index 00045d105..7676ad6c7 100644 --- a/agenta-web/package.json +++ b/agenta-web/package.json @@ -1,6 +1,6 @@ { "name": "agenta", - "version": "0.24.4", + "version": "0.25.2", "private": true, "engines": { "node": ">=18" @@ -50,7 +50,7 @@ "@types/uuid": "^9.0.7", "ag-grid-community": "^31.2.0", "ag-grid-react": "^31.2.0", - "antd": "^5.4.7", + "antd": "^5.20.6", "autoprefixer": "10.4.14", "axios": "^1.4.0", "classnames": "^2.3.2", @@ -80,8 +80,8 @@ "react-resizable": "^3.0.5", "react-syntax-highlighter": "^15.5.0", "react-youtube": "^10.1.0", - "supertokens-auth-react": "^0.34.0", - 
"supertokens-node": "^15.0.4", + "supertokens-auth-react": "^0.47.0", + "supertokens-node": "^20.1.2", "swr": "^2.1.5", "tailwindcss": "^3.4.4", "typescript": "5.0.4", @@ -92,7 +92,7 @@ "@swc/cli": "^0.3.12", "@swc/core": "^1.4.15", "@types/node": "^20.8.10", - "cypress": "^13.4.0", + "cypress": "^13.15.0", "node-mocks-http": "^1.12.2", "prettier": "^3.2.5" } diff --git a/agenta-web/public/arrows.svg b/agenta-web/public/arrows.svg deleted file mode 100644 index bbb17a648..000000000 --- a/agenta-web/public/arrows.svg +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/agenta-web/public/assets/On-boarding.png b/agenta-web/public/assets/On-boarding.png new file mode 100644 index 000000000..00ec79f65 Binary files /dev/null and b/agenta-web/public/assets/On-boarding.png differ diff --git a/agenta-web/public/assets/complex-img.png b/agenta-web/public/assets/complex-img.png deleted file mode 100644 index d25bd4fee..000000000 Binary files a/agenta-web/public/assets/complex-img.png and /dev/null differ diff --git a/agenta-web/public/assets/not-found.png b/agenta-web/public/assets/not-found.png new file mode 100644 index 000000000..f4048f657 Binary files /dev/null and b/agenta-web/public/assets/not-found.png differ diff --git a/agenta-web/public/assets/onboard-page-grids.svg b/agenta-web/public/assets/onboard-page-grids.svg new file mode 100644 index 000000000..85990df21 --- /dev/null +++ b/agenta-web/public/assets/onboard-page-grids.svg @@ -0,0 +1,81 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/agenta-web/public/assets/simple-img.png b/agenta-web/public/assets/simple-img.png deleted file mode 100644 index 3fb9f338d..000000000 Binary files a/agenta-web/public/assets/simple-img.png and /dev/null differ diff --git a/agenta-web/public/assets/tips-images/testsets-dark.png b/agenta-web/public/assets/tips-images/testsets-dark.png deleted file mode 
100644 index 5b0018753..000000000 Binary files a/agenta-web/public/assets/tips-images/testsets-dark.png and /dev/null differ diff --git a/agenta-web/public/assets/tips-images/testsets-light.png b/agenta-web/public/assets/tips-images/testsets-light.png deleted file mode 100644 index 6c230981a..000000000 Binary files a/agenta-web/public/assets/tips-images/testsets-light.png and /dev/null differ diff --git a/agenta-web/public/assets/tips-images/testsetsView-dark.png b/agenta-web/public/assets/tips-images/testsetsView-dark.png deleted file mode 100644 index 8f56c84bd..000000000 Binary files a/agenta-web/public/assets/tips-images/testsetsView-dark.png and /dev/null differ diff --git a/agenta-web/public/assets/tips-images/testsetsView-light.png b/agenta-web/public/assets/tips-images/testsetsView-light.png deleted file mode 100644 index 6b38489b3..000000000 Binary files a/agenta-web/public/assets/tips-images/testsetsView-light.png and /dev/null differ diff --git a/agenta-web/public/assets/wave.png b/agenta-web/public/assets/wave.png deleted file mode 100644 index 250e857fb..000000000 Binary files a/agenta-web/public/assets/wave.png and /dev/null differ diff --git a/agenta-web/public/favicon.ico b/agenta-web/public/favicon.ico deleted file mode 100644 index 718d6fea4..000000000 Binary files a/agenta-web/public/favicon.ico and /dev/null differ diff --git a/agenta-web/public/next.svg b/agenta-web/public/next.svg deleted file mode 100644 index 5174b28c5..000000000 --- a/agenta-web/public/next.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/agenta-web/public/vercel.svg b/agenta-web/public/vercel.svg deleted file mode 100644 index d2f842227..000000000 --- a/agenta-web/public/vercel.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/agenta-web/src/code_snippets/endpoints/fetch_config/curl.ts b/agenta-web/src/code_snippets/endpoints/fetch_config/curl.ts index d7d09260b..13de78483 100644 --- 
a/agenta-web/src/code_snippets/endpoints/fetch_config/curl.ts +++ b/agenta-web/src/code_snippets/endpoints/fetch_config/curl.ts @@ -1,6 +1,6 @@ export default function cURLCode(baseId: string, env_name: string): string { return ` - curl -X GET "https://cloud.agenta.ai/api/configs?base_id=${baseId}&environment_name=${env_name}" \\ + curl -X GET "${process.env.NEXT_PUBLIC_AGENTA_API_URL}/api/configs?base_id=${baseId}&environment_name=${env_name}" \\ -H "Authorization: Bearer YOUR_API_KEY" \\ -H "Content-Type: application/json" \\ --connect-timeout 60 diff --git a/agenta-web/src/code_snippets/endpoints/fetch_config/python.ts b/agenta-web/src/code_snippets/endpoints/fetch_config/python.ts index 1fbb904e3..5f56871fa 100644 --- a/agenta-web/src/code_snippets/endpoints/fetch_config/python.ts +++ b/agenta-web/src/code_snippets/endpoints/fetch_config/python.ts @@ -1,7 +1,7 @@ export default function pythonCode(baseId: string, env_name: string): string { return ` # os.environ["AGENTA_API_KEY"] = "your_api_key" # Only when using cloud - # os.environ["AGENTA_HOST"] = "https://cloud.agenta.ai" + # os.environ["AGENTA_HOST"] = "${process.env.NEXT_PUBLIC_AGENTA_API_URL}" from agenta import Agenta ag = Agenta() diff --git a/agenta-web/src/code_snippets/endpoints/fetch_config/typescript.ts b/agenta-web/src/code_snippets/endpoints/fetch_config/typescript.ts index ae1b27b88..b2f401244 100644 --- a/agenta-web/src/code_snippets/endpoints/fetch_config/typescript.ts +++ b/agenta-web/src/code_snippets/endpoints/fetch_config/typescript.ts @@ -6,7 +6,7 @@ export default function tsCode(baseId: string, env_name: string): string { const getConfig = async (baseId: string, environmentName: string) => { try { - const baseUrl = 'https://cloud.agenta.ai/api'; + const baseUrl = '${process.env.NEXT_PUBLIC_AGENTA_API_URL}/api'; const params = { base_id: baseId, environment_name: environmentName diff --git a/agenta-web/src/components/AppSelector/AppSelector.tsx 
b/agenta-web/src/components/AppSelector/AppSelector.tsx index 5daac3a0c..d4b4f13c7 100644 --- a/agenta-web/src/components/AppSelector/AppSelector.tsx +++ b/agenta-web/src/components/AppSelector/AppSelector.tsx @@ -279,19 +279,12 @@ const AppSelector: React.FC = () => { }} >
- {isLoading ? ( -
- -
- ) : error ? ( -
- -
- ) : Array.isArray(apps) && apps.length ? ( -
-
-

Applications

+ {!isLoading && !error && ( +
+

App Management

+ {Array.isArray(apps) && apps.length ? ( -
+ ) : null} +
+ )} + + {isLoading ? ( +
+ +
+ ) : error ? ( +
+ +
+ ) : Array.isArray(apps) && apps.length ? ( +
{Array.isArray(apps) && ( <> diff --git a/agenta-web/src/components/AppSelector/Welcome.tsx b/agenta-web/src/components/AppSelector/Welcome.tsx index fa25a9377..9620bc4b5 100644 --- a/agenta-web/src/components/AppSelector/Welcome.tsx +++ b/agenta-web/src/components/AppSelector/Welcome.tsx @@ -1,125 +1,50 @@ import React from "react" -import {useAppTheme} from "../Layout/ThemeContextProvider" import {createUseStyles} from "react-jss" -import {CheckCircleFilled} from "@ant-design/icons" -import {StyleProps} from "@/lib/Types" +import {JSSTheme} from "@/lib/Types" import Image from "next/image" +import {Button, Card, Typography} from "antd" +import {ArrowRight} from "@phosphor-icons/react" -const useStyles = createUseStyles({ - head: { - marginBottom: 30, - "& h2": { - fontSize: 18, - margin: "20px 0", - textAlign: "center", - }, - }, - heading: { +const useStyles = createUseStyles((theme: JSSTheme) => ({ + card: { + width: 392, + height: 268, display: "flex", - alignItems: "center", - justifyContent: "center", - gap: "1rem", - "& > h1": { - margin: 0, - fontSize: 36, - }, + cursor: "pointer", + flexDirection: "column", + justifyContent: "space-between", + transition: "all 0.025s ease-in", + boxShadow: + "0px 2px 4px 0px rgba(0, 0, 0, 0.02), 0px 1px 6px -1px rgba(0, 0, 0, 0.02), 0px 1px 2px 0px rgba(0, 0, 0, 0.03)", + "& > .ant-card-head": { + minHeight: 0, + padding: theme.paddingSM, - "& > img": { - animation: "$wave 1.8s ease-in-out infinite", - height: 44, - }, - }, - "@keyframes wave": { - "0%": { - transform: "rotate(0deg)", + "& .ant-card-head-title": { + fontSize: theme.fontSizeLG, + fontWeight: theme.fontWeightMedium, + }, }, - "10%": { - transform: "rotate(-10deg)", + "& > .ant-card-body": { + padding: theme.paddingSM, + flex: 1, }, - "20%": { - transform: "rotate(12deg)", + "& > .ant-card-actions": { + padding: "0 12px", }, - "30%": { - transform: "rotate(-10deg)", - }, - "40%": { - transform: "rotate(9deg)", - }, - "50%": { - transform: "rotate(0deg)", - 
}, - "100%": { - transform: "rotate(0deg)", - }, - }, - description: { - lineHeight: 1.7, - }, - wrapper: { - display: "flex", - justifyContent: "space-between", - gap: 20, - maxWidth: "1250px", - margin: "0 auto", - width: "100%", - }, - container: ({themeMode}: StyleProps) => ({ - display: "flex", - justifyContent: "space-between", - cursor: "pointer", - flexDirection: "column", - border: `1px solid ${themeMode === "dark" ? "rgb(13, 17, 23)" : "#91caff"}`, - padding: "15px", - borderRadius: 10, - flex: 1, - backgroundColor: themeMode === "dark" ? "#000" : "#fff", - transition: "all 0.3s ease-out", "&:hover": { - backgroundColor: themeMode === "dark" ? "" : "#f3faff", - boxShadow: themeMode === "dark" ? "0 0 10px rgba(225, 225, 225, 0.3)" : "", + boxShadow: theme.boxShadow, }, - }), - title: { - display: "flex", - alignItems: "center", - justifyContent: "center", - gap: "15px", - "& h1": { - fontWeight: 600, - fontSize: 24, - }, - }, - tag: { - padding: "2px 6px", - fontWeight: "bold", }, - img: ({themeMode}: StyleProps) => ({ + button: { width: "100%", - height: "auto", - filter: themeMode === "dark" ? "invert(1)" : "none", - }), - steps: ({themeMode}: StyleProps) => ({ - fontSize: 16, - margin: "20px 0 0", display: "flex", - flexDirection: "column", - listStyleType: "none", - padding: 20, - "& li": { - marginBottom: 10, - }, - "& svg": { - color: themeMode === "dark" ? "#fff" : "#0958d9", - marginRight: 10, - }, - "& span": { - fontWeight: 600, + alignItems: "center", + "& > .ant-btn-icon": { + marginTop: 4, }, - }), - text: { - marginLeft: 25, }, -}) +})) interface Props { onWriteOwnApp: () => void @@ -127,100 +52,97 @@ interface Props { } const Welcome: React.FC = ({onWriteOwnApp, onCreateFromTemplate}) => { - const {appTheme} = useAppTheme() - const classes = useStyles({themeMode: appTheme} as StyleProps) + const classes = useStyles() - return ( - <> -
-
-
-

Welcome to Agenta

- wave -
-

The developer-first open source LLMOps platform.

-
-
-
-
-

Quickstart From a Template

-
+ const templatePoints = [ + "Compare prompts and models", + "Create testsets", + "Evaluate outputs", + "Deploy in one click", + ] + const complexLLM = [ + "Use Langchain, Llama Index, or any framework", + "Use OpenAI, Cohere, or self-hosted open-source models", + "Continue in the UI: Everything in the left", + "Streamline collaboration between devs and domain experts!", + ] - Simple start Image + return ( +
+
+ agenta-ai + + Start building and testing your LLM
applications with Agenta AI.{" "} +
+
-
    -
  • - Start from a template -
  • -
  • - Compare prompts and models -
  • -
  • - Create testsets -
  • -
  • - Evaluate outputs -
  • -
  • - Deploy in one click -
  • +
    + } + size="large" + > + Start with a template + , + ]} + > +
    + + Setup an app using our preset LLM config and explore Agenta AI + +
      + {templatePoints.map((item) => ( +
    • {item}
    • + ))}
    -
    -
    -
    -

    Build Complex LLM apps

    -
    + - Complex build Image -
    - -
      -
    • - Start from code -
    • -
    • - Use Langchain,{" "} - Llama Index, or any framework -
    • -
    • - Use OpenAI, Cohere, - or self-hosted open-source models -
    • -
    • - Continue in the UI: Everything in - the left -
    • -
    • - Streamline collaboration between - devs and domain experts! -
    • + } + size="large" + > + Setup your own app + , + ]} + > +
      + + Create your own complex application using any framework. + +
        + {complexLLM.map((item) => ( +
      • {item}
      • + ))}
      -
-
- + +
+ ) } - export default Welcome diff --git a/agenta-web/src/components/DynamicCodeBlock/CodeBlock.tsx b/agenta-web/src/components/DynamicCodeBlock/CodeBlock.tsx index df6ebc800..6a611b3fc 100644 --- a/agenta-web/src/components/DynamicCodeBlock/CodeBlock.tsx +++ b/agenta-web/src/components/DynamicCodeBlock/CodeBlock.tsx @@ -29,7 +29,7 @@ const CodeBlock: FC = ({language, value}) => { language={language} style={appTheme === "dark" ? darcula : coy} showLineNumbers - wrapLongLines={true} + wrapLongLines={false} > {value} diff --git a/agenta-web/src/components/Evaluations/AutomaticEvaluationResult.tsx b/agenta-web/src/components/Evaluations/AutomaticEvaluationResult.tsx deleted file mode 100644 index 2b6ef8ef5..000000000 --- a/agenta-web/src/components/Evaluations/AutomaticEvaluationResult.tsx +++ /dev/null @@ -1,286 +0,0 @@ -import { - deleteEvaluations, - fetchEvaluationResults, - fetchAllLoadEvaluations, -} from "@/services/human-evaluations/api" -import {Button, Spin, Statistic, Table, Typography} from "antd" -import {useRouter} from "next/router" -import {useEffect, useState} from "react" -import {ColumnsType} from "antd/es/table" -import {Evaluation, SingleModelEvaluationListTableDataType, StyleProps} from "@/lib/Types" -import {DeleteOutlined} from "@ant-design/icons" -import {EvaluationFlow, EvaluationType} from "@/lib/enums" -import {createUseStyles} from "react-jss" -import {useAppTheme} from "../Layout/ThemeContextProvider" -import {calculateResultsDataAvg} from "@/lib/helpers/evaluate" -import { - fromEvaluationResponseToEvaluation, - singleModelTestEvaluationTransformer, -} from "@/lib/transformers" -import {variantNameWithRev} from "@/lib/helpers/variantHelper" - -const useStyles = createUseStyles({ - container: { - marginBottom: 20, - }, - collapse: ({themeMode}: StyleProps) => ({ - margin: "10px 0", - "& .ant-collapse-header": { - alignItems: "center !important", - padding: "0px 20px !important", - borderTopLeftRadius: "10px !important", - 
borderTopRightRadius: "10px !important", - background: themeMode === "dark" ? "#1d1d1d" : "#f8f8f8", - }, - }), - stat: { - "& .ant-statistic-content-value": { - fontSize: 20, - color: "#1677ff", - }, - "& .ant-statistic-content-suffix": { - fontSize: 20, - color: "#1677ff", - }, - }, - btnContainer: { - display: "flex", - alignItems: "center", - justifyContent: "flex-end", - margin: "20px 0", - gap: 10, - "& svg": { - color: "red", - }, - }, -}) - -const {Title} = Typography -interface AutomaticEvaluationResultProps { - setIsEvalModalOpen: React.Dispatch> -} -export default function AutomaticEvaluationResult({ - setIsEvalModalOpen, -}: AutomaticEvaluationResultProps) { - const router = useRouter() - const [evaluationsList, setEvaluationsList] = useState< - SingleModelEvaluationListTableDataType[] - >([]) - const [selectedRowKeys, setSelectedRowKeys] = useState([]) - const [selectionType] = useState<"checkbox" | "radio">("checkbox") - const {appTheme} = useAppTheme() - const classes = useStyles({themeMode: appTheme} as StyleProps) - const app_id = router.query.app_id?.toString() || "" - const [fetchingEvaluations, setFetchingEvaluations] = useState(false) - - useEffect(() => { - if (!app_id) { - return - } - - const fetchEvaluations = async () => { - try { - setFetchingEvaluations(true) - const evals: Evaluation[] = (await fetchAllLoadEvaluations(app_id)).map( - fromEvaluationResponseToEvaluation, - ) - const results = await Promise.all(evals.map((e) => fetchEvaluationResults(e.id))) - const newEvals = results.map((result, ix) => { - const item = evals[ix] - if ([EvaluationType.single_model_test].includes(item.evaluationType)) { - return singleModelTestEvaluationTransformer({item, result}) - } - }) - - setEvaluationsList( - newEvals - .filter((evaluation) => evaluation !== undefined) - .filter( - (item: any) => - item.resultsData !== undefined || - !(Object.keys(item.scoresData || {}).length === 0) || - item.avgScore !== undefined, - ) as any, - ) - } catch (error) 
{ - console.error(error) - } finally { - setFetchingEvaluations(false) - } - } - - fetchEvaluations() - }, [app_id]) - - const handleNavigation = (variantName: string, revisionNum: string) => { - router.push(`/apps/${app_id}/playground?variant=${variantName}&revision=${revisionNum}`) - } - - const onCompleteEvaluation = (evaluation: any) => { - // TODO: improve type - const evaluationType = - EvaluationType[evaluation.evaluationType as keyof typeof EvaluationType] - - if (evaluationType === EvaluationType.single_model_test) { - router.push(`/apps/${app_id}/annotations/single_model_test/${evaluation.key}`) - } - } - - const columns: ColumnsType = [ - { - title: "Variant", - dataIndex: "variants", - key: "variants", - render: (value, record: SingleModelEvaluationListTableDataType) => { - return ( -
handleNavigation(value[0].variantName, record.revisions[0])} - style={{cursor: "pointer"}} - > - - {variantNameWithRev({ - variant_name: value[0].variantName, - revision: record.revisions[0], - })} - -
- ) - }, - }, - { - title: "Test set", - dataIndex: "testsetName", - key: "testsetName", - render: (value: any, record: SingleModelEvaluationListTableDataType, index: number) => { - return {record.testset.name} - }, - }, - { - title: "Average score", - dataIndex: "averageScore", - key: "averageScore", - render: (value: any, record: SingleModelEvaluationListTableDataType, index: number) => { - let score = 0 - if (record.scoresData) { - score = - ((record.scoresData.correct?.length || - record.scoresData.true?.length || - 0) / - record.scoresData.nb_of_rows) * - 100 - } else if (record.resultsData) { - const multiplier = { - [EvaluationType.auto_webhook_test]: 100, - [EvaluationType.single_model_test]: 1, - } - score = calculateResultsDataAvg( - record.resultsData, - multiplier[record.evaluationType as keyof typeof multiplier], - ) - score = isNaN(score) ? 0 : score - } else if (record.avgScore) { - score = record.avgScore * 100 - } - - return ( - - - - ) - }, - }, - { - title: "Created at", - dataIndex: "createdAt", - key: "createdAt", - width: "300", - }, - { - title: "Action", - dataIndex: "action", - key: "action", - render: (value: any, record: SingleModelEvaluationListTableDataType, index: number) => { - let actionText = "View evaluation" - if (record.status !== EvaluationFlow.EVALUATION_FINISHED) { - actionText = "Continue evaluation" - } - return ( -
- -
- ) - }, - }, - ] - - const rowSelection = { - onChange: ( - selectedRowKeys: React.Key[], - selectedRows: SingleModelEvaluationListTableDataType[], - ) => { - setSelectedRowKeys(selectedRowKeys) - }, - } - - const onDelete = async () => { - const evaluationsIds = selectedRowKeys.map((key) => key.toString()) - try { - await deleteEvaluations(evaluationsIds) - setEvaluationsList((prevEvaluationsList) => - prevEvaluationsList.filter( - (evaluation) => !evaluationsIds.includes(evaluation.key), - ), - ) - - setSelectedRowKeys([]) - } catch (error) { - console.error(error) - } - } - - return ( -
-
- - -
- -
- Single Model Test Results -
- - - - - - ) -} diff --git a/agenta-web/src/components/Evaluations/HumanEvaluationResult.tsx b/agenta-web/src/components/Evaluations/HumanEvaluationResult.tsx index 87720a438..e69de29bb 100644 --- a/agenta-web/src/components/Evaluations/HumanEvaluationResult.tsx +++ b/agenta-web/src/components/Evaluations/HumanEvaluationResult.tsx @@ -1,373 +0,0 @@ -import { - deleteEvaluations, - fetchAllLoadEvaluations, - fetchEvaluationResults, -} from "@/services/human-evaluations/api" -import {Button, Spin, Statistic, Table, Typography} from "antd" -import {useRouter} from "next/router" -import {useEffect, useState} from "react" -import {ColumnsType} from "antd/es/table" -import {EvaluationResponseType, StyleProps} from "@/lib/Types" -import {DeleteOutlined} from "@ant-design/icons" -import {EvaluationFlow, EvaluationType} from "@/lib/enums" -import {createUseStyles} from "react-jss" -import {useAppTheme} from "../Layout/ThemeContextProvider" -import {getVotesPercentage} from "@/lib/helpers/evaluate" -import {isDemo} from "@/lib/helpers/utils" -import {variantNameWithRev} from "@/lib/helpers/variantHelper" -import {abTestingEvaluationTransformer} from "@/lib/transformers" - -interface VariantVotesData { - number_of_votes: number - percentage: number -} - -export interface HumanEvaluationListTableDataType { - key: string - variants: string[] - testset: { - _id: string - name: string - } - evaluationType: string - status: EvaluationFlow - votesData: { - nb_of_rows: number - variants: string[] - flag_votes: { - number_of_votes: number - percentage: number - } - positive_votes: { - number_of_votes: number - percentage: number - } - variants_votes_data: Record - } - createdAt: string - revisions: string[] - variant_revision_ids: string[] - variantNames: string[] -} - -const useStyles = createUseStyles({ - container: { - marginBottom: 20, - }, - collapse: ({themeMode}: StyleProps) => ({ - margin: "10px 0", - "& .ant-collapse-header": { - alignItems: "center !important", - padding: 
"0px 20px !important", - borderTopLeftRadius: "10px !important", - borderTopRightRadius: "10px !important", - background: themeMode === "dark" ? "#1d1d1d" : "#f8f8f8", - }, - }), - statFlag: { - "& .ant-statistic-content-value": { - fontSize: 20, - color: "#cf1322", - }, - "& .ant-statistic-content-suffix": { - fontSize: 20, - color: "#cf1322", - }, - }, - stat: { - "& .ant-statistic-content-value": { - fontSize: 20, - color: "#1677ff", - }, - "& .ant-statistic-content-suffix": { - fontSize: 20, - color: "#1677ff", - }, - }, - statGood: { - "& .ant-statistic-content-value": { - fontSize: 20, - color: "#3f8600", - }, - "& .ant-statistic-content-suffix": { - fontSize: 20, - color: "#3f8600", - }, - }, - btnContainer: { - display: "flex", - alignItems: "center", - justifyContent: "flex-end", - margin: "20px 0", - gap: 10, - "& svg": { - color: "red", - }, - }, -}) - -const {Title} = Typography - -interface HumanEvaluationResultProps { - setIsEvalModalOpen: React.Dispatch> -} - -export default function HumanEvaluationResult({setIsEvalModalOpen}: HumanEvaluationResultProps) { - const router = useRouter() - const [evaluationsList, setEvaluationsList] = useState([]) - const [selectedRowKeys, setSelectedRowKeys] = useState([]) - const [selectionType] = useState<"checkbox" | "radio">("checkbox") - const {appTheme} = useAppTheme() - const classes = useStyles({themeMode: appTheme} as StyleProps) - const app_id = router.query.app_id?.toString() || "" - const [fetchingEvaluations, setFetchingEvaluations] = useState(false) - - useEffect(() => { - if (!app_id) { - return - } - const fetchEvaluations = async () => { - try { - setFetchingEvaluations(true) - fetchAllLoadEvaluations(app_id) - .then((response) => { - const fetchPromises = response.map((item: EvaluationResponseType) => { - return fetchEvaluationResults(item.id) - .then((results) => { - if (item.evaluation_type === EvaluationType.human_a_b_testing) { - if (Object.keys(results.votes_data).length > 0) { - return 
abTestingEvaluationTransformer({item, results}) - } - } - }) - .catch((err) => console.error(err)) - }) - Promise.all(fetchPromises) - .then((evaluations) => { - const validEvaluations = evaluations.filter( - (evaluation) => evaluation !== undefined, - ) - setEvaluationsList(validEvaluations) - }) - .catch((err) => console.error(err)) - }) - .catch((err) => console.error(err)) - .finally(() => setFetchingEvaluations(false)) - } catch (error) { - console.error(error) - } - } - - fetchEvaluations() - }, [app_id]) - - const onCompleteEvaluation = (evaluation: any) => { - // TODO: improve type - const evaluationType = - EvaluationType[evaluation.evaluationType as keyof typeof EvaluationType] - - if (evaluationType === EvaluationType.human_a_b_testing) { - router.push(`/apps/${app_id}/annotations/human_a_b_testing/${evaluation.key}`) - } - } - - const handleNavigation = (variantName: string, revisionNum: string) => { - router.push(`/apps/${app_id}/playground?variant=${variantName}&revision=${revisionNum}`) - } - - const columns: ColumnsType = [ - { - title: "Test set", - dataIndex: "testsetName", - key: "testsetName", - render: (_, record: HumanEvaluationListTableDataType, index: number) => { - return {record.testset.name} - }, - }, - { - title: "Variant 1", - dataIndex: "variantNames", - key: "variant1", - render: (value, record) => { - const percentage = getVotesPercentage(record, 0) - return ( -
- -
handleNavigation(value[0], record.revisions[0])} - > - ( - {variantNameWithRev({ - variant_name: value[0], - revision: record.revisions[0], - })} - ) -
-
- ) - }, - }, - { - title: "Variant 2", - dataIndex: "variantNames", - key: "variant2", - render: (value, record) => { - const percentage = getVotesPercentage(record, 1) - return ( -
- -
handleNavigation(value[1], record.revisions[1])} - > - ( - {variantNameWithRev({ - variant_name: value[1], - revision: record.revisions[1], - })} - ) -
-
- ) - }, - }, - { - title: "Both are good", - dataIndex: "positive", - key: "positive", - render: (value: any, record: HumanEvaluationListTableDataType) => { - let percentage = record.votesData.positive_votes.percentage - return ( - - - - ) - }, - }, - { - title: "Flag", - dataIndex: "flag", - key: "flag", - render: (value: any, record: HumanEvaluationListTableDataType) => { - let percentage = record.votesData.flag_votes.percentage - return ( - - - - ) - }, - }, - ] - - if (isDemo()) { - columns.push({ - title: "User", - dataIndex: ["user", "username"], - key: "username", - }) - } - - columns.push( - ...[ - { - title: "Created at", - dataIndex: "createdAt", - key: "createdAt", - width: "300", - }, - { - title: "Action", - dataIndex: "action", - key: "action", - render: (value: any, record: HumanEvaluationListTableDataType, index: number) => { - let actionText = "View evaluation" - if (record.status !== EvaluationFlow.EVALUATION_FINISHED) { - actionText = "Continue evaluation" - } - return ( -
- -
- ) - }, - }, - ], - ) - - const rowSelection = { - onChange: (selectedRowKeys: React.Key[]) => { - setSelectedRowKeys(selectedRowKeys) - }, - } - - const onDelete = async () => { - const evaluationsIds = selectedRowKeys.map((key) => key.toString()) - try { - await deleteEvaluations(evaluationsIds) - setEvaluationsList((prevEvaluationsList) => - prevEvaluationsList.filter( - (evaluation) => !evaluationsIds.includes(evaluation.key), - ), - ) - - setSelectedRowKeys([]) - } catch {} - } - - return ( -
-
- - -
- -
- A/B Test Results -
- - -
- - - ) -} diff --git a/agenta-web/src/components/HumanEvaluationModal/HumanEvaluationModal.tsx b/agenta-web/src/components/HumanEvaluationModal/HumanEvaluationModal.tsx index 142ea39a0..084bf22fb 100644 --- a/agenta-web/src/components/HumanEvaluationModal/HumanEvaluationModal.tsx +++ b/agenta-web/src/components/HumanEvaluationModal/HumanEvaluationModal.tsx @@ -5,7 +5,6 @@ import {createNewEvaluation} from "@/services/human-evaluations/api" import {isDemo} from "@/lib/helpers/utils" import {Button, Col, Dropdown, MenuProps, Modal, ModalProps, Row, Spin, message} from "antd" import {getErrorMessage} from "@/lib/helpers/errorHandler" -import {DownOutlined} from "@ant-design/icons" import {EvaluationType} from "@/lib/enums" import {PERMISSION_ERR_MSG} from "@/lib/helpers/axiosConfig" import {getAllVariantParameters} from "@/lib/helpers/variantHelper" @@ -15,6 +14,7 @@ import {createUseStyles} from "react-jss" import EvaluationErrorModal from "../Evaluations/EvaluationErrorModal" import {dynamicComponent} from "@/lib/helpers/dynamic" import {useLoadTestsetsList} from "@/services/testsets/api" +import {CaretDown, Play} from "@phosphor-icons/react" const useStyles = createUseStyles((theme: JSSTheme) => ({ evaluationContainer: { @@ -111,6 +111,11 @@ const useStyles = createUseStyles((theme: JSSTheme) => ({ alignItems: "center", justifyContent: "space-between", }, + dropdownItemLabels: { + fontSize: theme.fontSizeSM, + lineHeight: theme.lineHeightSM, + color: theme.colorTextDescription, + }, })) interface HumanEvaluationModalProps { @@ -278,7 +283,15 @@ const HumanEvaluationModal = ({ filteredVariants.push({ label: ( <> -
{variant.variantName}
+
+ {variant.variantName} + + #{variant.variantId.split("-")[0]} + +
), key: label, @@ -340,9 +353,9 @@ const HumanEvaluationModal = ({ setVariants(selectedVariants) if (evaluationType === EvaluationType.human_a_b_testing) { - router.push(`/apps/${appId}/annotations/human_a_b_testing/${evaluationTableId}`) + router.push(`/apps/${appId}/evaluations/human_a_b_testing/${evaluationTableId}`) } else if (evaluationType === EvaluationType.single_model_test) { - router.push(`/apps/${appId}/annotations/single_model_test/${evaluationTableId}`) + router.push(`/apps/${appId}/evaluations/single_model_test/${evaluationTableId}`) } } @@ -356,7 +369,7 @@ const HumanEvaluationModal = ({ setSelectedTestset({name: "Select a Test set"}) setSelectedVariants(new Array(1).fill({variantName: "Select a variant"})) }} - title="Start a New Evaluation" + title="New Evaluation" footer={null} > @@ -373,7 +386,7 @@ const HumanEvaluationModal = ({ >
{selectedTestset.name} - +
@@ -393,7 +406,7 @@ const HumanEvaluationModal = ({
{selectedVariants[index]?.variantName || "Select a variant"} - +
@@ -430,6 +443,8 @@ const HumanEvaluationModal = ({ onClick={onStartEvaluation} type="primary" data-cy="start-new-evaluation-button" + icon={} + className="flex items-center" > Start diff --git a/agenta-web/src/components/pages/overview/abTestingEvaluation/AbTestingEvalOverview.tsx b/agenta-web/src/components/HumanEvaluations/AbTestingEvaluation.tsx similarity index 78% rename from agenta-web/src/components/pages/overview/abTestingEvaluation/AbTestingEvalOverview.tsx rename to agenta-web/src/components/HumanEvaluations/AbTestingEvaluation.tsx index 958997b03..bae2f29c8 100644 --- a/agenta-web/src/components/pages/overview/abTestingEvaluation/AbTestingEvalOverview.tsx +++ b/agenta-web/src/components/HumanEvaluations/AbTestingEvaluation.tsx @@ -1,5 +1,5 @@ import DeleteEvaluationModal from "@/components/DeleteEvaluationModal/DeleteEvaluationModal" -import {HumanEvaluationListTableDataType} from "@/components/Evaluations/HumanEvaluationResult" +import {HumanEvaluationListTableDataType, JSSTheme} from "@/lib/Types" import HumanEvaluationModal from "@/components/HumanEvaluationModal/HumanEvaluationModal" import {EvaluationType} from "@/lib/enums" import {getColorFromStr} from "@/lib/helpers/colors" @@ -7,14 +7,13 @@ import {getVotesPercentage} from "@/lib/helpers/evaluate" import {getInitials, isDemo} from "@/lib/helpers/utils" import {variantNameWithRev} from "@/lib/helpers/variantHelper" import {abTestingEvaluationTransformer} from "@/lib/transformers" -import {JSSTheme} from "@/lib/Types" import { deleteEvaluations, fetchAllLoadEvaluations, fetchEvaluationResults, } from "@/services/human-evaluations/api" import {MoreOutlined, PlusOutlined} from "@ant-design/icons" -import {Database, GearSix, Note, Rocket, Trash} from "@phosphor-icons/react" +import {Database, GearSix, Note, Plus, Rocket, Trash} from "@phosphor-icons/react" import {Avatar, Button, Dropdown, message, Space, Spin, Statistic, Table, Typography} from "antd" import {ColumnsType} from "antd/es/table" 
import {useRouter} from "next/router" @@ -65,9 +64,13 @@ const useStyles = createUseStyles((theme: JSSTheme) => ({ color: theme.colorSuccess, }, }, + button: { + display: "flex", + alignItems: "center", + }, })) -const AbTestingEvalOverview = () => { +const AbTestingEvaluation = ({viewType}: {viewType: "evaluation" | "overview"}) => { const classes = useStyles() const router = useRouter() const appId = router.query.app_id as string @@ -77,6 +80,8 @@ const AbTestingEvalOverview = () => { const [isEvalModalOpen, setIsEvalModalOpen] = useState(false) const [selectedEvalRecord, setSelectedEvalRecord] = useState() const [isDeleteEvalModalOpen, setIsDeleteEvalModalOpen] = useState(false) + const [isDeleteMultipleEvalModalOpen, setIsDeleteMultipleEvalModalOpen] = useState(false) + const [selectedRowKeys, setSelectedRowKeys] = useState([]) useEffect(() => { if (!appId) return @@ -105,9 +110,8 @@ const AbTestingEvalOverview = () => { new Date(b.createdAt || 0).getTime() - new Date(a.createdAt || 0).getTime(), ) - .slice(0, 5) - setEvaluationsList(results) + setEvaluationsList(viewType === "overview" ? 
results.slice(0, 5) : results) } catch (error) { console.error(error) } finally { @@ -122,6 +126,31 @@ const AbTestingEvalOverview = () => { router.push(`/apps/${appId}/playground?variant=${variantName}&revision=${revisionNum}`) } + const rowSelection = { + onChange: (selectedRowKeys: React.Key[]) => { + setSelectedRowKeys(selectedRowKeys) + }, + } + + const handleDeleteMultipleEvaluations = async () => { + const evaluationsIds = selectedRowKeys.map((key) => key.toString()) + try { + setFetchingEvaluations(true) + await deleteEvaluations(evaluationsIds) + setEvaluationsList((prevEvaluationsList) => + prevEvaluationsList.filter( + (evaluation) => !evaluationsIds.includes(evaluation.key), + ), + ) + setSelectedRowKeys([]) + message.success("Evaluations Deleted") + } catch (error) { + console.error(error) + } finally { + setFetchingEvaluations(false) + } + } + const handleDeleteEvaluation = async (record: HumanEvaluationListTableDataType) => { try { setFetchingEvaluations(true) @@ -288,7 +317,7 @@ const AbTestingEvalOverview = () => { onClick: (e) => { e.domEvent.stopPropagation() router.push( - `/apps/${appId}/annotations/human_a_b_testing/${record.key}`, + `/apps/${appId}/evaluations/human_a_b_testing/${record.key}`, ) }, }, @@ -357,25 +386,64 @@ const AbTestingEvalOverview = () => { return (
-
- - A/B Testing Evaluations - + + + +
+ ) : ( +
+ - - -
+ + + +
+ )}
{ style: {cursor: "pointer"}, onClick: () => router.push( - `/apps/${appId}/annotations/human_a_b_testing/${record.key}`, + `/apps/${appId}/evaluations/human_a_b_testing/${record.key}`, ), })} /> @@ -409,8 +477,20 @@ const AbTestingEvalOverview = () => { evaluationType={"a/b testing evaluation"} /> )} + + {isDeleteMultipleEvalModalOpen && ( + setIsDeleteMultipleEvalModalOpen(false)} + onOk={async () => { + await handleDeleteMultipleEvaluations() + setIsDeleteMultipleEvalModalOpen(false) + }} + evaluationType={"a/b testing evaluation"} + /> + )} ) } -export default AbTestingEvalOverview +export default AbTestingEvaluation diff --git a/agenta-web/src/components/pages/overview/singleModelEvaluation/SingleModelEvalOverview.tsx b/agenta-web/src/components/HumanEvaluations/SingleModelEvaluation.tsx similarity index 73% rename from agenta-web/src/components/pages/overview/singleModelEvaluation/SingleModelEvalOverview.tsx rename to agenta-web/src/components/HumanEvaluations/SingleModelEvaluation.tsx index f6ce10479..763c4241b 100644 --- a/agenta-web/src/components/pages/overview/singleModelEvaluation/SingleModelEvalOverview.tsx +++ b/agenta-web/src/components/HumanEvaluations/SingleModelEvaluation.tsx @@ -14,7 +14,7 @@ import { fetchEvaluationResults, } from "@/services/human-evaluations/api" import {MoreOutlined, PlusOutlined} from "@ant-design/icons" -import {Database, GearSix, Note, Rocket, Trash} from "@phosphor-icons/react" +import {Database, GearSix, Note, Plus, Rocket, Trash} from "@phosphor-icons/react" import {Button, Dropdown, message, Space, Spin, Statistic, Table, Typography} from "antd" import {ColumnsType} from "antd/es/table" import {useRouter} from "next/router" @@ -43,9 +43,13 @@ const useStyles = createUseStyles((theme: JSSTheme) => ({ color: theme.colorPrimary, }, }, + button: { + display: "flex", + alignItems: "center", + }, })) -const SingleModelEvalOverview = () => { +const SingleModelEvaluation = ({viewType}: {viewType: "evaluation" | "overview"}) => 
{ const classes = useStyles() const router = useRouter() const appId = router.query.app_id as string @@ -58,6 +62,8 @@ const SingleModelEvalOverview = () => { const [selectedEvalRecord, setSelectedEvalRecord] = useState() const [isDeleteEvalModalOpen, setIsDeleteEvalModalOpen] = useState(false) + const [isDeleteEvalMultipleModalOpen, setIsDeleteEvalMultipleModalOpen] = useState(false) + const [selectedRowKeys, setSelectedRowKeys] = useState([]) useEffect(() => { if (!appId) return @@ -89,9 +95,10 @@ const SingleModelEvalOverview = () => { new Date(b?.createdAt ?? 0).getTime() - new Date(a?.createdAt ?? 0).getTime(), ) - .slice(0, 5) - setEvaluationsList(newEvalResults as any) + setEvaluationsList( + viewType === "overview" ? newEvalResults.slice(0, 5) : (newEvalResults as any), + ) } catch (error) { console.error(error) } finally { @@ -102,6 +109,31 @@ const SingleModelEvalOverview = () => { fetchEvaluations() }, [appId]) + const rowSelection = { + onChange: (selectedRowKeys: React.Key[]) => { + setSelectedRowKeys(selectedRowKeys) + }, + } + + const handleDeleteMultipleEvaluations = async () => { + const evaluationsIds = selectedRowKeys.map((key) => key.toString()) + try { + setFetchingEvaluations(true) + await deleteEvaluations(evaluationsIds) + setEvaluationsList((prevEvaluationsList) => + prevEvaluationsList.filter( + (evaluation) => !evaluationsIds.includes(evaluation.key), + ), + ) + setSelectedRowKeys([]) + message.success("Evaluations Deleted") + } catch (error) { + console.error(error) + } finally { + setFetchingEvaluations(false) + } + } + const handleNavigation = (variantName: string, revisionNum: string) => { router.push(`/apps/${appId}/playground?variant=${variantName}&revision=${revisionNum}`) } @@ -221,7 +253,7 @@ const SingleModelEvalOverview = () => { onClick: (e) => { e.domEvent.stopPropagation() router.push( - `/apps/${appId}/annotations/single_model_test/${record.key}`, + `/apps/${appId}/evaluations/single_model_test/${record.key}`, ) }, }, @@ 
-275,26 +307,65 @@ const SingleModelEvalOverview = () => { return (
-
- - Single Model Evaluations + {viewType === "overview" ? ( +
+ + Human Annotation - + + + +
+ ) : ( +
+ - - -
+ + + +
+ )}
{ style: {cursor: "pointer"}, onClick: () => router.push( - `/apps/${appId}/annotations/single_model_test/${record.key}`, + `/apps/${appId}/evaluations/single_model_test/${record.key}`, ), })} /> @@ -328,8 +399,19 @@ const SingleModelEvalOverview = () => { evaluationType={"single model evaluation"} /> )} + {isDeleteEvalMultipleModalOpen && ( + setIsDeleteEvalMultipleModalOpen(false)} + onOk={async () => { + await handleDeleteMultipleEvaluations() + setIsDeleteEvalMultipleModalOpen(false) + }} + evaluationType={"single model evaluation"} + /> + )} ) } -export default SingleModelEvalOverview +export default SingleModelEvaluation diff --git a/agenta-web/src/components/Layout/Layout.tsx b/agenta-web/src/components/Layout/Layout.tsx index 1e61ddbc1..5808fc852 100644 --- a/agenta-web/src/components/Layout/Layout.tsx +++ b/agenta-web/src/components/Layout/Layout.tsx @@ -166,57 +166,70 @@ const App: React.FC = ({children}) => { {typeof window === "undefined" ? null : ( - - + {router.pathname.includes("/auth") || + router.pathname.includes("/post-signup") ? ( -
-
- - - Apps -
- ), - }, - {title: capitalizedAppName}, - ]} - /> -
- agenta v{packageJsonData.version} + + {children} + {contextHolder} + + + ) : ( + + + +
+
+ + + Apps +
+ ), + }, + {title: capitalizedAppName}, + ]} + /> +
+ agenta v{packageJsonData.version} +
+ + + {children} + {contextHolder} + +
- - - {children} - {contextHolder} - - -
-
- - - - - - - - - - - -
Copyright © {new Date().getFullYear()} | Agenta.
-
+
+ + + + + + + + + + + +
Copyright © {new Date().getFullYear()} | Agenta.
+
+
- + )}
)}
diff --git a/agenta-web/src/components/Layout/ThemeContextProvider.tsx b/agenta-web/src/components/Layout/ThemeContextProvider.tsx index 4f4da66fd..5b9ce1041 100644 --- a/agenta-web/src/components/Layout/ThemeContextProvider.tsx +++ b/agenta-web/src/components/Layout/ThemeContextProvider.tsx @@ -55,7 +55,7 @@ const ThemeContextProvider: React.FC = ({children}) => { setAppTheme(getAppTheme(themeMode)) }, [themeMode]) - const val = appTheme || ThemeMode.Light + const val = ThemeMode.Light return ( = ({children}) => { > diff --git a/agenta-web/src/components/NoResultsFound/NoResultsFound.tsx b/agenta-web/src/components/NoResultsFound/NoResultsFound.tsx new file mode 100644 index 000000000..9b173ef30 --- /dev/null +++ b/agenta-web/src/components/NoResultsFound/NoResultsFound.tsx @@ -0,0 +1,37 @@ +import React from "react" +import {Typography} from "antd" +import Image from "next/image" +import {createUseStyles} from "react-jss" +import {JSSTheme} from "@/lib/Types" + +const useStyles = createUseStyles((theme: JSSTheme) => ({ + notFound: { + width: "100%", + display: "flex", + flexDirection: "column", + alignItems: "center", + justifyContent: "center", + padding: "80px 0px", + gap: 16, + "& > span": { + lineHeight: theme.lineHeightHeading4, + fontSize: theme.fontSizeHeading4, + fontWeight: theme.fontWeightMedium, + }, + }, +})) + +const NoResultsFound = ({className}: {className?: string}) => { + const classes = useStyles() + return ( +
+ + No results found + + No results match the search criteria. + +
+ ) +} + +export default NoResultsFound diff --git a/agenta-web/src/components/Sidebar/config.tsx b/agenta-web/src/components/Sidebar/config.tsx index 07c4bc06b..bdf9a25a6 100644 --- a/agenta-web/src/components/Sidebar/config.tsx +++ b/agenta-web/src/components/Sidebar/config.tsx @@ -93,51 +93,11 @@ export const useSidebarConfig = () => { isHidden: !appId && !recentlyVisitedAppId, }, { - key: "app-auto-evaluations-link", - title: "Automatic Evaluation", - icon: , + key: "app-evaluations-link", + title: "Evaluations", + link: `/apps/${appId || recentlyVisitedAppId}/evaluations`, isHidden: !appId && !recentlyVisitedAppId, - submenu: [ - { - key: "app-evaluators-link", - title: "Evaluators", - tooltip: - "Select and customize evaluators such as custom code or regex evaluators.", - link: `/apps/${appId || recentlyVisitedAppId}/evaluations/new-evaluator`, - icon: , - }, - { - key: "app-evaluations-results-link", - title: "Results", - tooltip: "Choose your variants and evaluators to start the evaluation process.", - link: `/apps/${appId || recentlyVisitedAppId}/evaluations/results`, - icon: , - }, - ], - }, - { - key: "app-human-evaluations-link", - title: "Human Evaluation", - icon: , - isHidden: !appId && !recentlyVisitedAppId, - submenu: [ - { - key: "app-human-ab-testing-link", - title: "A/B Evaluation", - tooltip: - "A/B tests allow you to compare the performance of two different variants manually.", - link: `/apps/${appId || recentlyVisitedAppId}/annotations/human_a_b_testing`, - icon: , - }, - { - key: "app-single-model-test-link", - title: "Single Model Eval.", - tooltip: - "Single model test allows you to score the performance of a single LLM app manually.", - link: `/apps/${appId || recentlyVisitedAppId}/annotations/single_model_test`, - icon: , - }, - ], + icon: , }, { key: "app-observability-link", diff --git a/agenta-web/src/components/TestSetTable/TableHeaderComponent.tsx b/agenta-web/src/components/TestSetTable/TableHeaderComponent.tsx index 
857f6f2bc..a69bb1f95 100644 --- a/agenta-web/src/components/TestSetTable/TableHeaderComponent.tsx +++ b/agenta-web/src/components/TestSetTable/TableHeaderComponent.tsx @@ -105,6 +105,7 @@ const TableHeaderComponent = ({ setInputValues(scopedInputValues) updateTable(scopedInputValues) setIsEditInputOpen(false) + setIsDataChanged(true) } } @@ -112,7 +113,6 @@ const TableHeaderComponent = ({ const values = [...inputValues] values[index] = event.target.value setScopedInputValues(values) - setIsDataChanged(true) } const onAddColumn = () => { diff --git a/agenta-web/src/components/TestSetTable/TestsetTable.tsx b/agenta-web/src/components/TestSetTable/TestsetTable.tsx index 872076b7c..2e833be06 100644 --- a/agenta-web/src/components/TestSetTable/TestsetTable.tsx +++ b/agenta-web/src/components/TestSetTable/TestsetTable.tsx @@ -4,8 +4,7 @@ import {IHeaderParams} from "ag-grid-community" import {createUseStyles} from "react-jss" import {Button, Input, Typography, message} from "antd" import TestsetMusHaveNameModal from "./InsertTestsetNameModal" -import {fetchVariants} from "@/services/api" -import {createNewTestset, fetchTestset, updateTestset} from "@/services/testsets/api" +import {fetchTestset, updateTestset} from "@/services/testsets/api" import {useRouter} from "next/router" import {useAppTheme} from "../Layout/ThemeContextProvider" import useBlockNavigation from "@/hooks/useBlockNavigation" @@ -13,7 +12,6 @@ import {useUpdateEffect} from "usehooks-ts" import useStateCallback from "@/hooks/useStateCallback" import {AxiosResponse} from "axios" import EditRowModal from "./EditRowModal" -import {getVariantInputParameters} from "@/lib/helpers/variantHelper" import {convertToCsv, downloadCsv} from "@/lib/helpers/fileManipulations" import {NoticeType} from "antd/es/message/interface" import {GenericObject, KeyValuePair} from "@/lib/Types" @@ -21,7 +19,7 @@ import TableCellsRenderer from "./TableCellsRenderer" import TableHeaderComponent from "./TableHeaderComponent" type 
TestsetTableProps = { - mode: "create" | "edit" + mode: "edit" } export type ColumnDefsType = {field: string; [key: string]: any} @@ -85,7 +83,6 @@ const TestsetTable: React.FC = ({mode}) => { const [inputValues, setInputValues] = useStateCallback(columnDefs.map((col) => col.field)) const [focusedRowData, setFocusedRowData] = useState() const [writeMode, setWriteMode] = useState(mode) - const [testsetId, setTestsetId] = useState(undefined) const gridRef = useRef(null) const [selectedRow, setSelectedRow] = useState([]) @@ -119,19 +116,10 @@ const TestsetTable: React.FC = ({mode}) => { async function applyColData(colData: {field: string}[] = []) { const newColDefs = createNewColDefs(colData) setColumnDefs(newColDefs) - if (writeMode === "create") { - const initialRowData = Array(3).fill({}) - const separateRowData = initialRowData.map(() => { - return colData.reduce((acc, curr) => ({...acc, [curr.field]: ""}), {}) - }) - - setRowData(separateRowData) - } setInputValues(newColDefs.filter((col) => !!col.field).map((col) => col.field)) } if (writeMode === "edit" && testset_id) { - setIsDataChanged(true) fetchTestset(testset_id as string).then((data) => { setTestsetName(data.name) setRowData(data.csvdata) @@ -141,19 +129,6 @@ const TestsetTable: React.FC = ({mode}) => { })), ) }) - } else if (writeMode === "create" && appId) { - setIsDataChanged(true) - ;(async () => { - const backendVariants = await fetchVariants(appId) - const variant = backendVariants[0] - const inputParams = await getVariantInputParameters(appId, variant) - const colData = inputParams.map((param) => ({field: param.name})) - colData.push({field: "correct_answer"}) - - applyColData(colData) - })().catch(() => { - applyColData([]) - }) } }, [writeMode, testset_id, appId]) @@ -233,24 +208,11 @@ const TestsetTable: React.FC = ({mode}) => { } } - if (writeMode === "create") { - if (!testsetName) { - setIsModalOpen(true) - setIsLoading(false) - } else { - const response = await createNewTestset(appId, 
testsetName, rowData) - afterSave(response) - setTestsetId(response.data.id) - } - } else if (writeMode === "edit") { + if (writeMode === "edit") { if (!testsetName) { setIsModalOpen(true) } else { - const response = await updateTestset( - (testsetId || testset_id) as string, - testsetName, - rowData, - ) + const response = await updateTestset(testset_id as string, testsetName, rowData) afterSave(response) } } diff --git a/agenta-web/src/components/pages/evaluations/EvaluationErrorProps/EvaluationErrorPopover.tsx b/agenta-web/src/components/pages/evaluations/EvaluationErrorProps/EvaluationErrorPopover.tsx new file mode 100644 index 000000000..526123239 --- /dev/null +++ b/agenta-web/src/components/pages/evaluations/EvaluationErrorProps/EvaluationErrorPopover.tsx @@ -0,0 +1,43 @@ +import {EvaluationError, JSSTheme, TypedValue} from "@/lib/Types" +import {InfoCircleOutlined} from "@ant-design/icons" +import {Button, Popover, Typography} from "antd" +import React from "react" +import {createUseStyles} from "react-jss" + +const useStyles = createUseStyles((theme: JSSTheme) => ({ + errModalStackTrace: { + maxWidth: 300, + "& code": { + display: "block", + width: "100%", + }, + }, +})) + +const EvaluationErrorPopover = (result: { + result: TypedValue & { + error: null | EvaluationError + } +}) => { + const classes = useStyles() + + return ( + + {result.result.error?.stacktrace} + + } + title={result.result.error?.message} + > + + + ) +} + +export default EvaluationErrorPopover diff --git a/agenta-web/src/components/pages/evaluations/FilterColumns/FilterColumns.tsx b/agenta-web/src/components/pages/evaluations/FilterColumns/FilterColumns.tsx index 884a43b15..f008293c0 100644 --- a/agenta-web/src/components/pages/evaluations/FilterColumns/FilterColumns.tsx +++ b/agenta-web/src/components/pages/evaluations/FilterColumns/FilterColumns.tsx @@ -1,7 +1,7 @@ import {JSSTheme} from "@/lib/Types" import {CheckOutlined, DownOutlined} from "@ant-design/icons" import {Button, 
Dropdown, Space} from "antd" -import {ItemType} from "antd/es/menu/hooks/useItems" +import {ItemType} from "antd/es/menu/interface" import React from "react" import {createUseStyles} from "react-jss" import {ColDef} from "ag-grid-community" diff --git a/agenta-web/src/components/pages/evaluations/evaluationResults/NewEvaluationModal.tsx b/agenta-web/src/components/pages/evaluations/NewEvaluation/NewEvaluationModal.tsx similarity index 100% rename from agenta-web/src/components/pages/evaluations/evaluationResults/NewEvaluationModal.tsx rename to agenta-web/src/components/pages/evaluations/NewEvaluation/NewEvaluationModal.tsx diff --git a/agenta-web/src/components/pages/evaluations/autoEvaluation/AutoEvaluation.tsx b/agenta-web/src/components/pages/evaluations/autoEvaluation/AutoEvaluation.tsx new file mode 100644 index 000000000..687640387 --- /dev/null +++ b/agenta-web/src/components/pages/evaluations/autoEvaluation/AutoEvaluation.tsx @@ -0,0 +1,595 @@ +import {_Evaluation, EvaluationStatus} from "@/lib/Types" +import { + ArrowsLeftRight, + Database, + Gauge, + GearSix, + Note, + Plus, + Rocket, + Trash, +} from "@phosphor-icons/react" +import {Button, Dropdown, DropdownProps, message, Space, Table, Tag, Typography} from "antd" +import React, {useEffect, useMemo, useRef, useState} from "react" +import {createUseStyles} from "react-jss" +import {ColumnsType} from "antd/es/table" +import {MoreOutlined} from "@ant-design/icons" +import EvaluatorsModal from "./EvaluatorsModal/EvaluatorsModal" +import {useQueryParam} from "@/hooks/useQuery" +import {formatDay} from "@/lib/helpers/dateTimeHelper" +import {calcEvalDuration, getTypedValue} from "@/lib/helpers/evaluate" +import {variantNameWithRev} from "@/lib/helpers/variantHelper" +import NewEvaluationModal from "@/components/pages/evaluations/NewEvaluation/NewEvaluationModal" +import { + deleteEvaluations, + fetchAllEvaluations, + fetchAllEvaluatorConfigs, + fetchAllEvaluators, + fetchEvaluationStatus, +} from 
"@/services/evaluations/api" +import {useAppId} from "@/hooks/useAppId" +import {useAtom} from "jotai" +import {evaluatorConfigsAtom, evaluatorsAtom} from "@/lib/atoms/evaluation" +import DeleteEvaluationModal from "@/components/DeleteEvaluationModal/DeleteEvaluationModal" +import {useRouter} from "next/router" +import EditColumns, {generateEditItems} from "./Filters/EditColumns" +import StatusRenderer from "../cellRenderers/StatusRenderer" +import {runningStatuses} from "../../evaluations/cellRenderers/cellRenderers" +import {useUpdateEffect} from "usehooks-ts" +import {shortPoll} from "@/lib/helpers/utils" +import {getFilterParams} from "./Filters/SearchFilter" +import {uniqBy} from "lodash" +import EvaluationErrorPopover from "../EvaluationErrorProps/EvaluationErrorPopover" +import dayjs from "dayjs" + +const useStyles = createUseStyles(() => ({ + button: { + display: "flex", + alignItems: "center", + }, +})) + +const AutoEvaluation = () => { + const classes = useStyles() + const appId = useAppId() + const router = useRouter() + + const [selectedRowKeys, setSelectedRowKeys] = useState([]) + const [evaluationList, setEvaluationList] = useState<_Evaluation[]>([]) + const [newEvalModalOpen, setNewEvalModalOpen] = useState(false) + const [isEvalLoading, setIsEvalLoading] = useState(false) + const [evaluators, setEvaluators] = useAtom(evaluatorsAtom) + const setEvaluatorConfigs = useAtom(evaluatorConfigsAtom)[1] + const [selectedEvalRecord, setSelectedEvalRecord] = useState<_Evaluation>() + const [isDeleteEvalModalOpen, setIsDeleteEvalModalOpen] = useState(false) + const [isDeleteEvalMultipleModalOpen, setIsDeleteEvalMultipleModalOpen] = useState(false) + const [editColumns, setEditColumns] = useState([]) + const [isFilterColsDropdownOpen, setIsFilterColsDropdownOpen] = useState(false) + const [isEditEvalConfigOpen, setIsEditEvalConfigOpen] = useState(false) + const [isConfigEvaluatorModalOpen, setIsConfigEvaluatorModalOpen] = useQueryParam( + 
"configureEvaluatorModal", + "", + ) + const stoppers = useRef() + + const runningEvaluationIds = useMemo( + () => + evaluationList + .filter((item) => runningStatuses.includes(item.status.value)) + .map((item) => item.id), + [evaluationList], + ) + + useUpdateEffect(() => { + stoppers.current?.() + + if (runningEvaluationIds.length) { + stoppers.current = shortPoll( + () => + Promise.all(runningEvaluationIds.map((id) => fetchEvaluationStatus(id))) + .then((res) => { + setEvaluationList((prev) => { + const newEvals = [...prev] + runningEvaluationIds.forEach((id, ix) => { + const index = newEvals.findIndex((e) => e.id === id) + if (index !== -1) { + newEvals[index].status = res[ix].status + newEvals[index].duration = calcEvalDuration(newEvals[index]) + } + }) + if ( + res.some((item) => !runningStatuses.includes(item.status.value)) + ) + fetchEvaluations() + return newEvals + }) + }) + .catch(console.error), + {delayMs: 2000, timeoutMs: Infinity}, + ).stopper + } + + return () => { + stoppers.current?.() + } + }, [JSON.stringify(runningEvaluationIds)]) + + useEffect(() => { + if (!appId) return + + fetchEvaluations() + }, [appId]) + + useEffect(() => { + const defaultColumnNames = columns.flatMap((col) => + "children" in col ? 
[col.key, ...col.children.map((child) => child.key)] : [col.key], + ) + setEditColumns(defaultColumnNames as string[]) + }, [isEvalLoading]) + + const fetchEvaluations = async () => { + try { + setIsEvalLoading(true) + const [allEvaluations, allEvaluators, allEvaluatorConfigs] = await Promise.all([ + fetchAllEvaluations(appId), + fetchAllEvaluators(), + fetchAllEvaluatorConfigs(appId), + ]) + const result = allEvaluations.sort( + (a, b) => + new Date(b.created_at || 0).getTime() - new Date(a.created_at || 0).getTime(), + ) + setEvaluationList(result) + setEvaluators(allEvaluators) + setEvaluatorConfigs(allEvaluatorConfigs) + } catch (error) { + console.error(error) + } finally { + setIsEvalLoading(false) + } + } + + const handleDeleteMultipleEvaluations = async () => { + const evaluationsIds = selectedRowKeys.map((key) => key.toString()) + try { + setIsEvalLoading(true) + await deleteEvaluations(evaluationsIds) + setEvaluationList((prevEvaluationsList) => + prevEvaluationsList.filter((evaluation) => !evaluationsIds.includes(evaluation.id)), + ) + setSelectedRowKeys([]) + message.success("Evaluations Deleted") + } catch (error) { + console.error(error) + } finally { + setIsEvalLoading(false) + } + } + + const handleDeleteEvaluation = async (record: _Evaluation) => { + try { + setIsEvalLoading(true) + await deleteEvaluations([record.id]) + setEvaluationList((prevEvaluationsList) => + prevEvaluationsList.filter((evaluation) => ![record.id].includes(evaluation.id)), + ) + message.success("Evaluation Deleted") + } catch (error) { + console.error(error) + } finally { + setIsEvalLoading(false) + } + } + + const compareDisabled = useMemo(() => { + const evalList = evaluationList.filter((e) => selectedRowKeys.includes(e.id)) + return ( + evalList.length < 2 || + evalList.some( + (item) => + item.status.value === EvaluationStatus.STARTED || + item.status.value === EvaluationStatus.INITIALIZED || + item.testset.id !== evalList[0].testset.id, + ) + ) + }, [selectedRowKeys]) + 
+ const onToggleEvaluatorVisibility = (evalConfigId: string) => { + if (!editColumns.includes(evalConfigId)) { + setEditColumns([...editColumns, evalConfigId]) + } else { + setEditColumns(editColumns.filter((item) => item !== evalConfigId)) + } + } + + const handleOpenChangeEditCols: DropdownProps["onOpenChange"] = (nextOpen, info) => { + if (info.source === "trigger" || nextOpen) { + setIsFilterColsDropdownOpen(nextOpen) + } + } + + const handleNavigation = (variantName: string, revisionNum: string) => { + router.push(`/apps/${appId}/playground?variant=${variantName}&revision=${revisionNum}`) + } + + const evaluatorConfigs = useMemo( + () => + uniqBy( + evaluationList + .map((item) => + item.aggregated_results.map((item) => ({ + ...item.evaluator_config, + evaluator: evaluators.find( + (e) => e.key === item.evaluator_config.evaluator_key, + ), + })), + ) + .flat(), + "id", + ), + [evaluationList], + ) + + const columns: ColumnsType<_Evaluation> = [ + { + title: "Variant", + dataIndex: "variants", + key: "variants", + fixed: "left", + onHeaderCell: () => ({ + style: {minWidth: 160}, + }), + render: (value, record) => { + return ( + + {variantNameWithRev({ + variant_name: value[0].variantName, + revision: record.revisions[0], + })} + + ) + }, + ...getFilterParams("variants", "text"), + }, + { + title: "Testset", + dataIndex: "testsetName", + key: "testsetName", + onHeaderCell: () => ({ + style: {minWidth: 160}, + }), + render: (_, record) => { + return {record.testset.name} + }, + ...getFilterParams("testset", "text"), + }, + { + title: "Status", + dataIndex: "status", + key: "status", + onHeaderCell: () => ({ + style: {minWidth: 240}, + }), + render: (_, record) => { + return + }, + ...getFilterParams("status", "text"), + }, + { + title: "Results", + key: "results", + onHeaderCell: () => ({style: {minWidth: 240}}), + children: evaluatorConfigs.map((evaluator) => ({ + title: () => { + return ( +
+ {evaluator.name} + + {evaluator.evaluator?.name} + +
+ ) + }, + key: evaluator.name, + onHeaderCell: () => ({style: {minWidth: 240}}), + sortDirections: ["descend", "ascend"], + sorter: { + compare: (a, b) => { + const getSortValue = (item: _Evaluation, evaluatorId: string) => { + const matchingResult = item.aggregated_results.find( + (result) => result.evaluator_config.id === evaluatorId, + ) + + if (matchingResult && typeof matchingResult.result.value === "number") { + return matchingResult.result.value + } + + return 0 + } + + return getSortValue(a, evaluator.id) - getSortValue(b, evaluator.id) + }, + }, + render: (_, record) => { + if (!evaluators?.length) return + + const matchingResults = record.aggregated_results.filter( + (result) => result.evaluator_config.id === evaluator.id, + ) + + if (matchingResults.length === 0) { + return - + } + + return ( + + {matchingResults.map((result, index) => + result.result.error ? ( + + ) : ( + + {getTypedValue(result.result)} + + ), + )} + + ) + }, + })), + }, + { + title: "Created on", + dataIndex: "created_at", + key: "createdAt", + onHeaderCell: () => ({ + style: {minWidth: 160}, + }), + sorter: { + compare: (a, b) => dayjs(a.created_at).valueOf() - dayjs(b.created_at).valueOf(), + }, + render: (_, record) => { + return formatDay(record.created_at) + }, + ...getFilterParams("created_at", "date"), + }, + { + title: "Avg. 
Latency", + dataIndex: "average_latency", + key: "average_latency", + onHeaderCell: () => ({ + style: {minWidth: 160}, + }), + sorter: { + compare: (a, b) => + Number(a.average_latency?.value) - Number(b.average_latency?.value), + }, + render: (_, record) => { + return getTypedValue(record.average_latency) + }, + ...getFilterParams("average_latency", "number"), + }, + { + title: "Total Cost", + dataIndex: "average_cost", + key: "average_cost", + onHeaderCell: () => ({ + style: {minWidth: 160}, + }), + sorter: { + compare: (a, b) => Number(a.average_cost?.value) - Number(b.average_cost?.value), + }, + render: (_, record) => { + return getTypedValue(record.average_cost) + }, + ...getFilterParams("total_cost", "number"), + }, + { + title: , + key: "key", + width: 56, + fixed: "right", + align: "center", + render: (_, record) => { + return ( + , + onClick: (e) => { + e.domEvent.stopPropagation() + router.push( + `/apps/${appId}/evaluations/results/${record.id}`, + ) + }, + }, + { + key: "variant", + label: "View variant", + icon: , + onClick: (e) => { + e.domEvent.stopPropagation() + handleNavigation( + record.variants[0].variantName, + record.revisions[0], + ) + }, + }, + { + key: "view_testset", + label: "View test set", + icon: , + onClick: (e) => { + e.domEvent.stopPropagation() + router.push(`/apps/${appId}/testsets/${record.testset.id}`) + }, + }, + {type: "divider"}, + { + key: "delete_eval", + label: "Delete", + icon: , + danger: true, + onClick: (e) => { + e.domEvent.stopPropagation() + setSelectedEvalRecord(record) + setIsDeleteEvalModalOpen(true) + }, + }, + ], + }} + > + + + + + + + { + onToggleEvaluatorVisibility(key) + setIsFilterColsDropdownOpen(true) + }} + /> + + + +
{ + setSelectedRowKeys(selectedRowKeys) + }, + }} + className="ph-no-capture" + columns={editedColumns} + rowKey={"id"} + dataSource={evaluationList} + scroll={{x: true}} + bordered + pagination={false} + onRow={(record) => ({ + style: {cursor: "pointer"}, + onClick: () => router.push(`/apps/${appId}/evaluations/results/${record.id}`), + })} + /> + + { + setNewEvalModalOpen(false) + }} + onSuccess={() => { + setNewEvalModalOpen(false) + fetchEvaluations() + }} + /> + + {isConfigEvaluatorModalOpen === "open" && ( + setIsConfigEvaluatorModalOpen("")} + /> + )} + + {selectedEvalRecord && ( + setIsDeleteEvalModalOpen(false)} + onOk={async () => { + await handleDeleteEvaluation(selectedEvalRecord) + setIsDeleteEvalModalOpen(false) + }} + evaluationType={"automatic evaluation"} + /> + )} + {isDeleteEvalMultipleModalOpen && ( + setIsDeleteEvalMultipleModalOpen(false)} + onOk={async () => { + await handleDeleteMultipleEvaluations() + setIsDeleteEvalMultipleModalOpen(false) + }} + evaluationType={"single model evaluation"} + /> + )} + + ) +} + +export default AutoEvaluation diff --git a/agenta-web/src/components/pages/evaluations/evaluators/AdvancedSettings.tsx b/agenta-web/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/AdvancedSettings.tsx similarity index 78% rename from agenta-web/src/components/pages/evaluations/evaluators/AdvancedSettings.tsx rename to agenta-web/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/AdvancedSettings.tsx index f09e1e73c..883ffccdc 100644 --- a/agenta-web/src/components/pages/evaluations/evaluators/AdvancedSettings.tsx +++ b/agenta-web/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/AdvancedSettings.tsx @@ -1,9 +1,10 @@ import React from "react" -import {Form, Input, InputNumber, Switch, Tooltip, Collapse, theme} from "antd" +import {Form, Input, InputNumber, Switch, Tooltip, Collapse, theme, AutoComplete} from "antd" import 
{CaretRightOutlined, InfoCircleOutlined} from "@ant-design/icons" import {createUseStyles} from "react-jss" import {Editor} from "@monaco-editor/react" import {useAppTheme} from "@/components/Layout/ThemeContextProvider" +import {generatePaths} from "@/lib/transformers" const useStyles = createUseStyles((theme: any) => ({ label: { @@ -20,9 +21,12 @@ const useStyles = createUseStyles((theme: any) => ({ type AdvancedSettingsProps = { settings: Record[] + selectedTestcase: { + testcase: Record | null + } } -const AdvancedSettings: React.FC = ({settings}) => { +const AdvancedSettings: React.FC = ({settings, selectedTestcase}) => { const classes = useStyles() const {appTheme} = useAppTheme() const {token} = theme.useToken() @@ -31,7 +35,6 @@ const AdvancedSettings: React.FC = ({settings}) => { } - className={"my-[10px]"} > = ({settings}) => { initialValue={field.default} rules={rules} > - {field.type === "string" || field.type === "regex" ? ( - + {(field.type === "string" || field.type === "regex") && + selectedTestcase.testcase ? ( + + option!.value + .toUpperCase() + .indexOf(inputValue.toUpperCase()) !== -1 + } + /> + ) : field.type === "string" || field.type === "regex" ? ( + ) : field.type === "number" ? ( ) : field.type === "boolean" || field.type === "bool" ? 
( diff --git a/agenta-web/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DynamicFormField.tsx b/agenta-web/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DynamicFormField.tsx new file mode 100644 index 000000000..aabd464a8 --- /dev/null +++ b/agenta-web/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DynamicFormField.tsx @@ -0,0 +1,144 @@ +import {useAppTheme} from "@/components/Layout/ThemeContextProvider" +import {isValidRegex} from "@/lib/helpers/validators" +import {generatePaths} from "@/lib/transformers" +import {EvaluationSettingsTemplate, JSSTheme} from "@/lib/Types" +import {InfoCircleOutlined} from "@ant-design/icons" +import {Editor} from "@monaco-editor/react" +import {theme, Form, Tooltip, InputNumber, Switch, Input, AutoComplete} from "antd" +import {Rule} from "antd/es/form" +import Link from "next/link" +import {createUseStyles} from "react-jss" + +type DynamicFormFieldProps = EvaluationSettingsTemplate & { + name: string | string[] + traceTree: Record +} + +const useStyles = createUseStyles((theme: JSSTheme) => ({ + editor: { + border: `1px solid ${theme.colorBorder}`, + borderRadius: theme.borderRadius, + overflow: "hidden", + "& .monaco-editor": { + width: "0 !important", + }, + }, + ExternalHelp: { + marginBottom: "20px", + display: "flex", + alignItems: "center", + gap: "0.3em", + }, + ExternalHelpLink: { + margin: "0px", + padding: "0px", + textDecoration: "underline", + color: theme.isDark ? "rgba(255, 255, 255, 0.85)" : "#000", + + "&:hover": { + color: theme.isDark ? 
"rgba(255, 255, 255, 0.85)" : "#000", + textDecoration: "underline", + }, + }, +})) + +export const DynamicFormField: React.FC = ({ + name, + label, + type, + default: defaultVal, + description, + min, + max, + required, + traceTree, +}) => { + const {appTheme} = useAppTheme() + const classes = useStyles() + const {token} = theme.useToken() + + const rules: Rule[] = [{required: required ?? true, message: "This field is required"}] + if (type === "regex") + rules.push({ + validator: (_, value) => + new Promise((res, rej) => + isValidRegex(value) ? res("") : rej("Regex pattern is not valid"), + ), + }) + + const ExternalHelpInfo = + name[1] === "webhook_url" ? ( +
+ Learn + + more + + about the evaluator +
+ ) : null + + return ( + <> + {label !== "Correct Answer" && ( + + {label} + {description && ( + + + + )} + + } + initialValue={defaultVal} + rules={rules} + > + {name[1] === "question_key" || + name[1] === "answer_key" || + name[1] === "contexts_key" ? ( + + option!.value.toUpperCase().indexOf(inputValue.toUpperCase()) !== -1 + } + /> + ) : type === "string" || type === "regex" ? ( + + ) : type === "number" ? ( + + ) : type === "boolean" || type === "bool" ? ( + + ) : type === "text" ? ( + + ) : type === "code" ? ( + + ) : type === "object" ? ( + + ) : null} + + )} + + {ExternalHelpInfo} + + ) +} diff --git a/agenta-web/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx b/agenta-web/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx new file mode 100644 index 000000000..dc07836d0 --- /dev/null +++ b/agenta-web/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx @@ -0,0 +1,331 @@ +import {Evaluator, EvaluatorConfig, JSSTheme, testset, Variant} from "@/lib/Types" +import {CloseOutlined} from "@ant-design/icons" +import {ArrowLeft, CaretDoubleRight} from "@phosphor-icons/react" +import {Button, Flex, Form, Input, message, Space, Tooltip, Typography} from "antd" +import React, {useEffect, useMemo, useState} from "react" +import {createUseStyles} from "react-jss" +import AdvancedSettings from "./AdvancedSettings" +import {DynamicFormField} from "./DynamicFormField" +import { + CreateEvaluationConfigData, + createEvaluatorConfig, + updateEvaluatorConfig, +} from "@/services/evaluations/api" +import {useAppId} from "@/hooks/useAppId" +import {isDemo} from "@/lib/helpers/utils" +import {dynamicComponent} from "@/lib/helpers/dynamic" + +const DebugSection: any = dynamicComponent( + "pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DebugSection", +) + +type ConfigureEvaluatorProps = { + setCurrent: React.Dispatch> + handleOnCancel: 
() => void + onSuccess: () => void + selectedEvaluator: Evaluator + variants: Variant[] | null + testsets: testset[] | null + selectedTestcase: { + testcase: Record | null + } + setSelectedVariant: React.Dispatch> + selectedVariant: Variant | null + editMode: boolean + editEvalEditValues: EvaluatorConfig | null + setEditEvalEditValues: React.Dispatch> + setEditMode: (value: React.SetStateAction) => void + cloneConfig: boolean + setCloneConfig: React.Dispatch> + setSelectedTestcase: React.Dispatch< + React.SetStateAction<{ + testcase: Record | null + }> + > + setDebugEvaluator: React.Dispatch> + debugEvaluator: boolean + setSelectedTestset: React.Dispatch> + selectedTestset: string +} + +const useStyles = createUseStyles((theme: JSSTheme) => ({ + headerText: { + "& .ant-typography": { + lineHeight: theme.lineHeightLG, + fontSize: theme.fontSizeHeading4, + fontWeight: theme.fontWeightStrong, + }, + }, + title: { + fontSize: theme.fontSizeLG, + fontWeight: theme.fontWeightMedium, + lineHeight: theme.lineHeightLG, + }, + formContainer: { + display: "flex", + flexDirection: "column", + maxWidth: 552, + gap: theme.padding, + overflowY: "auto", + maxHeight: 580, + "& .ant-form-item": { + marginBottom: 0, + }, + "& .ant-form-item-label": { + paddingBottom: theme.paddingXXS, + }, + }, + formTitleText: { + fontSize: theme.fontSize, + lineHeight: theme.lineHeight, + fontWeight: theme.fontWeightMedium, + }, +})) + +const ConfigureEvaluator = ({ + setCurrent, + selectedEvaluator, + handleOnCancel, + variants, + testsets, + onSuccess, + selectedTestcase, + selectedVariant, + setSelectedVariant, + editMode, + editEvalEditValues, + setEditEvalEditValues, + setEditMode, + cloneConfig, + setCloneConfig, + setSelectedTestcase, + debugEvaluator, + setDebugEvaluator, + selectedTestset, + setSelectedTestset, +}: ConfigureEvaluatorProps) => { + const appId = useAppId() + const classes = useStyles() + const [form] = Form.useForm() + const [submitLoading, setSubmitLoading] = 
useState(false) + const [traceTree, setTraceTree] = useState<{ + trace: Record | string | null + }>({ + trace: null, + }) + + const evalFields = useMemo( + () => + Object.keys(selectedEvaluator?.settings_template || {}) + .filter((key) => !!selectedEvaluator?.settings_template[key]?.type) + .map((key) => ({ + key, + ...selectedEvaluator?.settings_template[key]!, + advanced: selectedEvaluator?.settings_template[key]?.advanced || false, + })), + [selectedEvaluator], + ) + + const advancedSettingsFields = evalFields.filter((field) => field.advanced) + const basicSettingsFields = evalFields.filter((field) => !field.advanced) + + const onSubmit = (values: CreateEvaluationConfigData) => { + try { + setSubmitLoading(true) + if (!selectedEvaluator.key) throw new Error("No selected key") + const settingsValues = values.settings_values || {} + + const data = { + ...values, + evaluator_key: selectedEvaluator.key, + settings_values: settingsValues, + } + ;(editMode + ? updateEvaluatorConfig(editEvalEditValues?.id!, data) + : createEvaluatorConfig(appId, data) + ) + .then(onSuccess) + .catch(console.error) + .finally(() => setSubmitLoading(false)) + } catch (error: any) { + setSubmitLoading(false) + console.error(error) + message.error(error.message) + } + } + + useEffect(() => { + form.resetFields() + if (editMode) { + form.setFieldsValue(editEvalEditValues) + } else if (cloneConfig) { + form.setFieldValue("settings_values", editEvalEditValues?.settings_values) + } + }, [editMode, cloneConfig]) + + return ( +
+
+ + {editMode ? ( + <> +
+ + +
+ + + + {selectedEvaluator.name} + + + + + + + + {selectedEvaluator.description} + + + +
+
+ +
+ + + +
+
+ + {basicSettingsFields.length ? ( + + + Parameters + + {basicSettingsFields.map((field) => ( + + ))} + + ) : ( + "" + )} + + {advancedSettingsFields.length > 0 && ( + + )} + +
+ + + + + +
+ + +
+
+ ) +} + +export default ConfigureEvaluator diff --git a/agenta-web/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/DeleteModal.tsx b/agenta-web/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/DeleteModal.tsx new file mode 100644 index 000000000..3c826ee70 --- /dev/null +++ b/agenta-web/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/DeleteModal.tsx @@ -0,0 +1,71 @@ +import {checkIfResourceValidForDeletion} from "@/lib/helpers/evaluate" +import {EvaluatorConfig, JSSTheme} from "@/lib/Types" +import {deleteEvaluatorConfig} from "@/services/evaluations/api" +import {ExclamationCircleOutlined} from "@ant-design/icons" +import {Modal, Space, theme, Typography} from "antd" +import React, {useState} from "react" +import {createUseStyles} from "react-jss" + +type DeleteModalProps = { + selectedEvalConfig: EvaluatorConfig + onSuccess: () => void +} & React.ComponentProps + +const useStyles = createUseStyles((theme: JSSTheme) => ({ + title: { + fontSize: theme.fontSizeLG, + fontWeight: theme.fontWeightStrong, + lineHeight: theme.lineHeightLG, + }, +})) + +const DeleteModal = ({selectedEvalConfig, onSuccess, ...props}: DeleteModalProps) => { + const classes = useStyles() + const { + token: {colorWarning}, + } = theme.useToken() + const [isLoading, setIsLoading] = useState(false) + + const handleDelete = async () => { + try { + if ( + !(await checkIfResourceValidForDeletion({ + resourceType: "evaluator_config", + resourceIds: [selectedEvalConfig.id], + })) + ) + return + try { + setIsLoading(true) + await deleteEvaluatorConfig(selectedEvalConfig.id) + await onSuccess() + props.onCancel?.({} as any) + } catch (error) { + console.error(error) + } + } catch (error) { + console.error(error) + } finally { + setIsLoading(false) + } + } + return ( + + + Delete evaluator + + } + centered + okText={"Delete"} + okButtonProps={{danger: true, loading: isLoading}} + onOk={handleDelete} + {...props} + > + Are 
you sure you want to delete this evaluator? + + ) +} + +export default DeleteModal diff --git a/agenta-web/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/EvaluatorCard.tsx b/agenta-web/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/EvaluatorCard.tsx new file mode 100644 index 000000000..b2260c59e --- /dev/null +++ b/agenta-web/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/EvaluatorCard.tsx @@ -0,0 +1,210 @@ +import {evaluatorsAtom} from "@/lib/atoms/evaluation" +import {Evaluator, EvaluatorConfig, JSSTheme} from "@/lib/Types" +import {MoreOutlined} from "@ant-design/icons" +import {Copy, Note, Trash} from "@phosphor-icons/react" +import {Button, Card, Dropdown, Empty, Tag, Typography} from "antd" +import {useAtom} from "jotai" +import React, {useState} from "react" +import {createUseStyles} from "react-jss" +import DeleteModal from "./DeleteModal" +import dayjs from "dayjs" + +interface EvaluatorCardProps { + evaluatorConfigs: EvaluatorConfig[] + setEditMode: React.Dispatch> + setCloneConfig: React.Dispatch> + setCurrent: React.Dispatch> + setSelectedEvaluator: React.Dispatch> + setEditEvalEditValues: React.Dispatch> + onSuccess: () => void +} + +const useStyles = createUseStyles((theme: JSSTheme) => ({ + container: { + display: "flex", + flexWrap: "wrap", + gap: theme.padding, + height: "100%", + maxHeight: 600, + overflowY: "auto", + }, + cardTitle: { + fontSize: theme.fontSizeLG, + lineHeight: theme.lineHeightLG, + fontWeight: theme.fontWeightMedium, + }, + evaluatorCard: { + width: 276, + display: "flex", + height: "fit-content", + flexDirection: "column", + transition: "all 0.025s ease-in", + cursor: "pointer", + "& > .ant-card-head": { + minHeight: 0, + padding: theme.paddingSM, + + "& .ant-card-head-title": { + fontSize: theme.fontSize, + fontWeight: theme.fontWeightMedium, + lineHeight: theme.lineHeight, + }, + }, + "& > .ant-card-body": { + padding: theme.paddingSM, + 
display: "flex", + flexDirection: "column", + gap: theme.marginXS, + "& div": { + display: "flex", + alignItems: "center", + justifyContent: "space-between", + }, + }, + "&:hover": { + boxShadow: theme.boxShadowTertiary, + }, + }, + centeredItem: { + display: "grid", + placeItems: "center", + width: "100%", + height: 600, + }, +})) + +const EvaluatorCard = ({ + evaluatorConfigs, + setEditMode, + setCurrent, + setSelectedEvaluator, + setEditEvalEditValues, + onSuccess, + setCloneConfig, +}: EvaluatorCardProps) => { + const classes = useStyles() + const evaluators = useAtom(evaluatorsAtom)[0] + const [openDeleteModal, setOpenDeleteModal] = useState(false) + const [selectedDelEval, setSelectedDelEval] = useState(null) + + return ( +
+ {evaluatorConfigs.length ? ( + evaluatorConfigs.map((item) => { + const evaluator = evaluators.find((e) => e.key === item.evaluator_key) + + return ( + { + const selectedEval = evaluators.find( + (e) => e.key === item.evaluator_key, + ) + if (selectedEval) { + setEditMode(true) + setSelectedEvaluator(selectedEval) + setEditEvalEditValues(item) + setCurrent(2) + } + }} + title={item.name} + extra={ + , + onClick: (e: any) => { + e.domEvent.stopPropagation() + const selectedEval = evaluators.find( + (e) => e.key === item.evaluator_key, + ) + if (selectedEval) { + setEditMode(true) + setSelectedEvaluator(selectedEval) + setEditEvalEditValues(item) + setCurrent(2) + } + }, + }, + { + key: "clone", + label: "Clone", + icon: , + onClick: (e: any) => { + e.domEvent.stopPropagation() + const selectedEval = evaluators.find( + (e) => e.key === item.evaluator_key, + ) + if (selectedEval) { + setCloneConfig(true) + setSelectedEvaluator(selectedEval) + setEditEvalEditValues(item) + setCurrent(2) + } + }, + }, + {type: "divider"}, + { + key: "delete_app", + label: "Delete", + icon: , + danger: true, + onClick: (e: any) => { + e.domEvent.stopPropagation() + setOpenDeleteModal(true) + setSelectedDelEval(item) + }, + }, + ], + }} + > +
+ ) +} + +export default EvaluatorCard diff --git a/agenta-web/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/EvaluatorList.tsx b/agenta-web/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/EvaluatorList.tsx new file mode 100644 index 000000000..719aee6a2 --- /dev/null +++ b/agenta-web/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/EvaluatorList.tsx @@ -0,0 +1,171 @@ +import {evaluatorsAtom} from "@/lib/atoms/evaluation" +import {Evaluator, EvaluatorConfig} from "@/lib/Types" +import {MoreOutlined} from "@ant-design/icons" +import {Copy, GearSix, Note, Trash} from "@phosphor-icons/react" +import {Button, Dropdown, Table, Tag} from "antd" +import {ColumnsType} from "antd/es/table" +import {useAtom} from "jotai" +import React, {useState} from "react" +import DeleteModal from "./DeleteModal" + +interface EvaluatorListProps { + evaluatorConfigs: EvaluatorConfig[] + setEditMode: React.Dispatch> + setCloneConfig: React.Dispatch> + setCurrent: React.Dispatch> + setSelectedEvaluator: React.Dispatch> + setEditEvalEditValues: React.Dispatch> + onSuccess: () => void +} + +const EvaluatorList = ({ + evaluatorConfigs, + setCloneConfig, + setCurrent, + setEditEvalEditValues, + setEditMode, + setSelectedEvaluator, + onSuccess, +}: EvaluatorListProps) => { + const evaluators = useAtom(evaluatorsAtom)[0] + const [openDeleteModal, setOpenDeleteModal] = useState(false) + const [selectedDelEval, setSelectedDelEval] = useState(null) + + const columns: ColumnsType = [ + // { + // title: "Version", + // dataIndex: "version", + // key: "version", + // onHeaderCell: () => ({ + // style: {minWidth: 80}, + // }), + // }, + { + title: "Name", + dataIndex: "name", + key: "name", + render: (_, record) => { + return
{record.name}
+ }, + }, + { + title: "Type", + dataIndex: "type", + key: "type", + render: (_, record) => { + const evaluator = evaluators.find((item) => item.key === record.evaluator_key) + return {evaluator?.name} + }, + }, + { + title: , + key: "key", + width: 56, + fixed: "right", + align: "center", + render: (_, record) => { + return ( + , + onClick: (e: any) => { + e.domEvent.stopPropagation() + const selectedEval = evaluators.find( + (e) => e.key === record.evaluator_key, + ) + if (selectedEval) { + setEditMode(true) + setSelectedEvaluator(selectedEval) + setEditEvalEditValues(record) + setCurrent(2) + } + }, + }, + { + key: "clone", + label: "Clone", + icon: , + onClick: (e: any) => { + e.domEvent.stopPropagation() + const selectedEval = evaluators.find( + (e) => e.key === record.evaluator_key, + ) + if (selectedEval) { + setCloneConfig(true) + setSelectedEvaluator(selectedEval) + setEditEvalEditValues(record) + setCurrent(2) + } + }, + }, + {type: "divider"}, + { + key: "delete_app", + label: "Delete", + icon: , + danger: true, + onClick: (e: any) => { + e.domEvent.stopPropagation() + setOpenDeleteModal(true) + setSelectedDelEval(record) + }, + }, + ], + }} + > +
({ + style: {cursor: "pointer"}, + "data-cy": "evaluator-list", + onClick: () => { + const selectedEval = evaluators.find((e) => e.key === record.evaluator_key) + if (selectedEval) { + setEditMode(true) + setSelectedEvaluator(selectedEval) + setEditEvalEditValues(record) + setCurrent(2) + } + }, + })} + /> + {selectedDelEval && ( + setOpenDeleteModal(false)} + selectedEvalConfig={selectedDelEval} + onSuccess={onSuccess} + /> + )} + + ) +} + +export default EvaluatorList diff --git a/agenta-web/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/index.tsx b/agenta-web/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/index.tsx new file mode 100644 index 000000000..99600dda6 --- /dev/null +++ b/agenta-web/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/index.tsx @@ -0,0 +1,194 @@ +import {Evaluator, EvaluatorConfig, JSSTheme} from "@/lib/Types" +import {CloseOutlined, PlusOutlined} from "@ant-design/icons" +import {Cards, Table} from "@phosphor-icons/react" +import {Button, Divider, Flex, Input, Radio, Space, Spin, Typography} from "antd" +import React, {useMemo, useState} from "react" +import {createUseStyles} from "react-jss" +import EvaluatorCard from "./EvaluatorCard" +import EvaluatorList from "./EvaluatorList" +import {getEvaluatorTags} from "@/lib/helpers/evaluate" +import {useAtom} from "jotai" +import {evaluatorsAtom} from "@/lib/atoms/evaluation" + +type EvaluatorsProps = { + evaluatorConfigs: EvaluatorConfig[] + handleOnCancel: () => void + setCurrent: React.Dispatch> + setSelectedEvaluator: React.Dispatch> + fetchingEvalConfigs: boolean + setEditMode: React.Dispatch> + setCloneConfig: React.Dispatch> + setEditEvalEditValues: React.Dispatch> + onSuccess: () => void + setEvaluatorsDisplay: any + evaluatorsDisplay: string + setSelectedEvaluatorCategory: React.Dispatch> + selectedEvaluatorCategory: string +} + +const useStyles = createUseStyles((theme: JSSTheme) => ({ + titleContainer: 
{ + display: "flex", + alignItems: "center", + justifyContent: "space-between", + "& .ant-typography": { + fontSize: theme.fontSizeHeading4, + fontWeight: theme.fontWeightStrong, + lineHeight: theme.lineHeightLG, + }, + }, + header: { + display: "flex", + flexDirection: "column", + gap: theme.padding, + }, + radioBtnContainer: { + display: "flex", + alignItems: "center", + gap: theme.marginXS, + "& .ant-radio-button-wrapper": { + borderRadius: theme.borderRadius, + borderInlineStartWidth: "1px", + "&:before": { + width: 0, + }, + "&:not(.ant-radio-button-wrapper-checked)": { + border: "none", + "&:hover": { + backgroundColor: theme.colorBgTextHover, + }, + }, + }, + }, +})) + +const Evaluators = ({ + evaluatorConfigs, + handleOnCancel, + setCurrent, + setSelectedEvaluator, + fetchingEvalConfigs, + setEditMode, + setEditEvalEditValues, + onSuccess, + setCloneConfig, + setEvaluatorsDisplay, + evaluatorsDisplay, + selectedEvaluatorCategory, + setSelectedEvaluatorCategory, +}: EvaluatorsProps) => { + const classes = useStyles() + const [searchTerm, setSearchTerm] = useState("") + const evaluatorTags = getEvaluatorTags() + const evaluators = useAtom(evaluatorsAtom)[0] + + const updatedEvaluatorConfigs = useMemo(() => { + return evaluatorConfigs.map((config) => { + const matchingEvaluator = evaluators.find( + (evaluator) => evaluator.key === config.evaluator_key, + ) + return matchingEvaluator ? {...config, tags: matchingEvaluator.tags} : config + }) + }, [evaluatorConfigs, evaluators]) + + const filteredEvaluators = useMemo(() => { + let filtered = updatedEvaluatorConfigs + + if (selectedEvaluatorCategory !== "view_all") { + filtered = filtered.filter((item) => item.tags?.includes(selectedEvaluatorCategory)) + } + + if (searchTerm) { + filtered = filtered.filter((item) => + item.name.toLowerCase().includes(searchTerm.toLowerCase()), + ) + } + + return filtered + }, [searchTerm, selectedEvaluatorCategory, updatedEvaluatorConfigs]) + + return ( +
+
+
+ Configure evaluators + + + +
+
+
+ setSelectedEvaluatorCategory(e.target.value)} + > + View all + + {evaluatorTags.map((val, idx) => ( + + {val.label} + + ))} + + + setSearchTerm(e.target.value)} + /> + setEvaluatorsDisplay(e.target.value)} + > + +
+ + + + + + + + + + + + + {evaluatorsDisplay === "list" ? ( + + ) : ( + + )} + + + ) +} + +export default Evaluators diff --git a/agenta-web/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/EvaluatorsModal.tsx b/agenta-web/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/EvaluatorsModal.tsx new file mode 100644 index 000000000..a9e70174e --- /dev/null +++ b/agenta-web/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/EvaluatorsModal.tsx @@ -0,0 +1,175 @@ +import {useAppId} from "@/hooks/useAppId" +import {evaluatorConfigsAtom, evaluatorsAtom} from "@/lib/atoms/evaluation" +import {Evaluator, EvaluatorConfig, testset, Variant} from "@/lib/Types" +import {fetchAllEvaluatorConfigs, fetchAllEvaluators} from "@/services/evaluations/api" +import {Modal} from "antd" +import {useAtom} from "jotai" +import React, {useEffect, useState} from "react" +import {createUseStyles} from "react-jss" +import {fetchVariants} from "@/services/api" +import {fetchTestsets} from "@/services/testsets/api" +import ConfigureEvaluator from "./ConfigureEvaluator" +import NewEvaluator from "./NewEvaluator" +import Evaluators from "./Evaluators" +import {useLocalStorage} from "usehooks-ts" + +type EvaluatorsModalProps = {} & React.ComponentProps + +const useStyles = createUseStyles(() => ({ + modalWrapper: { + transition: "width 0.3s ease", + "& .ant-modal-content": { + height: 800, + "& .ant-modal-body": { + height: "100%", + }, + }, + }, +})) + +const EvaluatorsModal = ({...props}: EvaluatorsModalProps) => { + const classes = useStyles() + const appId = useAppId() + const [current, setCurrent] = useState(0) + const [evaluators, setEvaluators] = useAtom(evaluatorsAtom) + const [evaluatorConfigs, setEvaluatorConfigs] = useAtom(evaluatorConfigsAtom) + const [selectedEvaluator, setSelectedEvaluator] = useState(null) + const [variants, setVariants] = useState(null) + const [testsets, setTestsets] = useState(null) + const [fetchingEvalConfigs, 
setFetchingEvalConfigs] = useState(false) + const [selectedTestcase, setSelectedTestcase] = useState<{ + testcase: Record | null + }>({ + testcase: null, + }) + const [selectedVariant, setSelectedVariant] = useState(null) + const [editMode, setEditMode] = useState(false) + const [cloneConfig, setCloneConfig] = useState(false) + const [editEvalEditValues, setEditEvalEditValues] = useState(null) + const [evaluatorsDisplay, setEvaluatorsDisplay] = useLocalStorage<"card" | "list">( + "evaluator_view", + "list", + ) + const [selectedEvaluatorCategory, setSelectedEvaluatorCategory] = useState("view_all") + const [debugEvaluator, setDebugEvaluator] = useLocalStorage("isDebugSelectionOpen", false) + const [selectedTestset, setSelectedTestset] = useState("") + + const evalConfigFetcher = () => { + setFetchingEvalConfigs(true) + fetchAllEvaluatorConfigs(appId) + .then(setEvaluatorConfigs) + .catch(console.error) + .finally(() => setFetchingEvalConfigs(false)) + } + + useEffect(() => { + Promise.all([ + fetchAllEvaluators(), + fetchAllEvaluatorConfigs(appId), + fetchVariants(appId), + fetchTestsets(appId), + ]).then(([evaluators, configs, variants, testsets]) => { + setEvaluators(evaluators) + setEvaluatorConfigs(configs) + setVariants(variants) + if (variants.length) { + setSelectedVariant(variants[0]) + } + setTestsets(testsets) + if (testsets.length) { + setSelectedTestset(testsets[0]._id) + } + }) + }, [appId]) + + const steps = [ + { + content: ( + props.onCancel?.({} as any)} + setCurrent={setCurrent} + setSelectedEvaluator={setSelectedEvaluator} + fetchingEvalConfigs={fetchingEvalConfigs} + setEditMode={setEditMode} + setEditEvalEditValues={setEditEvalEditValues} + onSuccess={() => evalConfigFetcher()} + setCloneConfig={setCloneConfig} + setEvaluatorsDisplay={setEvaluatorsDisplay} + evaluatorsDisplay={evaluatorsDisplay} + selectedEvaluatorCategory={selectedEvaluatorCategory} + setSelectedEvaluatorCategory={setSelectedEvaluatorCategory} + /> + ), + }, + { + content: ( + 
props.onCancel?.({} as any)} + setSelectedEvaluator={setSelectedEvaluator} + setEvaluatorsDisplay={setEvaluatorsDisplay} + evaluatorsDisplay={evaluatorsDisplay} + selectedEvaluatorCategory={selectedEvaluatorCategory} + setSelectedEvaluatorCategory={setSelectedEvaluatorCategory} + /> + ), + }, + ] + + if (selectedEvaluator) { + steps.push({ + content: ( + { + props.onCancel?.({} as any) + setEditMode(false) + setCloneConfig(false) + setEditEvalEditValues(null) + }} + variants={variants} + testsets={testsets} + onSuccess={() => { + evalConfigFetcher() + setCurrent(0) + setEditMode(false) + }} + selectedTestcase={selectedTestcase} + selectedVariant={selectedVariant} + setSelectedVariant={setSelectedVariant} + editMode={editMode} + editEvalEditValues={editEvalEditValues} + setEditEvalEditValues={setEditEvalEditValues} + setEditMode={setEditMode} + cloneConfig={cloneConfig} + setCloneConfig={setCloneConfig} + setSelectedTestcase={setSelectedTestcase} + setDebugEvaluator={setDebugEvaluator} + debugEvaluator={debugEvaluator} + selectedTestset={selectedTestset} + setSelectedTestset={setSelectedTestset} + /> + ), + }) + } + + return ( + + {steps[current]?.content} + + ) +} + +export default EvaluatorsModal diff --git a/agenta-web/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/NewEvaluator/NewEvaluatorCard.tsx b/agenta-web/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/NewEvaluator/NewEvaluatorCard.tsx new file mode 100644 index 000000000..771338980 --- /dev/null +++ b/agenta-web/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/NewEvaluator/NewEvaluatorCard.tsx @@ -0,0 +1,115 @@ +import {Evaluator, JSSTheme} from "@/lib/Types" +import {ArrowRightOutlined} from "@ant-design/icons" +import {ArrowRight} from "@phosphor-icons/react" +import {Card, Empty, Typography} from "antd" +import React from "react" +import {createUseStyles} from "react-jss" + +interface CreateEvaluatorCardProps { + evaluators: Evaluator[] + 
setSelectedEvaluator: React.Dispatch> + setCurrent: (value: React.SetStateAction) => void +} + +const useStyles = createUseStyles((theme: JSSTheme) => ({ + container: { + display: "flex", + flexWrap: "wrap", + gap: theme.padding, + height: "100%", + maxHeight: 600, + overflowY: "auto", + }, + cardTitle: { + fontSize: theme.fontSizeLG, + lineHeight: theme.lineHeightLG, + fontWeight: theme.fontWeightMedium, + }, + evaluatorCard: { + flexDirection: "column", + width: 276, + display: "flex", + height: "fit-content", + transition: "all 0.025s ease-in", + cursor: "pointer", + position: "relative", + "& > .ant-card-head": { + minHeight: 0, + padding: theme.paddingSM, + + "& .ant-card-head-title": { + fontSize: theme.fontSize, + fontWeight: theme.fontWeightMedium, + lineHeight: theme.lineHeight, + display: "flex", + justifyContent: "space-between", + alignItems: "center", + }, + }, + "& > .ant-card-body": { + height: 122, + overflowY: "auto", + padding: theme.paddingSM, + "& .ant-typography": { + color: theme.colorTextSecondary, + }, + }, + "&:hover": { + boxShadow: theme.boxShadowTertiary, + }, + }, + arrowIcon: { + opacity: 0, + transition: "opacity 0.3s", + }, + evaluatorCardHover: { + "&:hover $arrowIcon": { + opacity: 1, + }, + }, + centeredItem: { + display: "grid", + placeItems: "center", + width: "100%", + height: 600, + }, +})) + +const CreateEvaluatorCard = ({ + evaluators, + setSelectedEvaluator, + setCurrent, +}: CreateEvaluatorCardProps) => { + const classes = useStyles() + + return ( +
+ {evaluators.length ? ( + evaluators.map((evaluator) => ( + + {evaluator.name} + + + } + onClick={() => { + setSelectedEvaluator(evaluator) + setCurrent(2) + }} + > + {evaluator.description} + + )) + ) : ( +
+ +
+ )} +
+ ) +} + +export default CreateEvaluatorCard diff --git a/agenta-web/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/NewEvaluator/NewEvaluatorList.tsx b/agenta-web/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/NewEvaluator/NewEvaluatorList.tsx new file mode 100644 index 000000000..37c115752 --- /dev/null +++ b/agenta-web/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/NewEvaluator/NewEvaluatorList.tsx @@ -0,0 +1,86 @@ +import {Evaluator, JSSTheme} from "@/lib/Types" +import {ArrowRight} from "@phosphor-icons/react" +import {Table, Tag, Typography} from "antd" +import {ColumnsType} from "antd/es/table" +import React from "react" +import {createUseStyles} from "react-jss" + +interface CreateEvaluatorListProps { + evaluators: Evaluator[] + setSelectedEvaluator: React.Dispatch> + setCurrent: (value: React.SetStateAction) => void +} + +const useStyles = createUseStyles((theme: JSSTheme) => ({ + arrowIcon: { + opacity: 0, + transition: "opacity 0.3s", + }, + evaluatorCardHover: { + "&:hover $arrowIcon": { + opacity: 1, + }, + }, +})) + +const CreateEvaluatorList = ({ + evaluators, + setSelectedEvaluator, + setCurrent, +}: CreateEvaluatorListProps) => { + const classes = useStyles() + + const columns: ColumnsType = [ + { + title: "Name", + dataIndex: "key", + key: "key", + width: 200, + render: (_, record) => { + return ( +
+ {record.name} +
+ ) + }, + }, + { + title: "Description", + dataIndex: "description", + key: "description", + render: (_, record) => { + return ( +
+ + {record.description} + + + +
+ ) + }, + }, + ] + return ( +
({ + "data-cy": "new-evaluator-list", + className: classes.evaluatorCardHover, + onClick: () => { + setSelectedEvaluator(record) + setCurrent(2) + }, + })} + pagination={false} + /> + ) +} + +export default CreateEvaluatorList diff --git a/agenta-web/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/NewEvaluator/index.tsx b/agenta-web/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/NewEvaluator/index.tsx new file mode 100644 index 000000000..972c3efc2 --- /dev/null +++ b/agenta-web/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/NewEvaluator/index.tsx @@ -0,0 +1,167 @@ +import {Evaluator, JSSTheme} from "@/lib/Types" +import {CloseOutlined} from "@ant-design/icons" +import {ArrowLeft, Cards, Table} from "@phosphor-icons/react" +import {Button, Divider, Flex, Input, Radio, Space, Typography} from "antd" +import React, {useMemo, useState} from "react" +import {createUseStyles} from "react-jss" +import NewEvaluatorList from "./NewEvaluatorList" +import NewEvaluatorCard from "./NewEvaluatorCard" +import {getEvaluatorTags} from "@/lib/helpers/evaluate" + +type NewEvaluatorProps = { + setCurrent: React.Dispatch> + handleOnCancel: () => void + evaluators: Evaluator[] + setSelectedEvaluator: React.Dispatch> + setEvaluatorsDisplay: any + evaluatorsDisplay: string + setSelectedEvaluatorCategory: React.Dispatch> + selectedEvaluatorCategory: string +} + +const useStyles = createUseStyles((theme: JSSTheme) => ({ + title: { + display: "flex", + alignItems: "center", + justifyContent: "space-between", + "& .ant-typography": { + fontSize: theme.fontSizeHeading4, + fontWeight: theme.fontWeightStrong, + lineHeight: theme.lineHeightLG, + }, + }, + subTitle: { + fontSize: theme.fontSizeLG, + lineHeight: theme.lineHeightLG, + fontWeight: theme.fontWeightMedium, + }, + radioBtnContainer: { + display: "flex", + alignItems: "center", + gap: theme.marginXS, + "& .ant-radio-button-wrapper": { + borderRadius: theme.borderRadius, + 
borderInlineStartWidth: "1px", + "&:before": { + width: 0, + }, + "&:not(.ant-radio-button-wrapper-checked)": { + border: "none", + "&:hover": { + backgroundColor: theme.colorBgTextHover, + }, + }, + }, + }, +})) + +const NewEvaluator = ({ + evaluators, + setCurrent, + handleOnCancel, + setSelectedEvaluator, + setEvaluatorsDisplay, + evaluatorsDisplay, + selectedEvaluatorCategory, + setSelectedEvaluatorCategory, +}: NewEvaluatorProps) => { + const classes = useStyles() + const [searchTerm, setSearchTerm] = useState("") + const evaluatorTags = getEvaluatorTags() + + const filteredEvaluators = useMemo(() => { + let filtered = evaluators + + if (selectedEvaluatorCategory !== "view_all") { + filtered = filtered.filter((item) => item.tags.includes(selectedEvaluatorCategory)) + } + + if (searchTerm) { + filtered = filtered.filter((item) => + item.name.toLowerCase().includes(searchTerm.toLowerCase()), + ) + } + + return filtered + }, [searchTerm, selectedEvaluatorCategory, evaluators]) + + return ( +
+
+
+ +
+
+
+ setSelectedEvaluatorCategory(e.target.value)} + > + View all + + {evaluatorTags.map((val, idx) => ( + + {val.label} + + ))} + + + + setSearchTerm(e.target.value)} + placeholder="Search" + allowClear + /> + {/* setEvaluatorsDisplay(e.target.value)} + > + +
+ + + + + */} + + + + + + +
+ {/* {evaluatorsDisplay === "list" ? ( + + ) : ( + + )} */} + +
+ + ) +} + +export default NewEvaluator diff --git a/agenta-web/src/components/pages/evaluations/autoEvaluation/Filters/EditColumns.tsx b/agenta-web/src/components/pages/evaluations/autoEvaluation/Filters/EditColumns.tsx new file mode 100644 index 000000000..e82b5b839 --- /dev/null +++ b/agenta-web/src/components/pages/evaluations/autoEvaluation/Filters/EditColumns.tsx @@ -0,0 +1,106 @@ +import {_Evaluation, JSSTheme} from "@/lib/Types" +import {Button, Dropdown, Space, Checkbox} from "antd" +import React from "react" +import {createUseStyles} from "react-jss" +import {Columns} from "@phosphor-icons/react" +import {ColumnsType} from "antd/es/table" + +const useStyles = createUseStyles((theme: JSSTheme) => ({ + dropdownMenu: { + "&>.ant-dropdown-menu-item": { + "& .anticon-check": { + display: "none", + }, + }, + "&>.ant-dropdown-menu-item-selected": { + "&:not(:hover)": { + backgroundColor: "transparent !important", + }, + "& .anticon-check": { + display: "inline-flex !important", + }, + }, + }, + button: { + display: "flex", + alignItems: "center", + }, +})) + +export const generateEditItems = (columns: ColumnsType, editColumns: string[]) => { + return columns + .filter((col) => col.key !== "key") + .flatMap((col) => [ + { + key: col.key, + label: ( + + + {col.title as string} + + ), + }, + ...(("children" in col && + col.children?.map((child) => ({ + key: child.key, + label: ( + + + {child.key as string} + + ), + }))) || + []), + ]) +} + +interface EditColumnsProps { + isOpen: boolean + handleOpenChange: ( + open: boolean, + info: { + source: "trigger" | "menu" + }, + ) => void + shownCols: string[] + items: any + onClick: ({key}: {key: string}) => void + buttonText?: string +} + +const EditColumns = ({ + isOpen, + handleOpenChange, + shownCols, + items, + onClick, + buttonText, +}: EditColumnsProps) => { + const classes = useStyles() + + return ( + + + + ) +} + +export default EditColumns diff --git 
a/agenta-web/src/components/pages/evaluations/autoEvaluation/Filters/SearchFilter.tsx b/agenta-web/src/components/pages/evaluations/autoEvaluation/Filters/SearchFilter.tsx new file mode 100644 index 000000000..ddcc2db2f --- /dev/null +++ b/agenta-web/src/components/pages/evaluations/autoEvaluation/Filters/SearchFilter.tsx @@ -0,0 +1,77 @@ +import {_Evaluation, EvaluationStatus} from "@/lib/Types" +import {Input, TableColumnType, DatePicker} from "antd" +import {FilterDropdownProps} from "antd/es/table/interface" +import dayjs from "dayjs" +import {statusMapper} from "@/components/pages/evaluations/cellRenderers/cellRenderers" + +type DataIndex = keyof _Evaluation + +type CellDataType = "number" | "text" | "date" + +export function getFilterParams( + dataIndex: DataIndex, + type: CellDataType, +): TableColumnType<_Evaluation> { + const filterDropdown = ({setSelectedKeys, selectedKeys, confirm}: FilterDropdownProps) => { + return ( +
e.stopPropagation()}> + {type === "date" ? ( + { + setSelectedKeys(dateString ? [dateString] : []) + confirm() + }} + /> + ) : ( + { + setSelectedKeys(e.target.value ? [e.target.value] : []) + confirm({closeDropdown: false}) + }} + style={{display: "block"}} + step={0.1} + type={type} + /> + )} +
+ ) + } + + const onFilter = (value: any, record: any) => { + try { + const cellValue = record[dataIndex] + + if (type === "date") { + return dayjs(cellValue).isSame(dayjs(value), "day") + } + if (dataIndex === "status") { + const statusLabel = statusMapper({} as any)(record.status.value as EvaluationStatus) + .label as EvaluationStatus + return statusLabel.toLowerCase().includes(value.toLowerCase()) + } + + if (typeof cellValue === "object" && cellValue !== null) { + if (Array.isArray(cellValue)) { + return cellValue.some((item) => + item.variantName?.toLowerCase().includes(value.toLowerCase()), + ) + } else if (cellValue.hasOwnProperty("name")) { + return cellValue.name.toString().toLowerCase().includes(value.toLowerCase()) + } else if (cellValue.hasOwnProperty("value")) { + return cellValue.value.toString().toLowerCase().includes(value.toLowerCase()) + } + } + return cellValue?.toString().toLowerCase().includes(value.toLowerCase()) + } catch (error) { + console.error(error) + } + } + + return { + filterDropdown, + onFilter, + } +} diff --git a/agenta-web/src/components/pages/overview/automaticEvaluation/StatusRenderer.tsx b/agenta-web/src/components/pages/evaluations/cellRenderers/StatusRenderer.tsx similarity index 87% rename from agenta-web/src/components/pages/overview/automaticEvaluation/StatusRenderer.tsx rename to agenta-web/src/components/pages/evaluations/cellRenderers/StatusRenderer.tsx index 107498527..b1f1f6e8d 100644 --- a/agenta-web/src/components/pages/overview/automaticEvaluation/StatusRenderer.tsx +++ b/agenta-web/src/components/pages/evaluations/cellRenderers/StatusRenderer.tsx @@ -4,7 +4,7 @@ import {InfoCircleOutlined} from "@ant-design/icons" import {theme, Tooltip, Typography} from "antd" import React from "react" import {createUseStyles} from "react-jss" -import {runningStatuses, statusMapper} from "../../evaluations/cellRenderers/cellRenderers" +import {runningStatuses, statusMapper} from "./cellRenderers" const useStyles = 
createUseStyles((theme: JSSTheme) => ({ statusCell: { @@ -35,15 +35,14 @@ const useStyles = createUseStyles((theme: JSSTheme) => ({ const StatusRenderer = (record: _Evaluation) => { const classes = useStyles() const {token} = theme.useToken() - const value = statusMapper(token)(record.status.value as EvaluationStatus) - .label as EvaluationStatus + const value = record.status.value const duration = useDurationCounter(record.duration || 0, runningStatuses.includes(value)) const {label, color} = statusMapper(token)(record.status.value as EvaluationStatus) const errorMsg = record.status.error?.message const errorStacktrace = record.status.error?.stacktrace return ( - +
{label} {errorMsg && ( diff --git a/agenta-web/src/components/pages/evaluations/evaluationResults/EmptyEvaluations.tsx b/agenta-web/src/components/pages/evaluations/evaluationResults/EmptyEvaluations.tsx deleted file mode 100644 index 64de7a3f5..000000000 --- a/agenta-web/src/components/pages/evaluations/evaluationResults/EmptyEvaluations.tsx +++ /dev/null @@ -1,89 +0,0 @@ -import {JSSTheme} from "@/lib/Types" -import {PlusCircleOutlined, SlidersOutlined} from "@ant-design/icons" -import {Button, Empty, Space, Tooltip, Typography} from "antd" -import Image from "next/image" -import React from "react" -import {createUseStyles} from "react-jss" -import evaluationIllustration from "@/media/eval-illustration.png" - -const useStyles = createUseStyles((theme: JSSTheme) => ({ - emptyRoot: { - height: "calc(100vh - 260px)", - display: "grid", - placeItems: "center", - }, - empty: { - "& .ant-empty-description": { - fontSize: 18, - marginTop: "0.75rem", - marginBottom: "1.5rem", - }, - }, - emptyImg: { - width: 120, - height: 120, - objectFit: "contain", - filter: theme.isDark ? "invert(1)" : "none", - opacity: 0.85, - }, -})) - -interface Props { - onConfigureEvaluators?: () => void - onBeginEvaluation?: () => void -} - -const EmptyEvaluations: React.FC = ({onConfigureEvaluators, onBeginEvaluation}) => { - const classes = useStyles() - - return ( -
- - Get Started with Your First Evaluation -
- - } - image={ - - } - > - - - - - Or - - - - -
-
- ) -} - -export default EmptyEvaluations diff --git a/agenta-web/src/components/pages/evaluations/evaluationResults/EvaluationResults.tsx b/agenta-web/src/components/pages/evaluations/evaluationResults/EvaluationResults.tsx deleted file mode 100644 index f9f6f524b..000000000 --- a/agenta-web/src/components/pages/evaluations/evaluationResults/EvaluationResults.tsx +++ /dev/null @@ -1,540 +0,0 @@ -import React, {useEffect, useMemo, useRef, useState} from "react" -import {AgGridReact} from "ag-grid-react" -import {useAppTheme} from "@/components/Layout/ThemeContextProvider" -import {ColDef, ValueGetterParams} from "ag-grid-community" -import {createUseStyles} from "react-jss" -import {Button, DropdownProps, Space, Spin, Tag, Tooltip, Typography, theme} from "antd" -import { - DeleteOutlined, - DownloadOutlined, - PlusCircleOutlined, - SlidersOutlined, - SwapOutlined, -} from "@ant-design/icons" -import {EvaluationStatus, GenericObject, JSSTheme, _Evaluation} from "@/lib/Types" -import {uniqBy} from "lodash" -import dayjs from "dayjs" -import relativeTime from "dayjs/plugin/relativeTime" -import duration from "dayjs/plugin/duration" -import NewEvaluationModal from "./NewEvaluationModal" -import {useAppId} from "@/hooks/useAppId" -import { - deleteEvaluations, - fetchAllEvaluations, - fetchEvaluationStatus, -} from "@/services/evaluations/api" -import {useUpdateEffect} from "usehooks-ts" -import {shortPoll} from "@/lib/helpers/utils" -import AlertPopup from "@/components/AlertPopup/AlertPopup" -import { - DateFromNowRenderer, - LinkCellRenderer, - StatusRenderer, - runningStatuses, - statusMapper, -} from "../cellRenderers/cellRenderers" -import {useAtom} from "jotai" -import {evaluatorsAtom} from "@/lib/atoms/evaluation" -import AgCustomHeader from "@/components/AgCustomHeader/AgCustomHeader" -import {useRouter} from "next/router" -import EmptyEvaluations from "./EmptyEvaluations" -import {calcEvalDuration, getFilterParams, getTypedValue} from "@/lib/helpers/evaluate" 
-import Link from "next/link" -import FilterColumns, {generateFilterItems} from "../FilterColumns/FilterColumns" -import {variantNameWithRev} from "@/lib/helpers/variantHelper" -import {getAppValues} from "@/contexts/app.context" -import {convertToCsv, downloadCsv} from "@/lib/helpers/fileManipulations" -import {formatDate24} from "@/lib/helpers/dateTimeHelper" -import {useQueryParam} from "@/hooks/useQuery" - -dayjs.extend(relativeTime) -dayjs.extend(duration) - -const useStyles = createUseStyles((theme: JSSTheme) => ({ - root: { - display: "flex", - flexDirection: "column", - gap: "1rem", - }, - table: { - height: "calc(100vh - 260px)", - }, - buttonsGroup: { - marginTop: "1rem", - alignSelf: "flex-end", - }, - dropdownMenu: { - "&>.ant-dropdown-menu-item": { - "& .anticon-check": { - display: "none", - }, - }, - "&>.ant-dropdown-menu-item-selected": { - "&:not(:hover)": { - backgroundColor: "transparent !important", - }, - "& .anticon-check": { - display: "inline-flex !important", - }, - }, - }, -})) - -interface Props {} - -const EvaluationResults: React.FC = () => { - const {appTheme} = useAppTheme() - const classes = useStyles() - const appId = useAppId() - const [evaluations, setEvaluations] = useState<_Evaluation[]>([]) - const [evaluators] = useAtom(evaluatorsAtom) - const [newEvalModalOpen, setNewEvalModalOpen] = useState(false) - const [queryNewEvalModalOpen, setQueryNewEvalModalOpen] = - useQueryParam("openNewEvaluationModal") - const [fetching, setFetching] = useState(false) - const [selected, setSelected] = useState<_Evaluation[]>([]) - const stoppers = useRef() - const router = useRouter() - const {token} = theme.useToken() - const gridRef = useRef() - const [hiddenCols, setHiddenCols] = useState([]) - const [isFilterColsDropdownOpen, setIsFilterColsDropdownOpen] = useState(false) - - const runningEvaluationIds = useMemo( - () => - evaluations - .filter((item) => runningStatuses.includes(item.status.value)) - .map((item) => item.id), - [evaluations], 
- ) - - const onDelete = () => { - AlertPopup({ - title: "Delete Evaluations", - message: `Are you sure you want to delete all ${selected.length} selected evaluations?`, - onOk: () => - deleteEvaluations(selected.map((item) => item.id)) - .catch(console.error) - .then(fetcher), - }) - } - - const fetcher = () => { - setFetching(true) - fetchAllEvaluations(appId) - .then(setEvaluations) - .catch(console.error) - .finally(() => setFetching(false)) - } - - useEffect(() => { - fetcher() - }, [appId]) - - //update status of running evaluations through short polling - useUpdateEffect(() => { - stoppers.current?.() - - if (runningEvaluationIds.length) { - stoppers.current = shortPoll( - () => - Promise.all(runningEvaluationIds.map((id) => fetchEvaluationStatus(id))) - .then((res) => { - setEvaluations((prev) => { - const newEvals = [...prev] - runningEvaluationIds.forEach((id, ix) => { - const index = newEvals.findIndex((e) => e.id === id) - if (index !== -1) { - newEvals[index].status = res[ix].status - newEvals[index].duration = calcEvalDuration(newEvals[index]) - } - }) - if ( - res.some((item) => !runningStatuses.includes(item.status.value)) - ) - fetcher() - return newEvals - }) - }) - .catch(console.error), - {delayMs: 2000, timeoutMs: Infinity}, - ).stopper - } - - return () => { - stoppers.current?.() - } - }, [JSON.stringify(runningEvaluationIds)]) - - const evaluatorConfigs = useMemo( - () => - uniqBy( - evaluations - .map((item) => - item.aggregated_results.map((item) => ({ - ...item.evaluator_config, - evaluator: evaluators.find( - (e) => e.key === item.evaluator_config.evaluator_key, - ), - })), - ) - .flat(), - "id", - ), - [evaluations, evaluators], - ) - - const compareDisabled = useMemo( - () => - selected.length < 2 || - selected.some( - (item) => - item.status.value === EvaluationStatus.STARTED || - item.status.value === EvaluationStatus.INITIALIZED || - item.testset.id !== selected[0].testset.id, - ), - [selected], - ) - - const colDefs = useMemo(() => 
{ - const colDefs: ColDef<_Evaluation>[] = [ - { - field: "variants", - flex: 1, - minWidth: 160, - pinned: "left", - headerCheckboxSelection: true, - hide: hiddenCols.includes("Variant"), - checkboxSelection: true, - showDisabledCheckboxes: true, - cellRenderer: (params: any) => { - const {revisions, variants} = params.data - return ( - - {params.value} - - ) - }, - onCellClicked(params: any) { - const {revisions, variants} = params.data - router.push( - `/apps/${appId}/playground?variant=${variants[0].variantName}&revision=${revisions[0]}`, - ) - }, - valueGetter: (params) => - variantNameWithRev({ - variant_name: params.data?.variants[0].variantName ?? "", - revision: params.data?.revisions[0], - }), - headerName: "Variant", - tooltipValueGetter: (params) => params.data?.variants[0].variantName, - ...getFilterParams("text"), - }, - { - field: "testset.name", - hide: hiddenCols.includes("Testset"), - headerName: "Testset", - cellRenderer: (params: any) => ( - - ), - flex: 1, - minWidth: 160, - tooltipValueGetter: (params) => params.value, - ...getFilterParams("text"), - onCellClicked(params) { - router.push(`/apps/${appId}/testsets/${params.data?.testset.id}`) - }, - }, - ...evaluatorConfigs.map( - (config) => - ({ - flex: 1, - minWidth: 190, - hide: hiddenCols.includes(config.name), - field: "aggregated_results", - headerName: config.name, - headerComponent: (props: any) => ( - - - - - {config.name} - - - {config.evaluator?.name} - - - - ), - autoHeaderHeight: true, - ...getFilterParams("number"), - cellRenderer: (params: ValueGetterParams<_Evaluation, any>) => { - const result = params.data?.aggregated_results.find( - (item) => item.evaluator_config.id === config.id, - )?.result - - return result?.error ? 
( - - Error - - ) : ( - {getTypedValue(result)} - ) - }, - valueGetter: (params) => - getTypedValue( - params.data?.aggregated_results.find( - (item) => item.evaluator_config.id === config.id, - )?.result, - ), - tooltipValueGetter: (params) => - params.data?.aggregated_results - .find((item) => item.evaluator_config.id === config.id) - ?.result?.value?.toString() || "", - }) as ColDef<_Evaluation>, - ), - { - flex: 1, - headerName: "Status", - hide: hiddenCols.includes("Status"), - field: "status", - minWidth: 185, - pinned: "right", - ...getFilterParams("text"), - filterValueGetter: (params) => - statusMapper(token)(params.data?.status.value as EvaluationStatus).label, - cellRenderer: StatusRenderer, - valueGetter: (params) => - statusMapper(token)(params.data?.status.value as EvaluationStatus).label, - }, - { - flex: 1, - field: "average_latency", - headerName: "Avg. Latency", - hide: hiddenCols.includes("Latency"), - minWidth: 120, - ...getFilterParams("number"), - valueGetter: (params) => getTypedValue(params?.data?.average_latency), - }, - { - flex: 1, - field: "total_cost", - headerName: "Total Cost", - hide: hiddenCols.includes("Cost"), - minWidth: 120, - ...getFilterParams("number"), - valueGetter: (params) => getTypedValue(params?.data?.total_cost), - }, - { - flex: 1, - field: "created_at", - headerName: "Created", - hide: hiddenCols.includes("Created"), - minWidth: 160, - ...getFilterParams("date"), - cellRenderer: DateFromNowRenderer, - sort: "desc", - valueFormatter: (params) => formatDate24(params.value), - }, - ] - return colDefs - }, [evaluatorConfigs, hiddenCols, appId, router, token]) - - const compareBtnNode = ( - - ) - const onToggleEvaluatorVisibility = (evalConfigId: string) => { - if (!hiddenCols.includes(evalConfigId)) { - setHiddenCols([...hiddenCols, evalConfigId]) - } else { - setHiddenCols(hiddenCols.filter((item) => item !== evalConfigId)) - } - } - - const shownCols = useMemo( - () => - colDefs - .map((item) => item.headerName) - 
.filter((item) => item !== undefined && !hiddenCols.includes(item)) as string[], - [colDefs, hiddenCols], - ) - - const handleOpenChangeFilterCols: DropdownProps["onOpenChange"] = (nextOpen, info) => { - if (info.source === "trigger" || nextOpen) { - setIsFilterColsDropdownOpen(nextOpen) - } - } - - const onExport = () => { - if (!gridRef.current) return - const {currentApp} = getAppValues() - const filename = `${currentApp?.app_name}_evaluation_scenarios.csv` - if (!!selected.length) { - const csvData = convertToCsv( - selected.map((item) => ({ - Variant: variantNameWithRev({ - variant_name: item.variants[0].variantName ?? "", - revision: item.revisions[0], - }), - Testset: item.testset.name, - ...item.aggregated_results.reduce((acc, curr) => { - if (!acc[curr.evaluator_config.name]) { - acc[curr.evaluator_config.name] = getTypedValue(curr.result) - } - return acc - }, {} as GenericObject), - "Avg. Latency": getTypedValue(item.average_latency), - "Total Cost": getTypedValue(item.average_cost), - Created: formatDate24(item.created_at), - Status: statusMapper(token)(item.status.value as EvaluationStatus).label, - })), - colDefs.map((col) => col.headerName!), - ) - downloadCsv(csvData, filename) - } else { - gridRef.current.api.exportDataAsCsv({ - fileName: filename, - }) - } - } - return ( - <> - {!fetching && !evaluations.length ? ( - - router.push(`/apps/${appId}/evaluations/new-evaluator`) - } - onBeginEvaluation={() => { - setNewEvalModalOpen(true) - }} - /> - ) : ( -
- - - {compareDisabled ? ( - - {compareBtnNode} - - ) : ( - compareBtnNode - )} - - - - - { - onToggleEvaluatorVisibility(key) - setIsFilterColsDropdownOpen(true) - }} - /> - - - - -
- - ref={gridRef as any} - rowData={evaluations} - columnDefs={colDefs} - rowStyle={{ - cursor: "pointer", - }} - getRowId={(params) => params.data.id} - onRowClicked={(params) => { - // ignore clicks on the checkbox col - if ( - params.eventPath?.find( - (item: any) => item.ariaColIndex === "1", - ) - ) - return - ;(EvaluationStatus.FINISHED === params.data?.status.value || - EvaluationStatus.FINISHED_WITH_ERRORS === - params.data?.status.value || - EvaluationStatus.AGGREGATION_FAILED === - params.data?.status.value) && - router.push( - `/apps/${appId}/evaluations/results/${params.data?.id}`, - ) - }} - rowSelection="multiple" - suppressRowClickSelection - onSelectionChanged={(event) => - setSelected(event.api.getSelectedRows()) - } - tooltipShowDelay={0} - /> -
-
-
- )} - { - setNewEvalModalOpen(false) - setQueryNewEvalModalOpen("") - }} - onSuccess={() => { - setNewEvalModalOpen(false) - setQueryNewEvalModalOpen("") - fetcher() - }} - /> - - ) -} - -export default EvaluationResults diff --git a/agenta-web/src/components/pages/evaluations/evaluators/EvaluatorCard.tsx b/agenta-web/src/components/pages/evaluations/evaluators/EvaluatorCard.tsx deleted file mode 100644 index 85bc2c83d..000000000 --- a/agenta-web/src/components/pages/evaluations/evaluators/EvaluatorCard.tsx +++ /dev/null @@ -1,126 +0,0 @@ -import React from "react" -import {EvaluatorConfig, JSSTheme} from "@/lib/Types" -import {DeleteOutlined, EditOutlined} from "@ant-design/icons" -import {Card, Tag, Typography} from "antd" -import {createUseStyles} from "react-jss" -import dayjs from "dayjs" -import Image from "next/image" -import AlertPopup from "@/components/AlertPopup/AlertPopup" -import {deleteEvaluatorConfig} from "@/services/evaluations/api" -import {useAtom} from "jotai" -import {evaluatorsAtom} from "@/lib/atoms/evaluation" -import {checkIfResourceValidForDeletion} from "@/lib/helpers/evaluate" -import ResultComponent from "@/components/ResultComponent/ResultComponent" - -const useStyles = createUseStyles((theme: JSSTheme) => ({ - card: { - display: "flex", - flexDirection: "column", - "& .ant-card-body": { - padding: "1.25rem 0.75rem 1rem 1rem", - flex: 1, - }, - }, - body: { - display: "flex", - flexDirection: "column", - alignItems: "center", - }, - headerRow: { - display: "flex", - alignItems: "center", - alignSelf: "stretch", - justifyContent: "space-between", - marginBottom: "1.5rem", - }, - evaluationImg: { - width: 32, - height: 32, - marginRight: "8px", - filter: theme.isDark ? 
"invert(1)" : "none", - }, - name: { - marginTop: "0.5rem", - marginBottom: "0 !important", - fontWeight: "500 !important", - fontSize: "1rem", - }, - date: { - fontSize: "0.75rem", - color: "#8c8c8c", - }, -})) - -interface Props { - evaluatorConfig: EvaluatorConfig - onEdit?: () => void - onSuccessDelete?: () => void -} - -const EvaluatorCard: React.FC = ({evaluatorConfig, onEdit, onSuccessDelete}) => { - const classes = useStyles() - const [evaluators] = useAtom(evaluatorsAtom) - const evaluator = evaluators.find((item) => item.key === evaluatorConfig.evaluator_key)! - - const onDelete = async () => { - AlertPopup({ - title: "Delete evaluator", - message: "Are you sure you want to delete this evaluator?", - onOk: async () => { - if ( - !(await checkIfResourceValidForDeletion({ - resourceType: "evaluator_config", - resourceIds: [evaluatorConfig.id], - })) - ) - return - try { - await deleteEvaluatorConfig(evaluatorConfig.id) - onSuccessDelete?.() - } catch (error) {} - }, - }) - } - - if (!evaluator) { - return null - } - - return ( - , - , - ]} - data-cy="evaluator-card" - > -
-
- - {dayjs(evaluatorConfig.created_at).format("DD MMM YY")} - - {evaluator.name} -
- - {evaluator.icon_url && ( - - )} - - - {evaluatorConfig.name} - -
-
- ) -} - -export default EvaluatorCard diff --git a/agenta-web/src/components/pages/evaluations/evaluators/Evaluators.tsx b/agenta-web/src/components/pages/evaluations/evaluators/Evaluators.tsx deleted file mode 100644 index d02fa6a56..000000000 --- a/agenta-web/src/components/pages/evaluations/evaluators/Evaluators.tsx +++ /dev/null @@ -1,119 +0,0 @@ -import React, {useMemo, useState} from "react" -import {createUseStyles} from "react-jss" -import EvaluatorCard from "./EvaluatorCard" -import {Button, Empty, Input, Space, Spin} from "antd" -import {PlusCircleOutlined} from "@ant-design/icons" -import NewEvaluatorModal from "./NewEvaluatorModal" -import {useAppId} from "@/hooks/useAppId" -import {fetchAllEvaluatorConfigs} from "@/services/evaluations/api" -import {useAtom} from "jotai" -import {evaluatorConfigsAtom} from "@/lib/atoms/evaluation" -import {JSSTheme} from "@/lib/Types" - -const useStyles = createUseStyles((theme: JSSTheme) => ({ - root: { - display: "flex", - flexDirection: "column", - }, - buttonsGroup: { - justifyContent: "flex-end", - width: "100%", - padding: "1rem 0", - position: "sticky", - top: 46, - zIndex: 1, - backgroundColor: theme.colorBgContainer, - }, - grid: { - display: "grid", - gridTemplateColumns: "repeat(auto-fill, minmax(min(260px, 100%), 1fr))", - gap: "1rem", - }, -})) - -interface Props {} - -const Evaluators: React.FC = () => { - const classes = useStyles() - const appId = useAppId() - const [evaluatorConfigs, setEvaluatorConfigs] = useAtom(evaluatorConfigsAtom) - const [newEvalModalOpen, setNewEvalModalOpen] = useState(false) - const [newEvalModalConfigOpen, setNewEvalModalConfigOpen] = useState(false) - const [editIndex, setEditIndex] = useState(-1) - const [fetching, setFetching] = useState(false) - const [searchTerm, setSearchTerm] = useState("") - - const fetcher = () => { - setFetching(true) - fetchAllEvaluatorConfigs(appId) - .then(setEvaluatorConfigs) - .catch(console.error) - .finally(() => setFetching(false)) - } - - 
const filtered = useMemo(() => { - if (!searchTerm) return evaluatorConfigs - return evaluatorConfigs.filter((item) => - item.name.toLowerCase().includes(searchTerm.toLowerCase()), - ) - }, [searchTerm, evaluatorConfigs]) - - return ( -
- - setSearchTerm(e.target.value)} - placeholder="Search" - allowClear - enterButton - /> - - - - {!fetching && !evaluatorConfigs.length ? ( - - ) : ( -
- {filtered.map((item, ix) => ( - { - setEditIndex(ix) - setNewEvalModalConfigOpen(true) - }} - onSuccessDelete={fetcher} - /> - ))} -
- )} -
- - { - setNewEvalModalOpen(false) - setNewEvalModalConfigOpen(false) - fetcher() - }} - newEvalModalConfigOpen={newEvalModalConfigOpen} - setNewEvalModalConfigOpen={setNewEvalModalConfigOpen} - setNewEvalModalOpen={setNewEvalModalOpen} - editMode={editIndex !== -1} - initialValues={evaluatorConfigs[editIndex]} - /> -
- ) -} - -export default Evaluators diff --git a/agenta-web/src/components/pages/evaluations/evaluators/NewEvaluatorModal.tsx b/agenta-web/src/components/pages/evaluations/evaluators/NewEvaluatorModal.tsx deleted file mode 100644 index 049ebcdc3..000000000 --- a/agenta-web/src/components/pages/evaluations/evaluators/NewEvaluatorModal.tsx +++ /dev/null @@ -1,467 +0,0 @@ -import {useAppTheme} from "@/components/Layout/ThemeContextProvider" -import {useAppId} from "@/hooks/useAppId" -import {EvaluationSettingsTemplate, Evaluator, EvaluatorConfig, JSSTheme} from "@/lib/Types" -import {evaluatorsAtom} from "@/lib/atoms/evaluation" -import {isValidRegex} from "@/lib/helpers/validators" -import { - CreateEvaluationConfigData, - createEvaluatorConfig, - updateEvaluatorConfig, -} from "@/services/evaluations/api" -import {ArrowLeftOutlined, EditOutlined, InfoCircleOutlined, PlusOutlined} from "@ant-design/icons" -import {Editor} from "@monaco-editor/react" -import {Button, Form, Input, InputNumber, Modal, Switch, Table, Tooltip, message, theme} from "antd" -import {Rule} from "antd/es/form" -import {useAtom} from "jotai" -import Image from "next/image" -import Link from "next/link" -import React, {useEffect, useMemo, useState} from "react" -import {createUseStyles} from "react-jss" -import {ColumnsType} from "antd/es/table" -import AdvancedSettings from "./AdvancedSettings" - -const useStyles = createUseStyles((theme: JSSTheme) => ({ - label: { - display: "flex", - alignItems: "center", - gap: "0.5rem", - }, - evaluationImg: { - width: 20, - height: 20, - marginRight: "8px", - filter: theme.isDark ? 
"invert(1)" : "none", - }, - radioGroup: { - "& .ant-radio-button-wrapper": { - margin: "0.25rem", - borderRadius: theme.borderRadius, - borderLeft: `1px solid ${theme.colorBorder}`, - "&::before": { - display: "none", - }, - }, - "& .ant-radio-button-wrapper-checked ": { - borderLeft: `1px solid ${theme.colorPrimary}`, - }, - }, - evalNameContainer: { - display: "flex", - alignItems: "center", - }, - divider: { - margin: "1rem -1.5rem", - width: "unset", - }, - editor: { - border: `1px solid ${theme.colorBorder}`, - borderRadius: theme.borderRadius, - overflow: "hidden", - }, - ExternalHelp: { - marginBottom: "20px", - display: "flex", - alignItems: "center", - gap: "0.3em", - }, - ExternalHelpLink: { - margin: "0px", - padding: "0px", - textDecoration: "underline", - color: theme.isDark ? "rgba(255, 255, 255, 0.85)" : "#000", - - "&:hover": { - color: theme.isDark ? "rgba(255, 255, 255, 0.85)" : "#000", - textDecoration: "underline", - }, - }, - evaluatorsTable: { - maxHeight: 550, - overflowY: "scroll", - margin: "2rem 0 1rem", - border: `1px solid ${theme.colorBorder}`, - borderRadius: theme.borderRadius, - "& .ant-table-thead": { - position: "sticky", - top: 0, - zIndex: 1000, - }, - }, - evalModalBtns: { - display: "flex", - alignItems: "center", - gap: 10, - width: "100%", - justifyContent: "flex-end", - }, - evalBtnContainer: { - display: "flex", - alignItems: "center", - justifyContent: "space-between", - width: "100%", - }, - searchContainer: { - marginTop: "1rem", - width: "100%", - display: "flex", - justifyContent: "flex-end", - }, -})) - -type DynamicFormFieldProps = EvaluationSettingsTemplate & { - name: string | string[] -} - -const DynamicFormField: React.FC = ({ - name, - label, - type, - default: defaultVal, - description, - min, - max, - required, -}) => { - const {appTheme} = useAppTheme() - const classes = useStyles() - const {token} = theme.useToken() - const [showAdvancedSettings, setShowAdvancedSettings] = useState(false) - - const rules: 
Rule[] = [{required: required ?? true, message: "This field is required"}] - if (type === "regex") - rules.push({ - validator: (_, value) => - new Promise((res, rej) => - isValidRegex(value) ? res("") : rej("Regex pattern is not valid"), - ), - }) - - const ExternalHelpInfo = - name[1] === "webhook_url" ? ( -
- Learn - - more - - about the evaluator -
- ) : null - - return ( - <> - {label !== "Correct Answer" && ( - - {label} - {description && ( - - - - )} -
- } - initialValue={defaultVal} - rules={rules} - > - {type === "string" || type === "regex" ? ( - - ) : type === "number" ? ( - - ) : type === "boolean" || type === "bool" ? ( - - ) : type === "text" ? ( - - ) : type === "code" ? ( - - ) : type === "object" ? ( - - ) : null} - - )} - - {ExternalHelpInfo} - - ) -} - -type Props = { - onSuccess?: () => void - initialValues?: EvaluatorConfig - editMode?: boolean - setNewEvalModalOpen: (value: React.SetStateAction) => void - newEvalModalConfigOpen: boolean - setNewEvalModalConfigOpen: React.Dispatch> -} & React.ComponentProps - -const NewEvaluatorModal: React.FC = ({ - onSuccess, - editMode = false, - initialValues, - setNewEvalModalOpen, - newEvalModalConfigOpen, - setNewEvalModalConfigOpen, - ...props -}) => { - const classes = useStyles() - const evaluators = useAtom(evaluatorsAtom)[0] - const [selectedEval, setSelectedEval] = useState(null) - const [submitLoading, setSubmitLoading] = useState(false) - const [searchTerm, setSearchTerm] = useState("") - const appId = useAppId() - const [form] = Form.useForm() - - const filtered = useMemo(() => { - if (!searchTerm) return evaluators - return evaluators.filter((item) => - item.name.toLowerCase().includes(searchTerm.toLowerCase()), - ) - }, [searchTerm, evaluators]) - - const handleCloseModal = () => { - setSearchTerm("") - setNewEvalModalOpen(false) - } - - const evalFields = useMemo( - () => - Object.keys(selectedEval?.settings_template || {}) - .filter((key) => !!selectedEval?.settings_template[key]?.type) - .map((key) => ({ - key, - ...selectedEval?.settings_template[key]!, - advanced: selectedEval?.settings_template[key]?.advanced || false, - })), - [selectedEval], - ) - - useEffect(() => { - form.resetFields() - if (initialValues) { - form.setFieldsValue(initialValues) - setSelectedEval( - evaluators.find((item) => item.key === initialValues?.evaluator_key) || null, - ) - } - }, [newEvalModalConfigOpen]) - - const advancedSettingsFields = 
evalFields.filter((field) => field.advanced) - const basicSettingsFields = evalFields.filter((field) => !field.advanced) - - const onSubmit = (values: CreateEvaluationConfigData) => { - try { - setSubmitLoading(true) - if (!selectedEval?.key) throw new Error("No selected key") - const settingsValues = values.settings_values || {} - - const data = { - ...values, - evaluator_key: selectedEval.key, - settings_values: settingsValues, - } - ;(editMode - ? updateEvaluatorConfig(initialValues?.id!, data) - : createEvaluatorConfig(appId, data) - ) - .then(onSuccess) - .catch(console.error) - .finally(() => setSubmitLoading(false)) - } catch (error: any) { - setSubmitLoading(false) - console.error(error) - message.error(error.message) - } - } - - const columns: ColumnsType = [ - { - title: "Name", - dataIndex: "name", - key: "name", - width: 200, - render(_, record) { - return ( - <> -
- {record.icon_url && ( - - )} - {record.name} -
- - ) - }, - }, - { - title: "Description", - dataIndex: "description", - key: "description", - render(_, record) { - return ( - <> -
{record.description}
- - ) - }, - }, - ] - - return ( - <> - -
- setSearchTerm(e.target.value)} - placeholder="Search" - allowClear - enterButton - style={{ - maxWidth: 300, - }} - /> -
-
{ - return { - onClick: () => { - setNewEvalModalOpen(false) - setNewEvalModalConfigOpen(true) - setSelectedEval(data) - }, - style: { - cursor: "pointer", - }, - "data-cy": `select-new-evaluator-${index}`, - } - }} - /> - - - { - setNewEvalModalConfigOpen(false) - }} - destroyOnClose - onOk={form.submit} - title={ - editMode - ? `${ - selectedEval?.name - ? `Edit the ${selectedEval.name} evaluator` - : "Edit your evaluator" - }` - : `${ - selectedEval?.name - ? `Configure the ${selectedEval.name} evaluator` - : "Configure your evaluator" - }` - } - footer={null} - data-cy="configure-new-evaluator-modal" - width={selectedEval?.key === "auto_custom_code_run" ? 800 : 600} - > -
- - - - - {basicSettingsFields.map((field) => ( - - ))} - - {advancedSettingsFields.length > 0 && ( - - )} - - -
- {!editMode && ( - - )} - -
- - -
-
-
- -
- - ) -} - -export default NewEvaluatorModal diff --git a/agenta-web/src/components/pages/overview/automaticEvaluation/AutomaticEvalOverview.tsx b/agenta-web/src/components/pages/overview/automaticEvaluation/AutomaticEvalOverview.tsx index 2b628ec67..1cd2b82d2 100644 --- a/agenta-web/src/components/pages/overview/automaticEvaluation/AutomaticEvalOverview.tsx +++ b/agenta-web/src/components/pages/overview/automaticEvaluation/AutomaticEvalOverview.tsx @@ -9,28 +9,22 @@ import { fetchAllEvaluators, fetchEvaluationStatus, } from "@/services/evaluations/api" -import { - EditOutlined, - InfoCircleOutlined, - MoreOutlined, - PlusOutlined, - SwapOutlined, -} from "@ant-design/icons" +import {EditOutlined, MoreOutlined, PlusOutlined, SwapOutlined} from "@ant-design/icons" import {Database, GearSix, Note, Rocket, Trash} from "@phosphor-icons/react" import {Button, Dropdown, message, Popover, Space, Spin, Table, Tag, Typography} from "antd" import {ColumnsType} from "antd/es/table" import {useRouter} from "next/router" import React, {useEffect, useMemo, useRef, useState} from "react" import {createUseStyles} from "react-jss" -import StatusRenderer from "./StatusRenderer" -import NewEvaluationModal from "../../evaluations/evaluationResults/NewEvaluationModal" +import StatusRenderer from "../../evaluations/cellRenderers/StatusRenderer" +import NewEvaluationModal from "../../evaluations/NewEvaluation/NewEvaluationModal" import {useAtom} from "jotai" import {evaluatorConfigsAtom, evaluatorsAtom} from "@/lib/atoms/evaluation" import {runningStatuses} from "../../evaluations/cellRenderers/cellRenderers" import {useUpdateEffect} from "usehooks-ts" import {shortPoll} from "@/lib/helpers/utils" -import NewEvaluatorModal from "../../evaluations/evaluators/NewEvaluatorModal" import DeleteEvaluationModal from "@/components/DeleteEvaluationModal/DeleteEvaluationModal" +import EvaluationErrorPopover from "../../evaluations/EvaluationErrorProps/EvaluationErrorPopover" const {Title} = 
Typography @@ -250,26 +244,7 @@ const AutomaticEvalOverview = () => { ) return result.result.error ? ( - - {result.result.error?.stacktrace} - - } - title={result.result.error?.message} - > - - + ) : ( {
Automatic Evaluations - @@ -492,19 +467,6 @@ const AutomaticEvalOverview = () => { }} /> - { - setIsEditEvalConfigOpen(false) - fetchEvaluations() - }} - newEvalModalConfigOpen={isEditEvalConfigOpen} - setNewEvalModalConfigOpen={setIsEditEvalConfigOpen} - setNewEvalModalOpen={() => {}} - editMode={true} - initialValues={selectedConfigEdit} - /> - {selectedEvalRecord && ( ({ + headerText: { + lineHeight: theme.lineHeightLG, + fontSize: theme.fontSizeHeading4, + fontWeight: theme.fontWeightStrong, + }, + appTemplate: { + gap: 16, + display: "flex", + flexDirection: "column", + }, + template: { + border: "1px solid", + borderColor: theme.colorBorderSecondary, + borderRadius: theme.borderRadiusLG, + paddingTop: theme.paddingSM, + paddingBottom: theme.paddingSM, + paddingInline: theme.padding, + boxShadow: + "0px 2px 4px 0px #00000005, 0px 1px 6px -1px #00000005, 0px 1px 2px 0px #00000008", + gap: 2, + cursor: "pointer", + "& > span": { + fontSize: theme.fontSizeLG, + lineHeight: theme.lineHeightLG, + fontWeight: theme.fontWeightMedium, + }, + "& > div": { + marginBottom: 0, + }, + }, +})) + +type Props = {setCurrent: React.Dispatch>} + +const CreateTestset: React.FC = ({setCurrent}) => { + const classes = useStyles() + return ( +
+ Create new test set +
+
setCurrent(1)} + data-cy="create-testset-from-scratch" + > + Create from scratch + Create a new test set directly from the webUI +
+
setCurrent(2)} + data-cy="upload-testset" + > + Upload a test set + Upload your test set as CSV or JSON +
+
setCurrent(3)}> + Create with API + + Create a test set programmatically using our API endpoints + +
+
+
+ ) +} + +export default CreateTestset diff --git a/agenta-web/src/components/pages/testset/modals/CreateTestsetFromApi.tsx b/agenta-web/src/components/pages/testset/modals/CreateTestsetFromApi.tsx new file mode 100644 index 000000000..89f77e4bb --- /dev/null +++ b/agenta-web/src/components/pages/testset/modals/CreateTestsetFromApi.tsx @@ -0,0 +1,182 @@ +import React, {useState} from "react" +import CopyButton from "@/components/CopyButton/CopyButton" +import {getAgentaApiUrl} from "@/lib/helpers/utils" +import {JSSTheme} from "@/lib/Types" +import {PythonOutlined} from "@ant-design/icons" +import {ArrowLeft, FileCode, FileTs} from "@phosphor-icons/react" +import {Button, Radio, Tabs, Typography} from "antd" +import {useRouter} from "next/router" +import {createUseStyles} from "react-jss" +import pythonCode from "@/code_snippets/testsets/create_with_json/python" +import cURLCode from "@/code_snippets/testsets/create_with_json/curl" +import tsCode from "@/code_snippets/testsets/create_with_json/typescript" +import CodeBlock from "@/components/DynamicCodeBlock/CodeBlock" +import pythonCodeUpload from "@/code_snippets/testsets/create_with_upload/python" +import cURLCodeUpload from "@/code_snippets/testsets/create_with_upload/curl" +import tsCodeUpload from "@/code_snippets/testsets/create_with_upload/typescript" + +const {Text} = Typography + +const useStyles = createUseStyles((theme: JSSTheme) => ({ + headerText: { + lineHeight: theme.lineHeightLG, + fontSize: theme.fontSizeHeading4, + fontWeight: theme.fontWeightStrong, + }, + label: { + fontWeight: theme.fontWeightMedium, + }, + uploadContainer: { + padding: theme.paddingXS, + display: "flex", + alignItems: "center", + justifyContent: "space-between", + border: "1px solid", + borderColor: theme.colorBorder, + borderRadius: theme.borderRadiusLG, + }, + subText: { + color: theme.colorTextSecondary, + }, +})) + +type Props = { + setCurrent: React.Dispatch> + onCancel: () => void +} +type LanguageCodeBlockProps = { + 
selectedLang: string + codeSnippets: Record +} + +const LanguageCodeBlock = ({selectedLang, codeSnippets}: LanguageCodeBlockProps) => { + return ( +
+
+ +
+ +
+ +
+
+ ) +} + +const CreateTestsetFromApi: React.FC = ({setCurrent, onCancel}) => { + const classes = useStyles() + const router = useRouter() + const [uploadType, setUploadType] = useState<"csv" | "json">("csv") + const [selectedLang, setSelectedLang] = useState("python") + + const appId = router.query.app_id as string + + const uploadURI = `${getAgentaApiUrl()}/api/testsets/upload` + const jsonURI = `${getAgentaApiUrl()}/api/testsets/${appId}` + + const params = `{ + "name": "testset_name",}` + + const jsonCodeSnippets: Record = { + python: pythonCode(jsonURI, params), + bash: cURLCode(jsonURI, params), + typescript: tsCode(jsonURI, params), + } + + const csvCodeSnippets: Record = { + python: pythonCodeUpload(uploadURI, appId), + bash: cURLCodeUpload(uploadURI, appId), + typescript: tsCodeUpload(uploadURI, appId), + } + + const codeSnippets = uploadType === "csv" ? csvCodeSnippets : jsonCodeSnippets + + return ( +
+
+
+ +
+ Create a test set programmatically using our API endpoints + +
+ Select type + setUploadType(e.target.value)}> + CSV + JSON + +
+ + Use this endpoint to create a new Test set for your App using JSON + +
+ + ), + icon: , + }, + { + key: "typescript", + label: "TypeScript", + children: ( + + ), + icon: , + }, + { + key: "bash", + label: "cURL", + children: ( + + ), + icon: , + }, + ]} + /> +
+
+ +
+ + Read the docs + + + +
+
+ ) +} + +export default CreateTestsetFromApi diff --git a/agenta-web/src/components/pages/testset/modals/CreateTestsetFromScratch.tsx b/agenta-web/src/components/pages/testset/modals/CreateTestsetFromScratch.tsx new file mode 100644 index 000000000..7a3a7ddf7 --- /dev/null +++ b/agenta-web/src/components/pages/testset/modals/CreateTestsetFromScratch.tsx @@ -0,0 +1,190 @@ +import React, {useMemo, useState} from "react" +import {JSSTheme, KeyValuePair, testset, TestsetCreationMode} from "@/lib/Types" +import {ArrowLeft} from "@phosphor-icons/react" +import {Button, Input, message, Typography} from "antd" +import {createUseStyles} from "react-jss" +import {useRouter} from "next/router" +import { + createNewTestset, + fetchTestset, + updateTestset, + useLoadTestsetsList, +} from "@/services/testsets/api" +import {fetchVariants} from "@/services/api" +import {getVariantInputParameters} from "@/lib/helpers/variantHelper" + +const {Text} = Typography + +const useStyles = createUseStyles((theme: JSSTheme) => ({ + headerText: { + lineHeight: theme.lineHeightLG, + fontSize: theme.fontSizeHeading4, + fontWeight: theme.fontWeightStrong, + }, + label: { + fontWeight: theme.fontWeightMedium, + }, +})) + +type Props = { + mode: TestsetCreationMode + setMode: React.Dispatch> + editTestsetValues: testset | null + setEditTestsetValues: React.Dispatch> + setCurrent: React.Dispatch> + onCancel: () => void +} + +const CreateTestsetFromScratch: React.FC = ({ + mode, + setMode, + editTestsetValues, + setEditTestsetValues, + setCurrent, + onCancel, +}) => { + const classes = useStyles() + const router = useRouter() + const appId = router.query.app_id as string + const [testsetName, setTestsetName] = useState( + mode === "rename" ? 
(editTestsetValues?.name as string) : "", + ) + const [isLoading, setIsLoading] = useState(false) + const {mutate} = useLoadTestsetsList(appId) + + const generateInitialRowData = async (): Promise => { + const backendVariants = await fetchVariants(appId) + const variant = backendVariants[0] + const inputParams = await getVariantInputParameters(appId, variant) + const fields = [...inputParams.map((param) => param.name), "correct_answer"] + return Array(3) + .fill({}) + .map(() => fields.reduce((acc, field) => ({...acc, [field]: ""}), {})) + } + + const handleCreateTestset = async (data?: KeyValuePair[]) => { + setIsLoading(true) + try { + const rowData = data || (await generateInitialRowData()) + const response = await createNewTestset(appId, testsetName, rowData) + message.success("Test set created successfully") + router.push(`/apps/${appId}/testsets/${response.data.id}`) + } catch (error) { + console.error("Error saving test set:", error) + message.error("Failed to create Test set. Please try again!") + } finally { + setIsLoading(false) + } + } + + const handleCloneTestset = async (testsetId: string) => { + setIsLoading(true) + try { + const fetchedTestset = await fetchTestset(testsetId) + if (fetchedTestset.csvdata) { + await handleCreateTestset(fetchedTestset.csvdata) + } else { + throw new Error("Failed to load instances") + } + } catch (error) { + console.error("Error cloning test set:", error) + message.error("Failed to clone Test set. 
Please try again!") + } finally { + setIsLoading(false) + } + } + + const handleRenameTestset = async (testsetId: string) => { + setIsLoading(true) + try { + const fetchedTestset = await fetchTestset(testsetId) + if (fetchedTestset.csvdata) { + await updateTestset(testsetId, testsetName, fetchedTestset.csvdata) + message.success("Test set renamed successfully") + mutate() + onCancel() + } else { + throw new Error("Failed to load instances") + } + } catch (error) { + console.error("Error renaming test set:", error) + message.error("Failed to rename Test set. Please try again!") + } finally { + setIsLoading(false) + } + } + + const onSubmit = () => { + switch (mode) { + case "create": + handleCreateTestset() + break + case "clone": + handleCloneTestset(editTestsetValues?._id as string) + break + case "rename": + handleRenameTestset(editTestsetValues?._id as string) + break + } + } + + const getHeaderText = useMemo(() => { + switch (mode) { + case "create": + return "Create from scratch" + case "clone": + return "Clone Test set" + case "rename": + return "Rename Test set" + } + }, [mode]) + + const goBackToInitialStep = () => { + setMode("create") + setEditTestsetValues(null) + setCurrent(0) + } + + return ( +
+
+
+ + Create a new test set directly from the webUI + +
+ Test Set Name + setTestsetName(e.target.value)} + data-cy="testset-name-input" + /> +
+ +
+ + +
+
+ ) +} + +export default CreateTestsetFromScratch diff --git a/agenta-web/src/components/pages/testset/modals/UploadTestset.tsx b/agenta-web/src/components/pages/testset/modals/UploadTestset.tsx new file mode 100644 index 000000000..950bd9ecd --- /dev/null +++ b/agenta-web/src/components/pages/testset/modals/UploadTestset.tsx @@ -0,0 +1,305 @@ +import React, {useState} from "react" +import {GenericObject, JSSTheme} from "@/lib/Types" +import {ArrowLeft, FileCode, FileCsv, Trash} from "@phosphor-icons/react" +import {Button, Collapse, Form, Input, message, Radio, Typography, Upload, UploadFile} from "antd" +import {createUseStyles} from "react-jss" +import {UploadOutlined} from "@ant-design/icons" +import {isValidCSVFile, isValidJSONFile} from "@/lib/helpers/fileManipulations" +import {useRouter} from "next/router" +import {globalErrorHandler} from "@/lib/helpers/errorHandler" +import {uploadTestsets, useLoadTestsetsList} from "@/services/testsets/api" + +const {Text} = Typography + +const useStyles = createUseStyles((theme: JSSTheme) => ({ + headerText: { + lineHeight: theme.lineHeightLG, + fontSize: theme.fontSizeHeading4, + fontWeight: theme.fontWeightStrong, + }, + label: { + fontWeight: theme.fontWeightMedium, + }, + uploadContainer: { + padding: theme.paddingXS, + display: "flex", + alignItems: "center", + justifyContent: "space-between", + border: "1px solid", + borderColor: theme.colorBorder, + borderRadius: theme.borderRadiusLG, + position: "relative", + overflow: "hidden", + }, + trashIcon: { + color: theme.colorTextSecondary, + cursor: "pointer", + }, + progressBar: { + position: "absolute", + top: 0, + bottom: 0, + left: 0, + right: 0, + backgroundColor: theme["cyan5"], + opacity: 0.3, + }, +})) + +type Props = { + setCurrent: React.Dispatch> + onCancel: () => void +} + +const UploadTestset: React.FC = ({setCurrent, onCancel}) => { + const classes = useStyles() + const router = useRouter() + const [form] = Form.useForm() + const testsetFile = 
Form.useWatch("file", form) + const appId = router.query.app_id as string + const [uploadType, setUploadType] = useState<"JSON" | "CSV" | undefined>("CSV") + const [testsetName, setTestsetName] = useState("") + const [uploadLoading, setUploadLoading] = useState(false) + const [fileProgress, setFileProgress] = useState({} as UploadFile) + const {mutate} = useLoadTestsetsList(appId) + + const onFinish = async (values: any) => { + const {file} = values + const fileObj = file[0].originFileObj + const malformedFileError = `The file you uploaded is either malformed or is not a valid ${uploadType} file` + + if (file && file.length > 0 && uploadType) { + const isValidFile = await (uploadType == "CSV" + ? isValidCSVFile(fileObj) + : isValidJSONFile(fileObj)) + if (!isValidFile) { + message.error(malformedFileError) + return + } + + const formData = new FormData() + formData.append("upload_type", uploadType) + formData.append("file", fileObj) + if (testsetName && testsetName.trim() !== "") { + formData.append("testset_name", testsetName) + } + formData.append("app_id", appId) + + try { + setUploadLoading(true) + await uploadTestsets(formData) + form.resetFields() + setTestsetName("") + mutate() + onCancel() + } catch (e: any) { + if ( + e?.response?.data?.detail?.find((item: GenericObject) => + item?.loc?.includes("csvdata"), + ) + ) + message.error(malformedFileError) + else globalErrorHandler(e) + } finally { + setUploadLoading(false) + } + } + } + + return ( +
+
+
+ +
+ Upload your test set as CSV or JSON + +
+ Select type + setUploadType(e.target.value)}> + CSV + JSON + +
+ +
+ Test Set Name + setTestsetName(e.target.value)} + data-cy="upload-testset-file-name" + /> +
+ +
+
+ Upload CSV or JSON + +
+ e.fileList} + className="mb-0" + rules={[{required: true}]} + > + { + setFileProgress(e.fileList[0]) + !testsetName && + setTestsetName( + e.fileList[0].name.split(".")[0] as string, + ) + }} + > + + + + +
+ + {fileProgress.name && ( +
+ {fileProgress.status == "uploading" && ( +
+ )} +
+ {uploadType === "CSV" ? ( + + ) : ( + + )} + {fileProgress.name} +
+ + { + form.resetFields() + setTestsetName("") + setFileProgress({} as UploadFile) + }} + /> +
+ )} +
+ +
+ + {uploadType === "CSV" ? ( + <> + + The test set should be in CSV format with the + following requirements: + +
+ 1. Comma separated values + + 2. The first row should contain the headers + +
+ + Here is an example of a valid CSV file:
+ recipe_name,correct_answer
+ ChickenParmesan,Chicken
"a, special, + recipe",Beef +
+ + ) : ( + <> + + The test set should be in JSON format with the + following requirements: + + +
+ + 1. A json file with an array of rows + + + 2. Each row in the array should be an object + + + of column header name as key and row data as + value. + +
+ + + Here is an example of a valid JSON file:
+ {JSON.stringify( + [ + { + recipe_name: "Chicken Parmesan", + correct_answer: "Chicken", + }, + { + recipe_name: "a, special, recipe", + correct_answer: "Beef", + }, + ], + null, + 2, + )} +
+ + )} + + + + +
+ ), + }, + ]} + /> +
+
+ +
+ + +
+ + ) +} + +export default UploadTestset diff --git a/agenta-web/src/components/pages/testset/modals/index.tsx b/agenta-web/src/components/pages/testset/modals/index.tsx new file mode 100644 index 000000000..8ea035eaf --- /dev/null +++ b/agenta-web/src/components/pages/testset/modals/index.tsx @@ -0,0 +1,91 @@ +import React from "react" +import {JSSTheme, testset, TestsetCreationMode} from "@/lib/Types" +import {Modal} from "antd" +import {createUseStyles} from "react-jss" +import CreateTestset from "./CreateTestset" +import CreateTestsetFromScratch from "./CreateTestsetFromScratch" +import UploadTestset from "./UploadTestset" +import CreateTestsetFromApi from "./CreateTestsetFromApi" + +const useStyles = createUseStyles((theme: JSSTheme) => ({ + modal: { + transition: "width 0.3s ease", + "& .ant-modal-content": { + overflow: "hidden", + borderRadius: 16, + "& > .ant-modal-close": { + top: 16, + }, + }, + }, +})) + +type Props = { + testsetCreationMode: TestsetCreationMode + setTestsetCreationMode: React.Dispatch> + editTestsetValues: testset | null + setEditTestsetValues: React.Dispatch> + current: number + setCurrent: React.Dispatch> +} & React.ComponentProps + +const TestsetModal: React.FC = ({ + testsetCreationMode, + setTestsetCreationMode, + editTestsetValues, + setEditTestsetValues, + current, + setCurrent, + ...props +}) => { + const classes = useStyles() + + const onCancel = () => props.onCancel?.({} as any) + + const onCloseModal = () => { + setTestsetCreationMode("create") + setEditTestsetValues(null) + setCurrent(0) + } + + const steps = [ + { + content: , + }, + { + content: ( + + ), + }, + { + content: , + }, + { + content: , + }, + ] + + return ( + + {steps[current]?.content} + + ) +} + +export default TestsetModal diff --git a/agenta-web/src/contexts/app.context.tsx b/agenta-web/src/contexts/app.context.tsx index b7087542e..5c6486885 100644 --- a/agenta-web/src/contexts/app.context.tsx +++ b/agenta-web/src/contexts/app.context.tsx @@ -7,6 +7,7 @@ 
import useSWR from "swr" import {dynamicContext} from "@/lib/helpers/dynamic" import {HookAPI} from "antd/es/modal/useModal" import {useLocalStorage} from "usehooks-ts" +import {useProfileData} from "./profile.context" type AppContextType = { currentApp: ListAppsItem | null @@ -31,6 +32,7 @@ const initialValues: AppContextType = { const useApps = () => { const [useOrgData, setUseOrgData] = useState(() => () => "") + const {user} = useProfileData() useEffect(() => { dynamicContext("org.context", {useOrgData}).then((context) => { @@ -40,11 +42,13 @@ const useApps = () => { const {selectedOrg, loading} = useOrgData() const {data, error, isLoading, mutate} = useSWR( - `${getAgentaApiUrl()}/api/apps/` + - (isDemo() - ? `?org_id=${selectedOrg?.id}&workspace_id=${selectedOrg?.default_workspace.id}` - : ""), - isDemo() ? (selectedOrg?.id ? axiosFetcher : () => {}) : axiosFetcher, + !!user + ? `${getAgentaApiUrl()}/api/apps/` + + (isDemo() + ? `?org_id=${selectedOrg?.id}&workspace_id=${selectedOrg?.default_workspace.id}` + : "") + : null, + !!user ? (isDemo() ? (selectedOrg?.id ? 
axiosFetcher : () => {}) : axiosFetcher) : null, { shouldRetryOnError: false, }, diff --git a/agenta-web/src/hooks/useSession.ts b/agenta-web/src/hooks/useSession.ts index 196288e24..df7fb53c0 100644 --- a/agenta-web/src/hooks/useSession.ts +++ b/agenta-web/src/hooks/useSession.ts @@ -3,7 +3,7 @@ import {isDemo} from "@/lib/helpers/utils" import {useRouter} from "next/router" import posthog from "posthog-js" import {useSessionContext} from "supertokens-auth-react/recipe/session" -import {signOut} from "supertokens-auth-react/recipe/thirdpartypasswordless" +import {signOut} from "supertokens-auth-react/recipe/session" export const useSession: () => {loading: boolean; doesSessionExist: boolean; logout: () => void} = isDemo() diff --git a/agenta-web/src/lib/Types.ts b/agenta-web/src/lib/Types.ts index 9be734c61..df47fd87b 100644 --- a/agenta-web/src/lib/Types.ts +++ b/agenta-web/src/lib/Types.ts @@ -8,6 +8,7 @@ export interface testset { _id: string name: string created_at: string + updated_at: string } export interface TestSet { @@ -18,6 +19,8 @@ export interface TestSet { csvdata: KeyValuePair[] } +export type TestsetCreationMode = "create" | "clone" | "rename" + export interface ListAppsItem { app_id: string app_name: string @@ -356,6 +359,8 @@ export interface Evaluator { direct_use?: boolean description: string oss?: boolean + requires_llm_api_keys?: boolean + tags: string[] } export interface EvaluatorConfig { @@ -364,6 +369,9 @@ export interface EvaluatorConfig { name: string settings_values: Record created_at: string + color?: string + updated_at: string + tags?: string[] } export type EvaluationError = { @@ -622,3 +630,35 @@ export interface TraceSpanTreeNode { key: string children?: TraceSpanTreeNode[] } + +interface VariantVotesData { + number_of_votes: number + percentage: number +} +export interface HumanEvaluationListTableDataType { + key: string + variants: string[] + testset: { + _id: string + name: string + } + evaluationType: string + status: 
EvaluationFlow + votesData: { + nb_of_rows: number + variants: string[] + flag_votes: { + number_of_votes: number + percentage: number + } + positive_votes: { + number_of_votes: number + percentage: number + } + variants_votes_data: Record + } + createdAt: string + revisions: string[] + variant_revision_ids: string[] + variantNames: string[] +} diff --git a/agenta-web/src/lib/helpers/axiosConfig.ts b/agenta-web/src/lib/helpers/axiosConfig.ts index 0a4add939..9637a9a0b 100644 --- a/agenta-web/src/lib/helpers/axiosConfig.ts +++ b/agenta-web/src/lib/helpers/axiosConfig.ts @@ -1,6 +1,6 @@ import axiosApi from "axios" import {getErrorMessage, globalErrorHandler} from "./errorHandler" -import {signOut} from "supertokens-auth-react/recipe/thirdpartypasswordless" +import {signOut} from "supertokens-auth-react/recipe/session" import router from "next/router" import {getAgentaApiUrl} from "./utils" import {isObject} from "lodash" diff --git a/agenta-web/src/lib/helpers/evaluate.ts b/agenta-web/src/lib/helpers/evaluate.ts index 87b5b48f4..9fd97f3cf 100644 --- a/agenta-web/src/lib/helpers/evaluate.ts +++ b/agenta-web/src/lib/helpers/evaluate.ts @@ -1,4 +1,4 @@ -import {HumanEvaluationListTableDataType} from "@/components/Evaluations/HumanEvaluationResult" +import {HumanEvaluationListTableDataType} from "@/lib/Types" import { Evaluation, GenericObject, @@ -15,6 +15,7 @@ import {capitalize, round} from "lodash" import dayjs from "dayjs" import {runningStatuses} from "@/components/pages/evaluations/cellRenderers/cellRenderers" import {formatCurrency, formatLatency} from "./formatters" +import {isDemo} from "./utils" export const exportExactEvaluationData = (evaluation: Evaluation, rows: GenericObject[]) => { const exportRow = rows.map((data, ix) => { @@ -352,3 +353,72 @@ const getCustomComparator = (type: CellDataType) => (valueA: string, valueB: str export const removeCorrectAnswerPrefix = (str: string) => { return str.replace(/^correctAnswer_/, "") } + +export const 
mapTestcaseAndEvalValues = ( + settingsValues: Record, + selectedTestcase: Record, +) => { + let testcaseObj: Record = {} + let evalMapObj: Record = {} + + Object.entries(settingsValues).forEach(([key, value]) => { + if (typeof value === "string" && value.startsWith("testcase.")) { + testcaseObj[key] = selectedTestcase[value.split(".")[1]] + } else { + evalMapObj[key] = value + } + }) + + return {testcaseObj, evalMapObj} +} + +export const transformTraceKeysInSettings = ( + settingsValues: Record, +): Record => { + return Object.keys(settingsValues).reduce( + (acc, curr) => { + if ( + !acc[curr] && + typeof settingsValues[curr] === "string" && + settingsValues[curr].startsWith("trace.") + ) { + acc[curr] = settingsValues[curr].replace("trace.", "") + } else { + acc[curr] = settingsValues[curr] + } + + return acc + }, + {} as Record, + ) +} + +export const getEvaluatorTags = () => { + const evaluatorTags = [ + { + label: "Classifiers", + value: "classifiers", + }, + { + label: "Similarity", + value: "similarity", + }, + { + label: "AI / LLM", + value: "ai_llm", + }, + { + label: "Functional", + value: "functional", + }, + ] + + if (isDemo()) { + evaluatorTags.unshift({ + label: "RAG", + value: "rag", + }) + } + + return evaluatorTags +} diff --git a/agenta-web/src/lib/helpers/utils.ts b/agenta-web/src/lib/helpers/utils.ts index aaef7b79d..01fac1df3 100644 --- a/agenta-web/src/lib/helpers/utils.ts +++ b/agenta-web/src/lib/helpers/utils.ts @@ -125,14 +125,8 @@ export const removeKeys = (obj: GenericObject, keys: string[]) => { export const safeParse = (str: string, fallback: any = "") => { try { if (!str) return fallback - - if (typeof str !== "string") { - return JSON.parse(str) - } else { - return str - } + return JSON.parse(str) } catch (error) { - console.log("error parsing JSON:", error) return fallback } } diff --git a/agenta-web/src/lib/transformers.ts b/agenta-web/src/lib/transformers.ts index 63ab6fb2d..b260399b3 100644 --- 
a/agenta-web/src/lib/transformers.ts +++ b/agenta-web/src/lib/transformers.ts @@ -209,3 +209,90 @@ export const fromBaseResponseToTraceSpanType = ( return [top_level_spans, spans_dict] } + +export const transformTraceTreeToJson = (tree: TraceSpan[]) => { + const nodeMap: Record = {} + + function addTree(item: TraceSpan) { + if (item.name) { + const content = { + ...item.content, + ...(item.children ? transformTraceTreeToJson(item.children) : null), + } + + if (!nodeMap[item.name]) { + nodeMap[item.name] = content + } else { + if (!Array.isArray(nodeMap[item.name])) { + nodeMap[item.name] = [nodeMap[item.name]] + } + nodeMap[item.name].push(content) + } + } + } + + tree.forEach((item) => { + addTree(item) + }) + + const filterEmptyValues = (obj: Record): any => { + if (Array.isArray(obj)) { + return obj + .map(filterEmptyValues) + .filter( + (item) => + item !== null && + !(typeof item === "object" && Object.keys(item).length === 0), + ) + } else if (typeof obj === "object" && obj !== null) { + return Object.entries(obj).reduce( + (acc, [key, value]) => { + const filteredValue = filterEmptyValues(value) + if ( + filteredValue !== null && + !( + typeof filteredValue === "object" && + Object.keys(filteredValue).length === 0 + ) + ) { + acc[key] = filteredValue + } + return acc + }, + {} as Record, + ) + } else { + return obj + } + } + + return filterEmptyValues(nodeMap) +} + +export const generatePaths = (obj: Record, currentPath = "") => { + let paths: {value: string}[] = [] + + if (typeof obj === "object" && obj !== null && !Array.isArray(obj)) { + Object.entries(obj).forEach(([key, value]) => { + const newPath = currentPath ? 
`${currentPath}.${key}` : key + if (value && typeof value === "object" && Object.keys(value).length) { + paths.push({value: newPath}) + paths = paths.concat(generatePaths(value, newPath)) + } else if (value && typeof value !== "object") { + paths.push({value: newPath}) + } + }) + } else if (Array.isArray(obj)) { + obj.forEach((value, index) => { + const newPath = `${currentPath}[${index}]` + if (value && typeof value === "object" && Object.keys(value).length) { + paths.push({value: newPath}) + paths = paths.concat(generatePaths(value, newPath)) + } else if (value && typeof value !== "object") { + paths.push({value: newPath}) + } + }) + } + + return paths +} diff --git a/agenta-web/src/media/eval-illustration.png b/agenta-web/src/media/eval-illustration.png deleted file mode 100644 index 6565954d4..000000000 Binary files a/agenta-web/src/media/eval-illustration.png and /dev/null differ diff --git a/agenta-web/src/media/night.png b/agenta-web/src/media/night.png deleted file mode 100644 index 6f30eae90..000000000 Binary files a/agenta-web/src/media/night.png and /dev/null differ diff --git a/agenta-web/src/media/score.png b/agenta-web/src/media/score.png deleted file mode 100644 index e527e1e71..000000000 Binary files a/agenta-web/src/media/score.png and /dev/null differ diff --git a/agenta-web/src/media/sun.png b/agenta-web/src/media/sun.png deleted file mode 100644 index eb539f153..000000000 Binary files a/agenta-web/src/media/sun.png and /dev/null differ diff --git a/agenta-web/src/media/testing.png b/agenta-web/src/media/testing.png deleted file mode 100644 index 7f9ea7db2..000000000 Binary files a/agenta-web/src/media/testing.png and /dev/null differ diff --git a/agenta-web/src/pages/_app.tsx b/agenta-web/src/pages/_app.tsx index e7eed71ea..fa447f236 100644 --- a/agenta-web/src/pages/_app.tsx +++ b/agenta-web/src/pages/_app.tsx @@ -15,7 +15,10 @@ import "ag-grid-community/styles/ag-grid.css" import "ag-grid-community/styles/ag-theme-alpine.css" import {Inter} 
from "next/font/google" -const inter = Inter({subsets: ["latin"]}) +const inter = Inter({ + subsets: ["latin"], + variable: "--font-inter", +}) // Initialize the Posthog client if (typeof window !== "undefined") { @@ -48,7 +51,7 @@ export default function App({Component, pageProps}: AppProps) { Agenta: The LLMOps platform. -
+
diff --git a/agenta-web/src/pages/apps/[app_id]/annotations/human_a_b_testing.tsx b/agenta-web/src/pages/apps/[app_id]/annotations/human_a_b_testing.tsx deleted file mode 100644 index ea60e0a9a..000000000 --- a/agenta-web/src/pages/apps/[app_id]/annotations/human_a_b_testing.tsx +++ /dev/null @@ -1,21 +0,0 @@ -import HumanEvaluationResult from "@/components/Evaluations/HumanEvaluationResult" -import HumanEvaluationModal from "@/components/HumanEvaluationModal/HumanEvaluationModal" -import React, {useState} from "react" - -const HumanABTestingEvaluation = () => { - const [isEvalModalOpen, setIsEvalModalOpen] = useState(false) - - return ( - <> - - - - - ) -} - -export default HumanABTestingEvaluation diff --git a/agenta-web/src/pages/apps/[app_id]/annotations/single_model_test.tsx b/agenta-web/src/pages/apps/[app_id]/annotations/single_model_test.tsx deleted file mode 100644 index 9487da1bc..000000000 --- a/agenta-web/src/pages/apps/[app_id]/annotations/single_model_test.tsx +++ /dev/null @@ -1,21 +0,0 @@ -import React, {useState} from "react" -import AutomaticEvaluationResult from "@/components/Evaluations/AutomaticEvaluationResult" -import HumanEvaluationModal from "@/components/HumanEvaluationModal/HumanEvaluationModal" - -const SingleModelTestEvaluation = () => { - const [isEvalModalOpen, setIsEvalModalOpen] = useState(false) - - return ( - <> - - - - - ) -} - -export default SingleModelTestEvaluation diff --git a/agenta-web/src/pages/apps/[app_id]/annotations/human_a_b_testing/[evaluation_id]/index.tsx b/agenta-web/src/pages/apps/[app_id]/evaluations/human_a_b_testing/[evaluation_id]/index.tsx similarity index 100% rename from agenta-web/src/pages/apps/[app_id]/annotations/human_a_b_testing/[evaluation_id]/index.tsx rename to agenta-web/src/pages/apps/[app_id]/evaluations/human_a_b_testing/[evaluation_id]/index.tsx diff --git a/agenta-web/src/pages/apps/[app_id]/evaluations/index.tsx b/agenta-web/src/pages/apps/[app_id]/evaluations/index.tsx new file mode 
100644 index 000000000..aa69f6987 --- /dev/null +++ b/agenta-web/src/pages/apps/[app_id]/evaluations/index.tsx @@ -0,0 +1,77 @@ +import AbTestingEvaluation from "@/components/HumanEvaluations/AbTestingEvaluation" +import AutoEvaluation from "@/components/pages/evaluations/autoEvaluation/AutoEvaluation" +import SingleModelEvaluation from "@/components/HumanEvaluations/SingleModelEvaluation" +import {useQueryParam} from "@/hooks/useQuery" +import {_Evaluation, JSSTheme} from "@/lib/Types" +import {ChartDonut, ListChecks, TestTube} from "@phosphor-icons/react" +import {Tabs, TabsProps, Typography} from "antd" +import {createUseStyles} from "react-jss" + +const useStyles = createUseStyles((theme: JSSTheme) => ({ + container: { + display: "flex", + flexDirection: "column", + gap: theme.marginLG, + }, + title: { + fontSize: theme.fontSizeLG, + fontWeight: theme.fontWeightMedium, + lineHeight: theme.lineHeightHeading4, + }, + evaluationTabContainer: { + "& .ant-tabs-nav": { + marginBottom: theme.marginLG, + }, + "& .ant-tabs-tab-btn": { + display: "flex", + alignItems: "center", + "& .ant-tabs-tab-icon": { + display: "flex", + }, + }, + }, +})) + +const EvaluationsPage = () => { + const classes = useStyles() + const [selectedEvaluation, setSelectedEvaluation] = useQueryParam( + "selectedEvaluation", + "auto_evaluation", + ) + + const items: TabsProps["items"] = [ + { + key: "auto_evaluation", + label: "Automatic Evaluation", + icon: , + children: , + }, + { + key: "human_annotation", + label: "Human annotation", + icon: , + children: , + }, + { + key: "human_ab_testing", + label: "Human A/B Testing", + icon: , + children: , + }, + ] + + return ( +
+ Evaluations + + +
+ ) +} + +export default EvaluationsPage diff --git a/agenta-web/src/pages/apps/[app_id]/evaluations/new-evaluator.tsx b/agenta-web/src/pages/apps/[app_id]/evaluations/new-evaluator.tsx deleted file mode 100644 index 563726775..000000000 --- a/agenta-web/src/pages/apps/[app_id]/evaluations/new-evaluator.tsx +++ /dev/null @@ -1,25 +0,0 @@ -import Evaluators from "@/components/pages/evaluations/evaluators/Evaluators" -import {useAppId} from "@/hooks/useAppId" -import {evaluatorConfigsAtom, evaluatorsAtom} from "@/lib/atoms/evaluation" -import {fetchAllEvaluatorConfigs, fetchAllEvaluators} from "@/services/evaluations/api" -import {useAtom} from "jotai" -import React, {useEffect} from "react" - -const NewEvaluator = () => { - const appId = useAppId() - const setEvaluators = useAtom(evaluatorsAtom)[1] - const setEvaluatorConfigs = useAtom(evaluatorConfigsAtom)[1] - - useEffect(() => { - Promise.all([fetchAllEvaluators(), fetchAllEvaluatorConfigs(appId)]).then( - ([evaluators, configs]) => { - setEvaluators(evaluators) - setEvaluatorConfigs(configs) - }, - ) - }, [appId]) - - return -} - -export default NewEvaluator diff --git a/agenta-web/src/pages/apps/[app_id]/evaluations/results.tsx b/agenta-web/src/pages/apps/[app_id]/evaluations/results.tsx deleted file mode 100644 index ae10ff2b5..000000000 --- a/agenta-web/src/pages/apps/[app_id]/evaluations/results.tsx +++ /dev/null @@ -1,25 +0,0 @@ -import EvaluationResults from "@/components/pages/evaluations/evaluationResults/EvaluationResults" -import {useAppId} from "@/hooks/useAppId" -import {evaluatorConfigsAtom, evaluatorsAtom} from "@/lib/atoms/evaluation" -import {fetchAllEvaluatorConfigs, fetchAllEvaluators} from "@/services/evaluations/api" -import {useAtom} from "jotai" -import React, {useEffect} from "react" - -const EvalResults = () => { - const appId = useAppId() - const setEvaluators = useAtom(evaluatorsAtom)[1] - const setEvaluatorConfigs = useAtom(evaluatorConfigsAtom)[1] - - useEffect(() => { - 
Promise.all([fetchAllEvaluators(), fetchAllEvaluatorConfigs(appId)]).then( - ([evaluators, configs]) => { - setEvaluators(evaluators) - setEvaluatorConfigs(configs) - }, - ) - }, [appId]) - - return -} - -export default EvalResults diff --git a/agenta-web/src/pages/apps/[app_id]/annotations/single_model_test/[evaluation_id]/index.tsx b/agenta-web/src/pages/apps/[app_id]/evaluations/single_model_test/[evaluation_id]/index.tsx similarity index 97% rename from agenta-web/src/pages/apps/[app_id]/annotations/single_model_test/[evaluation_id]/index.tsx rename to agenta-web/src/pages/apps/[app_id]/evaluations/single_model_test/[evaluation_id]/index.tsx index ed3e9630d..9c0573d2e 100644 --- a/agenta-web/src/pages/apps/[app_id]/annotations/single_model_test/[evaluation_id]/index.tsx +++ b/agenta-web/src/pages/apps/[app_id]/evaluations/single_model_test/[evaluation_id]/index.tsx @@ -1,4 +1,4 @@ -import {Evaluation, EvaluationScenario, GenericObject} from "@/lib/Types" +import type {Evaluation, EvaluationScenario, GenericObject} from "@/lib/Types" import { fetchLoadEvaluation, fetchAllLoadEvaluationsScenarios, diff --git a/agenta-web/src/pages/apps/[app_id]/overview/index.tsx b/agenta-web/src/pages/apps/[app_id]/overview/index.tsx index 23434e9ed..b4cb7a93f 100644 --- a/agenta-web/src/pages/apps/[app_id]/overview/index.tsx +++ b/agenta-web/src/pages/apps/[app_id]/overview/index.tsx @@ -1,9 +1,9 @@ import DeleteAppModal from "@/components/AppSelector/modals/DeleteAppModal" import EditAppModal from "@/components/AppSelector/modals/EditAppModal" -import AbTestingEvalOverview from "@/components/pages/overview/abTestingEvaluation/AbTestingEvalOverview" +import AbTestingEvaluation from "@/components/HumanEvaluations/AbTestingEvaluation" import AutomaticEvalOverview from "@/components/pages/overview/automaticEvaluation/AutomaticEvalOverview" import DeploymentOverview from "@/components/pages/overview/deployments/DeploymentOverview" -import SingleModelEvalOverview from 
"@/components/pages/overview/singleModelEvaluation/SingleModelEvalOverview" +import SingleModelEvaluation from "@/components/HumanEvaluations/SingleModelEvaluation" import VariantsOverview from "@/components/pages/overview/variants/VariantsOverview" import {useAppsData} from "@/contexts/app.context" import {useAppId} from "@/hooks/useAppId" @@ -165,9 +165,9 @@ export default function Overview() { - + - + {currentApp && ( ({ + modal: { + transition: "width 0.3s ease", + "& .ant-modal-content": { + overflow: "hidden", + borderRadius: 16, + "& > .ant-modal-close": { + top: 16, + }, + }, }, - btnContainer: { + headerText: { display: "flex", + alignItems: "center", justifyContent: "space-between", - marginTop: "20px", - }, - deleteBtn: { - marginTop: "30px", - "& svg": { - color: "red", + "& > .ant-typography": { + fontSize: theme.fontSizeHeading4, + lineHeight: theme.lineHeightHeading4, + fontWeight: theme.fontWeightMedium, + margin: 0, }, }, - linksContainer: { - display: "flex", - gap: "10px", - flexWrap: "wrap", - }, - startLink: { + button: { display: "flex", alignItems: "center", - gap: 8, }, -}) + table: { + "& table": { + border: "1px solid", + borderColor: theme.colorBorderSecondary, + }, + "& .ant-table-expanded-row-fixed": { + width: "100% !important", + }, + }, +})) -export default function Testsets() { +const Testset = () => { const classes = useStyles() const router = useRouter() const appId = router.query.app_id as string const [selectedRowKeys, setSelectedRowKeys] = useState([]) const {testsets, isTestsetsLoading, mutate} = useLoadTestsetsList(appId) - - const columns: ColumnsType = [ - { - title: "Name", - dataIndex: "name", - key: "name", - className: "testset-column", - }, - { - title: "Creation date", - dataIndex: "created_at", - key: "created_at", - render: (date: string) => { - return formatDate(date) - }, - className: "testset-column", - }, - ] + const [isCreateTestsetModalOpen, setIsCreateTestsetModalOpen] = useState(false) + const [searchTerm, 
setSearchTerm] = useState("") + const [testsetCreationMode, setTestsetCreationMode] = useState("create") + const [editTestsetValues, setEditTestsetValues] = useState(null) + const [current, setCurrent] = useState(0) const rowSelection = { onChange: (selectedRowKeys: React.Key[]) => { @@ -70,8 +68,8 @@ export default function Testsets() { }, } - const onDelete = async () => { - const testsetsIds = selectedRowKeys.map((key) => key.toString()) + const onDelete = async (testsetsId?: string[]) => { + const testsetsIds = !testsetsId ? selectedRowKeys.map((key) => key.toString()) : testsetsId try { if ( !(await checkIfResourceValidForDeletion({ @@ -86,79 +84,195 @@ export default function Testsets() { } catch {} } - return ( -
-
-
-
- - - - - - - - - - {!isDemo() && ( - - - - )} -
+ const filteredTestset = useMemo(() => { + let allTestsets = testsets.sort( + (a: TestSet, b: TestSet) => + dayjs(b.updated_at).valueOf() - dayjs(a.updated_at).valueOf(), + ) + if (searchTerm) { + allTestsets = testsets.filter((item: TestSet) => + item.name.toLowerCase().includes(searchTerm.toLowerCase()), + ) + } + return allTestsets + }, [searchTerm, testsets]) + + const columns: ColumnsType = [ + { + title: "Name", + dataIndex: "name", + key: "name", + onHeaderCell: () => ({ + style: {minWidth: 220}, + }), + }, + { + title: "Date Modified", + dataIndex: "updated_at", + key: "updated_at", + onHeaderCell: () => ({ + style: {minWidth: 220}, + }), + render: (date: string) => { + return formatDate(date) + }, + }, + { + title: "Date created", + dataIndex: "created_at", + key: "created_at", + render: (date: string) => { + return formatDate(date) + }, + onHeaderCell: () => ({ + style: {minWidth: 220}, + }), + }, + { + title: , + key: "key", + width: 56, + fixed: "right", + align: "center", + render: (_, record) => { + return ( + , + onClick: (e) => { + e.domEvent.stopPropagation() + router.push(`/apps/${appId}/testsets/${record._id}`) + }, + }, + { + key: "clone", + label: "Clone", + icon: , + onClick: (e) => { + e.domEvent.stopPropagation() + setTestsetCreationMode("clone") + setEditTestsetValues(record) + setCurrent(1) + setIsCreateTestsetModalOpen(true) + }, + }, + {type: "divider"}, + { + key: "rename", + label: "Rename", + icon: , + onClick: (e) => { + e.domEvent.stopPropagation() + setTestsetCreationMode("rename") + setEditTestsetValues(record) + setCurrent(1) + setIsCreateTestsetModalOpen(true) + }, + }, + { + key: "delete_eval", + label: "Delete", + icon: , + danger: true, + onClick: (e) => { + e.domEvent.stopPropagation() + onDelete([record._id]) + }, + }, + ], + }} + > + - + return ( + <> +
+
+ Test sets - - - - - )} +
- - {selectedRowKeys.length > 0 && ( +
+ setSearchTerm(e.target.value)} + /> - )} -
+
+ -
+
{ return { onClick: () => router.push(`/apps/${appId}/testsets/${record._id}`), } }} + locale={{emptyText: }} /> - - + + + { + setIsCreateTestsetModalOpen(false) + }} + /> + ) } + +export default Testset diff --git a/agenta-web/src/pages/apps/[app_id]/testsets/new/api/index.tsx b/agenta-web/src/pages/apps/[app_id]/testsets/new/api/index.tsx deleted file mode 100644 index 829788c3e..000000000 --- a/agenta-web/src/pages/apps/[app_id]/testsets/new/api/index.tsx +++ /dev/null @@ -1,62 +0,0 @@ -import DynamicCodeBlock from "@/components/DynamicCodeBlock/DynamicCodeBlock" - -import pythonCode from "@/code_snippets/testsets/create_with_json/python" -import cURLCode from "@/code_snippets/testsets/create_with_json/curl" -import tsCode from "@/code_snippets/testsets/create_with_json/typescript" - -import pythonCodeUpload from "@/code_snippets/testsets/create_with_upload/python" -import cURLCodeUpload from "@/code_snippets/testsets/create_with_upload/curl" -import tsCodeUpload from "@/code_snippets/testsets/create_with_upload/typescript" -import {Typography} from "antd" -import {useRouter} from "next/router" -import {createUseStyles} from "react-jss" -import {getAgentaApiUrl} from "@/lib/helpers/utils" - -const useStyles = createUseStyles({ - title: { - marginBottom: "20px !important", - }, -}) - -export default function NewTestsetWithAPI() { - const classes = useStyles() - const router = useRouter() - const appId = router.query.app_id as string - - const uploadURI = `${getAgentaApiUrl()}/api/testsets/upload` - const jsonURI = `${getAgentaApiUrl()}/api/testsets/${appId}` - - const params = `{ - "name": "testset_name",}` - - const codeSnippets: Record = { - Python: pythonCode(jsonURI, params), - cURL: cURLCode(jsonURI, params), - TypeScript: tsCode(jsonURI, params), - } - - const codeSnippetsUpload: Record = { - Python: pythonCodeUpload(uploadURI, appId), - cURL: cURLCodeUpload(uploadURI, appId), - TypeScript: tsCodeUpload(uploadURI, appId), - } - return ( -
- - Create a new Test Set with JSON - - - Use this endpoint to create a new Test Set for your App. - - - - - Create a new Test Set with uploading a CSV file - - - Use this endpoint to create a new Test Set for your App. - - -
- ) -} diff --git a/agenta-web/src/pages/apps/[app_id]/testsets/new/endpoint/index.tsx b/agenta-web/src/pages/apps/[app_id]/testsets/new/endpoint/index.tsx deleted file mode 100644 index 673706a4d..000000000 --- a/agenta-web/src/pages/apps/[app_id]/testsets/new/endpoint/index.tsx +++ /dev/null @@ -1,128 +0,0 @@ -import axios from "@/lib/helpers/axiosConfig" -import {getAgentaApiUrl} from "@/lib/helpers/utils" -import {Alert, Button, Form, Input, Spin, Typography, message} from "antd" -import {useRouter} from "next/router" -import {useState} from "react" -import {createUseStyles} from "react-jss" - -const useStyles = createUseStyles({ - container: { - display: "flex", - flexDirection: "column", - rowGap: 20, - maxWidth: 800, - }, - json: { - overflow: "auto", - }, - buttonContainer: { - display: "flex", - flexDirection: "row", - justifyContent: "flex-end", - }, -}) - -type FieldType = { - name: string - endpoint: string -} - -export default function ImportTestsetFromEndpoint() { - const classes = useStyles() - - const router = useRouter() - const appId = router.query.app_id as string - - const handleSubmit = async (values: FieldType) => { - if (values.name.trim() === "" || values.endpoint.trim() === "") { - message.error("Please fill out all fields") - return - } - - setUploadLoading(true) - - const formData = new FormData() - formData.append("endpoint", values.endpoint) - formData.append("testset_name", values.name) - formData.append("app_id", appId) - - try { - // TODO: move to api.ts - await axios.post(`${getAgentaApiUrl()}/api/testsets/endpoint/`, formData, { - headers: {"Content-Type": "multipart/form-data"}, - }) - router.push(`/apps/${appId}/testsets`) - } catch (_) { - // Errors will be handled by Axios interceptor - // Do nothing here - } finally { - setUploadLoading(false) - } - } - - const [uploadLoading, setUploadLoading] = useState(false) - - return ( -
- Import a new Test Set from an endpoint - - - Currently, we only support the JSON format which must meet the following - requirements: -
    -
  1. A JSON with an array of rows
  2. -
  3. - Each row in the array should be an object of column header name as - key and row data as value -
  4. -
- Here is an example of a valid JSON file: -
-                            {JSON.stringify(
-                                [
-                                    {
-                                        recipe_name: "Chicken Parmesan",
-                                        correct_answer: "Chicken",
-                                    },
-                                    {recipe_name: "a, special, recipe", correct_answer: "Beef"},
-                                ],
-                                null,
-                                2,
-                            )}
-                        
- - } - type="info" - /> - - -
- - label="Test Set Name" - name="name" - rules={[{required: true, type: "string", whitespace: true}]} - > - - - - - label="Test Set Endpoint" - name="endpoint" - rules={[{required: true, type: "url"}]} - > - - - -
- -
- -
-
- ) -} diff --git a/agenta-web/src/pages/apps/[app_id]/testsets/new/manual/index.tsx b/agenta-web/src/pages/apps/[app_id]/testsets/new/manual/index.tsx deleted file mode 100644 index f2b641168..000000000 --- a/agenta-web/src/pages/apps/[app_id]/testsets/new/manual/index.tsx +++ /dev/null @@ -1,5 +0,0 @@ -import TestsetTable from "@/components/TestSetTable/TestsetTable" - -export default function testsetCreatePage() { - return -} diff --git a/agenta-web/src/pages/apps/[app_id]/testsets/new/upload/index.tsx b/agenta-web/src/pages/apps/[app_id]/testsets/new/upload/index.tsx deleted file mode 100644 index 0b8ecb2f7..000000000 --- a/agenta-web/src/pages/apps/[app_id]/testsets/new/upload/index.tsx +++ /dev/null @@ -1,205 +0,0 @@ -import {UploadOutlined} from "@ant-design/icons" -import {Alert, Button, Form, Input, Space, Spin, Upload, message} from "antd" -import {useState} from "react" -import axios from "@/lib/helpers/axiosConfig" -import {useRouter} from "next/router" -import {createUseStyles} from "react-jss" -import {isValidCSVFile, isValidJSONFile} from "@/lib/helpers/fileManipulations" -import {GenericObject} from "@/lib/Types" -import {globalErrorHandler} from "@/lib/helpers/errorHandler" -import {getAgentaApiUrl} from "@/lib/helpers/utils" - -const useStyles = createUseStyles({ - fileFormatBtn: { - display: "flex", - gap: "25px", - }, - container: { - width: "50%", - }, - alert: { - marginTop: 20, - marginBottom: 40, - }, - form: { - maxWidth: 600, - }, -}) - -export default function AddANewTestset() { - const classes = useStyles() - const router = useRouter() - const appId = router.query.app_id as string - const [form] = Form.useForm() - const [uploadLoading, setUploadLoading] = useState(false) - const [uploadType, setUploadType] = useState<"JSON" | "CSV" | undefined>("CSV") - - const onFinish = async (values: any) => { - const {file} = values - const fileObj = file[0].originFileObj - const malformedFileError = `The file you uploaded is either malformed or is 
not a valid ${uploadType} file` - - if (file && file.length > 0 && uploadType) { - const isValidFile = await (uploadType == "CSV" - ? isValidCSVFile(fileObj) - : isValidJSONFile(fileObj)) - if (!isValidFile) { - message.error(malformedFileError) - return - } - - const formData = new FormData() - formData.append("upload_type", uploadType) - formData.append("file", fileObj) - if (values.testsetName && values.testsetName.trim() !== "") { - formData.append("testset_name", values.testsetName) - } - formData.append("app_id", appId) - - try { - setUploadLoading(true) - // TODO: move to api.ts - await axios.post(`${getAgentaApiUrl()}/api/testsets/upload/`, formData, { - headers: { - "Content-Type": "multipart/form-data", - }, - //@ts-ignore - _ignoreError: true, - }) - form.resetFields() - router.push(`/apps/${appId}/testsets`) - } catch (e: any) { - if ( - e?.response?.data?.detail?.find((item: GenericObject) => - item?.loc?.includes("csvdata"), - ) - ) - message.error(malformedFileError) - else globalErrorHandler(e) - } finally { - setUploadLoading(false) - } - } - } - - const layout = { - labelCol: {span: 8}, - wrapperCol: {span: 16}, - } - - const tailLayout = { - wrapperCol: {offset: 8, span: 16}, - } - - return ( -
-
- - -
- - - The test set should be in {uploadType} format with the following - requirements: -
- {uploadType == "CSV" && ( - <> - 1. Comma separated values -
- 2. The first row should contain the headers -
-
- Here is an example of a valid CSV file: -
-
- recipe_name,correct_answer -
- Chicken Parmesan,Chicken -
- "a, special, recipe",Beef -
- - )} - {uploadType == "JSON" && ( - <> - 1. A json file with an array of rows -
- 2. Each row in the array should be an object -
- of column header name as key and row data as value -
-
- Here is an example of a valid JSON file: -
-
- {`[{ "recipe_name": "Chicken Parmesan","correct_answer": "Chicken" },`} -
- {`{ "recipe_name": "a, special, recipe","correct_answer": "Beef" }]`} - - )} - - } - type="info" - className={classes.alert} - /> -
- - -
- - - - e.fileList} - label="Test set source" - rules={[{required: true}]} - > - -

- -

-

- Click or drag a {uploadType} file to this area to upload -

-
-
- - - - - -
-
- ) -} diff --git a/agenta-web/src/services/evaluations/api/index.ts b/agenta-web/src/services/evaluations/api/index.ts index c6cb31cd3..d251d0e26 100644 --- a/agenta-web/src/services/evaluations/api/index.ts +++ b/agenta-web/src/services/evaluations/api/index.ts @@ -1,9 +1,6 @@ import axios from "@/lib//helpers/axiosConfig" import { - Annotation, - AnnotationScenario, ComparisonResultRow, - EvaluationStatus, Evaluator, EvaluatorConfig, KeyValuePair, @@ -67,8 +64,15 @@ export const fetchAllEvaluators = async () => { // Evaluator Configs export const fetchAllEvaluatorConfigs = async (appId: string) => { + const tagColors = getTagColors() + const response = await axios.get(`/api/evaluators/configs/`, {params: {app_id: appId}}) - return response.data as EvaluatorConfig[] + const evaluatorConfigs = (response.data || []).map((item: EvaluatorConfig) => ({ + ...item, + icon_url: evaluatorIconsMap[item.evaluator_key as keyof typeof evaluatorIconsMap], + color: tagColors[stringToNumberInRange(item.evaluator_key, 0, tagColors.length - 1)], + })) as EvaluatorConfig[] + return evaluatorConfigs } export type CreateEvaluationConfigData = Omit @@ -162,60 +166,6 @@ export const fetchAllEvaluationScenarios = async (evaluationId: string) => { return evaluationScenarios as _EvaluationScenario[] } -//annotations -export const fetchAllAnnotations = async (appId: string) => { - const response = await axios.get(`/api/annotations/`, {params: {app_id: appId}}) - return response.data.map(evaluationTransformer) as Annotation[] -} - -export const fetchAnnotation = async (annotationId: string) => { - const response = await axios.get(`/api/annotations/${annotationId}/`) - return evaluationTransformer(response.data) as unknown as Annotation -} - -export const fetchAnnotationStatus = async (annotationId: string) => { - const response = await axios.get(`/api/annotations/${annotationId}/status/`) - return response.data as {status: EvaluationStatus} -} - -export const createAnnotation = async ( - 
appId: string, - annotation: Omit & - Pick, -) => { - return axios.post(`/api/annotations/`, {...annotation, app_id: appId}) -} - -export const deleteAnnotations = async (annotationsIds: string[]) => { - return axios.delete(`/api/annotations/`, {data: {annotations_ids: annotationsIds}}) -} - -// Annotation Scenarios -export const fetchAllAnnotationScenarios = async (appId: string, annotationId: string) => { - const [{data: annotationScenarios}, annotation] = await Promise.all([ - axios.get(`/api/annotations/${annotationId}/annotation_scenarios/`, { - params: {app_id: appId}, - }), - fetchAnnotation(annotationId), - ]) - - annotationScenarios.forEach((scenario: AnnotationScenario) => { - scenario.annotation = annotation - }) - return annotationScenarios as AnnotationScenario[] -} - -export const updateAnnotationScenario = async ( - annotationId: string, - annotationScenarioId: string, - data: Pick, -) => { - return axios.put( - `/api/annotations/${annotationId}/annotation_scenarios/${annotationScenarioId}`, - data, - ) -} - // Comparison export const fetchAllComparisonResults = async (evaluationIds: string[]) => { const scenarioGroups = await Promise.all(evaluationIds.map(fetchAllEvaluationScenarios)) diff --git a/agenta-web/src/services/human-evaluations/api/index.ts b/agenta-web/src/services/human-evaluations/api/index.ts index a5c98a6be..5e4a1deff 100644 --- a/agenta-web/src/services/human-evaluations/api/index.ts +++ b/agenta-web/src/services/human-evaluations/api/index.ts @@ -8,8 +8,10 @@ import { AICritiqueCreate, } from "@/lib/Types" import { + abTestingEvaluationTransformer, fromEvaluationResponseToEvaluation, fromEvaluationScenarioResponseToEvaluationScenario, + singleModelTestEvaluationTransformer, } from "@/lib/transformers" import {EvaluationFlow, EvaluationType} from "@/lib/enums" import {getAgentaApiUrl} from "@/lib/helpers/utils" @@ -31,6 +33,51 @@ export const fetchAllLoadEvaluations = async (appId: string, ignoreAxiosError: b return response.data } 
+export const fetchSingleModelEvaluationResult = async (appId: string) => { + const evals: Evaluation[] = (await fetchAllLoadEvaluations(appId)).map( + fromEvaluationResponseToEvaluation, + ) + const results = await Promise.all(evals.map((e) => fetchEvaluationResults(e.id))) + const newEvals = results.map((result, ix) => { + const item = evals[ix] + if ([EvaluationType.single_model_test].includes(item.evaluationType)) { + return singleModelTestEvaluationTransformer({item, result}) + } + }) + + const newEvalResults = newEvals + .filter((evaluation) => evaluation !== undefined) + .filter( + (item: any) => + item.resultsData !== undefined || + !(Object.keys(item.scoresData || {}).length === 0) || + item.avgScore !== undefined, + ) + return newEvalResults +} + +export const fetchAbTestingEvaluationResult = async (appId: string) => { + const evals = await fetchAllLoadEvaluations(appId) + + const fetchPromises = evals.map(async (item: any) => { + return fetchEvaluationResults(item.id) + .then((results) => { + if (item.evaluation_type === EvaluationType.human_a_b_testing) { + if (Object.keys(results.votes_data).length > 0) { + return abTestingEvaluationTransformer({item, results}) + } + } + }) + .catch((err) => console.error(err)) + }) + + const results = (await Promise.all(fetchPromises)) + .filter((evaluation) => evaluation !== undefined) + .sort((a, b) => new Date(b.createdAt || 0).getTime() - new Date(a.createdAt || 0).getTime()) + + return results +} + export const fetchLoadEvaluation = async (evaluationId: string) => { return await axios .get(`${getAgentaApiUrl()}/api/human-evaluations/${evaluationId}/`) diff --git a/agenta-web/src/services/testsets/api/index.ts b/agenta-web/src/services/testsets/api/index.ts index b4806ccc1..3e9a08571 100644 --- a/agenta-web/src/services/testsets/api/index.ts +++ b/agenta-web/src/services/testsets/api/index.ts @@ -60,6 +60,24 @@ export const fetchTestset = async (testsetId: string | null) => { return response.data } +export const 
uploadTestsets = async (formData: FormData) => { + const response = await axios.post(`${getAgentaApiUrl()}/api/testsets/upload/`, formData, { + headers: { + "Content-Type": "multipart/form-data", + }, + //@ts-ignore + _ignoreError: true, + }) + return response +} + +export const importTestsetsViaEndpoint = async (formData: FormData) => { + const response = await axios.post(`${getAgentaApiUrl()}/api/testsets/endpoint/`, formData, { + headers: {"Content-Type": "multipart/form-data"}, + }) + return response +} + export const deleteTestsets = async (ids: string[]) => { const response = await axios({ method: "delete", diff --git a/agenta-web/src/styles/globals.css b/agenta-web/src/styles/globals.css index 8c54cda8e..19a9ec3ef 100644 --- a/agenta-web/src/styles/globals.css +++ b/agenta-web/src/styles/globals.css @@ -82,3 +82,15 @@ body { .editable-card.ant-tabs-card > .ant-tabs-nav .ant-tabs-tab { transition: none; } + +.ant-input-search-button { + height: 30px !important; + padding-top: 1px; +} + +/* It aligns the settings icon in the table column */ +.ant-table-cell-fix-right-first svg { + display: flex; + justify-content: center; + margin: 0 auto; +} diff --git a/agenta-web/src/styles/tokens/antd-themeConfig.json b/agenta-web/src/styles/tokens/antd-themeConfig.json index ca186750a..47eb48069 100644 --- a/agenta-web/src/styles/tokens/antd-themeConfig.json +++ b/agenta-web/src/styles/tokens/antd-themeConfig.json @@ -281,7 +281,8 @@ "paddingContentVerticalSM": 8, "controlPaddingHorizontal": 12, "controlPaddingHorizontalSM": 8, - + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'", + "fontFamilyCode": "'__Inter_36bd41', '__Inter_Fallback_36bd41'", "fontSize": 12, "fontSizeLG": 14, "fontSizeSM": 10, @@ -377,7 +378,8 @@ "hoverBg": "#ffffff", "inputFontSize": 12, "inputFontSizeLG": 14, - "inputFontSizeSM": 10 + "inputFontSizeSM": 10, + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Transfer": { "listWidthLG": 250, @@ -409,7 +411,8 @@ 
"colorBgContainerDisabled": "rgba(5, 23, 41, 0.04)", "colorBgContainer": "#ffffff", "itemHeight": 28, - "headerHeight": 34 + "headerHeight": 34, + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Segmented": { "segmentedBgColorSelected": "#ffffff", @@ -444,7 +447,8 @@ "itemColor": "#586673", "itemActiveBg": "rgba(5, 23, 41, 0.15)", "trackPadding": 2, - "trackBg": "#ffffff" + "trackBg": "#ffffff", + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Switch": { "trackPadding": 2, @@ -469,7 +473,8 @@ "colorPrimaryHover": "#394857", "colorPrimaryBorder": "#d6dee6", "colorPrimary": "#1c2c3d", - "handleBg": "#ffffff" + "handleBg": "#ffffff", + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "TimePicker": { "timeColumnWidth": 56, @@ -558,7 +563,8 @@ "withoutTimeCellHeight": 66, "inputFontSize": 12, "inputFontSizeLG": 14, - "inputFontSizeSM": 10 + "inputFontSizeSM": 10, + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Timeline": { "itemPaddingBottom": 20, @@ -581,7 +587,8 @@ "colorError": "#d61010", "colorBgContainer": "#ffffff", "tailColor": "rgba(5, 23, 41, 0.06)", - "dotBg": "#ffffff" + "dotBg": "#ffffff", + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Tabs": { "horizontalItemGutter": 24, @@ -628,7 +635,8 @@ "cardBg": "rgba(5, 23, 41, 0.02)", "titleFontSize": 12, "titleFontSizeLG": 14, - "titleFontSizeSM": 12 + "titleFontSizeSM": 12, + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Table": { "stickyScrollBarBorderRadius": 100, @@ -691,7 +699,8 @@ "headerSortHoverBg": "#f0f0f0", "cellFontSize": 12, "cellFontSizeMD": 12, - "cellFontSizeSM": 12 + "cellFontSizeSM": 12, + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Steps": { "dotSize": 8, @@ -741,7 +750,8 @@ "iconSize": 28, "finishIconBorderColor": "#1677ff", "customIconFontSize": 24, - "iconFontSize": 12 + "iconFontSize": 12, + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Spin": { 
"dotSizeSM": 14, @@ -756,7 +766,8 @@ "colorText": "#1c2c3d", "colorPrimary": "#1c2c3d", "colorBgContainer": "#ffffff", - "dotSizeLG": 28 + "dotSizeLG": 28, + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Slider": { "railSize": 4, @@ -793,7 +804,8 @@ "handleActiveColor": "#1c2c3d", "dotBorderColor": "#eaeff5", "dotActiveBorderColor": "#d6dee6", - "handleColorDisabled": "#bfbfbf" + "handleColorDisabled": "#bfbfbf", + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Rate": { "rateStarSize": 20, @@ -803,7 +815,8 @@ "fontSize": 12, "controlHeightLG": 34, "colorText": "#1c2c3d", - "colorFillContent": "rgba(5, 23, 41, 0.06)" + "colorFillContent": "rgba(5, 23, 41, 0.06)", + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Radio": { "radioSize": 16, @@ -847,7 +860,8 @@ "buttonCheckedColorDisabled": "#bdc7d1", "buttonCheckedBgDisabled": "rgba(5, 23, 41, 0.15)", "buttonCheckedBg": "#ffffff", - "buttonBg": "#ffffff" + "buttonBg": "#ffffff", + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Popover": { "titleMinWidth": 177, @@ -865,7 +879,8 @@ "colorTextHeading": "#1c2c3d", "colorText": "#1c2c3d", "colorSplit": "rgba(5, 23, 41, 0.06)", - "colorBgElevated": "#ffffff" + "colorBgElevated": "#ffffff", + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Notification": { "width": 384, @@ -892,7 +907,8 @@ "colorError": "#d61010", "colorBgElevated": "#ffffff", "lineHeightLG": 1.5714285714285714, - "lineWidthFocus": 4 + "lineWidthFocus": 4, + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Tooltip": { "paddingSM": 12, @@ -905,7 +921,8 @@ "borderRadius": 8, "colorTextLightSolid": "#ffffff", "colorText": "#1c2c3d", - "colorBgSpotlight": "rgba(5, 23, 41, 0.9)" + "colorBgSpotlight": "rgba(5, 23, 41, 0.9)", + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Menu": { "subMenuItemBg": "rgba(0, 0, 0, 0)", @@ -969,7 +986,8 @@ "itemMarginInline": 4, "iconMarginInlineEnd": 10, 
"groupTitleFontSize": 12, - "groupTitleLineHeight": 1.6666666666666667 + "groupTitleLineHeight": 1.6666666666666667, + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "InputNumber": { "paddingInlineSM": 7, @@ -1021,7 +1039,8 @@ "hoverBg": "#ffffff", "inputFontSize": 12, "inputFontSizeLG": 14, - "inputFontSizeSM": 10 + "inputFontSizeSM": 10, + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Image": { "previewOperationSize": 18, @@ -1067,7 +1086,8 @@ "headerFontSize": 14, "headerFontSizeSM": 12, "fontHeight": 22, - "fontSizeLG": 14 + "fontSizeLG": 14, + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Carousel": { "dotWidth": 16, @@ -1079,7 +1099,8 @@ "controlHeightSM": 24, "controlHeightLG": 34, "colorText": "#1c2c3d", - "colorBgContainer": "#ffffff" + "colorBgContainer": "#ffffff", + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Cascader": { "dropdownHeight": 180, @@ -1115,7 +1136,8 @@ "colorBgContainer": "#ffffff", "optionSelectedBg": "#f5f7fa", "menuPadding": 4, - "optionSelectedFontWeight": 600 + "optionSelectedFontWeight": 600, + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Calendar": { "yearControlWidth": 80, @@ -1157,7 +1179,8 @@ "itemActiveBg": "#f5f7fa", "fullPanelBg": "#ffffff", "fullBg": "#ffffff", - "fontHeightSM": 20 + "fontHeightSM": 20, + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Button": { "paddingInlineSM": 7, @@ -1249,7 +1272,8 @@ "textFontSize": 10, "textFontSizeSM": 10, "fontHeight": 22, - "lineHeight": 1.6666666666666667 + "lineHeight": 1.6666666666666667, + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Form": { "screenXSMax": 575, @@ -1285,7 +1309,8 @@ "labelColonMarginInlineEnd": 8, "labelColonMarginInlineStart": 2, "labelHeight": 28, - "labelFontSize": 12 + "labelFontSize": 12, + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Avatar": { "marginXXS": 4, @@ -1308,7 +1333,8 @@ "containerSize": 28, 
"textFontSize": 18, "textFontSizeLG": 20, - "textFontSizeSM": 12 + "textFontSizeSM": 12, + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Tour": { "sizePopupArrow": 16, @@ -1334,7 +1360,7 @@ "closeBtnSize": 22, "primaryNextBtnHoverBg": "#f0f0f0", "primaryPrevBtnBg": "rgba(255, 255, 255, 0.15)", - + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'", "lineWidthFocus": 4 }, "QRCode": { @@ -1347,7 +1373,8 @@ "borderRadiusLG": 10, "colorWhite": "#ffffff", "colorText": "#1c2c3d", - "colorSplit": "rgba(5, 23, 41, 0.06)" + "colorSplit": "rgba(5, 23, 41, 0.06)", + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Upload": { "paddingXS": 8, @@ -1380,7 +1407,8 @@ "colorBgMask": "rgba(5, 23, 41, 0.45)", "fontHeight": 22, "fontHeightSM": 20, - "lineWidthFocus": 4 + "lineWidthFocus": 4, + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Typography": { "paddingSM": 12, @@ -1411,7 +1439,8 @@ "colorLink": "#1c2c3d", "colorErrorHover": "#de4040", "colorErrorActive": "#ab0d0d", - "colorError": "#d61010" + "colorError": "#d61010", + "fontFamilyCode": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "TreeSelect": { "paddingXS": 8, @@ -1441,7 +1470,8 @@ "colorBgContainer": "#ffffff", "titleHeight": 24, "nodeSelectedBg": "#f5f7fa", - "nodeHoverBg": "rgba(5, 23, 41, 0.04)" + "nodeHoverBg": "rgba(5, 23, 41, 0.04)", + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Tree": { "paddingSM": 12, @@ -1475,7 +1505,8 @@ "nodeSelectedBg": "#f5f7fa", "nodeHoverBg": "rgba(5, 23, 41, 0.04)", "directoryNodeSelectedColor": "#ffffff", - "directoryNodeSelectedBg": "#1c2c3d" + "directoryNodeSelectedBg": "#1c2c3d", + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Tag": { "paddingXXS": 4, @@ -1507,7 +1538,8 @@ "colorErrorBorder": "#ef9f9f", "colorErrorBg": "#fbe7e7", "defaultColor": "#1c2c3d", - "defaultBg": "rgba(5, 23, 41, 0.02)" + "defaultBg": "rgba(5, 23, 41, 0.02)", + "fontFamily": "'__Inter_36bd41', 
'__Inter_Fallback_36bd41'" }, "Statistic": { "padding": 16, @@ -1519,7 +1551,8 @@ "colorTextDescription": "#758391", "colorText": "#1c2c3d", "contentFontSize": 20, - "titleFontSize": 12 + "titleFontSize": 12, + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Skeleton": { "padding": 16, @@ -1593,7 +1626,8 @@ "multipleItemBg": "rgba(5, 23, 41, 0.06)", "clearBg": "#ffffff", "optionFontSize": 12, - "optionSelectedFontWeight": 600 + "optionSelectedFontWeight": 600, + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Result": { "paddingXS": 8, @@ -1630,7 +1664,8 @@ "colorBgContainer": "#ffffff", "remainingColor": "rgba(5, 23, 41, 0.06)", "defaultColor": "#1c2c3d", - "circleTextColor": "#1c2c3d" + "circleTextColor": "#1c2c3d", + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Popconfirm": { "marginXXS": 4, @@ -1690,7 +1725,8 @@ "itemBg": "#ffffff", "itemActiveColorDisabled": "#bdc7d1", "itemActiveBgDisabled": "rgba(5, 23, 41, 0.15)", - "itemActiveBg": "#ffffff" + "itemActiveBg": "#ffffff", + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Modal": { "screenSMMax": 767, @@ -1727,7 +1763,8 @@ "contentBg": "#ffffff", "titleFontSize": 16, "titleLineHeight": 1.25, - "fontHeight": 22 + "fontHeight": 22, + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Message": { "paddingXS": 8, @@ -1743,7 +1780,8 @@ "colorSuccess": "#389e0d", "colorInfo": "#1c2c3d", "colorError": "#d61010", - "contentBg": "#ffffff" + "contentBg": "#ffffff", + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "List": { "screenSM": 576, @@ -1780,7 +1818,8 @@ "headerBg": "rgba(0, 0, 0, 0)", "footerBg": "rgba(0, 0, 0, 0)", "avatarMarginRight": 16, - "descriptionFontSize": 12 + "descriptionFontSize": 12, + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "FloatButton": { "paddingXXS": 4, @@ -1803,7 +1842,8 @@ "colorPrimaryHover": "#394857", "colorPrimary": "#1c2c3d", "colorFillContent": "rgba(5, 23, 41, 0.06)", 
- "colorBgElevated": "#ffffff" + "colorBgElevated": "#ffffff", + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Empty": { "colorTextDisabled": "#bdc7d1", @@ -1839,7 +1879,8 @@ "colorPrimary": "#1c2c3d", "colorError": "#d61010", "colorBgElevated": "#ffffff", - "paddingBlock": 5 + "paddingBlock": 5, + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Drawer": { "paddingXS": 8, @@ -1868,7 +1909,8 @@ "colorTextHeading": "#1c2c3d", "colorText": "#1c2c3d", "colorSplit": "rgba(5, 23, 41, 0.06)", - "lineHeight": 1.6666666666666667 + "lineHeight": 1.6666666666666667, + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Descriptions": { "paddingXS": 8, @@ -1893,7 +1935,8 @@ "colorFillAlter": "rgba(5, 23, 41, 0.02)", "titleColor": "#1c2c3d", "labelBg": "rgba(5, 23, 41, 0.02)", - "contentColor": "#1c2c3d" + "contentColor": "#1c2c3d", + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "DatePicker": { "sizePopupArrow": 16, @@ -1993,7 +2036,8 @@ "inputFontSizeLG": 14, "inputFontSizeSM": 10, "fontHeight": 22, - "fontHeightLG": 24 + "fontHeightLG": 24, + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Collapse": { "paddingXXS": 4, @@ -2018,7 +2062,8 @@ "contentBg": "#ffffff", "fontHeight": 22, "fontHeightLG": 24, - "lineHeightLG": 1.5714285714285714 + "lineHeightLG": 1.5714285714285714, + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Checkbox": { "paddingXS": 8, @@ -2039,7 +2084,8 @@ "colorPrimary": "#1c2c3d", "colorBorder": "#bdc7d1", "colorBgContainerDisabled": "rgba(5, 23, 41, 0.04)", - "colorBgContainer": "#ffffff" + "colorBgContainer": "#ffffff", + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Breadcrumb": { "paddingXXS": 4, @@ -2059,7 +2105,8 @@ "itemColor": "#758391", "separatorMargin": 8, "iconFontSize": 12, - "fontHeight": 22 + "fontHeight": 22, + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Anchor": { "paddingXXS": 4, @@ -2071,7 +2118,8 @@ 
"colorSplit": "rgba(5, 23, 41, 0.06)", "colorPrimary": "#1c2c3d", "linkPaddingInlineStart": 16, - "linkPaddingBlock": 4 + "linkPaddingBlock": 4, + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Alert": { "paddingMD": 20, @@ -2102,7 +2150,8 @@ "colorErrorBorder": "#ef9f9f", "colorErrorBg": "#fbe7e7", "colorError": "#d61010", - "withDescriptionIconSize": 20 + "withDescriptionIconSize": 20, + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'" }, "Space": { "paddingXS": 8, @@ -2153,7 +2202,7 @@ "optionActiveBg": "rgba(0, 0, 0, 0.04)", "optionHeight": 32, "optionFontSize": 12, - + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'", "fontSize": 12, "fontSizeIcon": 12, "fontSizeLG": 14, @@ -2180,6 +2229,7 @@ "fontSizeXL": 18 }, "Mentions": { + "fontFamily": "'__Inter_36bd41', '__Inter_Fallback_36bd41'", "fontSize": 12, "lineHeight": 1.6666666666666667 } diff --git a/agenta-web/tailwind.config.ts b/agenta-web/tailwind.config.ts index 9c78f09dd..755e4924b 100644 --- a/agenta-web/tailwind.config.ts +++ b/agenta-web/tailwind.config.ts @@ -14,6 +14,9 @@ const config: Config = { transparent: "transparent", current: "currentColor", extend: { + fontFamily: { + sans: ["var(--font-inter)"], + }, colors: { // light mode tremor: { diff --git a/docs/docs/prompt_management/01-concepts.mdx b/docs/docs/prompt_management/01-concepts.mdx new file mode 100644 index 000000000..e482a1a43 --- /dev/null +++ b/docs/docs/prompt_management/01-concepts.mdx @@ -0,0 +1,102 @@ +--- +title: "Core Concepts" +--- + +# Core Concepts + +This page describes the main concepts used in prompt management in **agenta**. + +## Prompt and Configuration Management + +### What Is Prompt Management? + +Building LLM-powered applications is an iterative process. In each iteration, you aim to improve the application's performance by refining prompts, adjusting configurations, and evaluating outputs. 
+ + + +A prompt management system provides you the tools to do this process systematically by: + +- **Versioning Prompts**: Keeping track of different prompts you've tested. +- **Linking Prompt Variants to Experiments**: Connecting each prompt variant to its evaluation metrics to understand the effect of changes and determine the best variant. +- **Publishing Prompts**: Providing a way to publish the best prompt variants to production and maintain a history of changes in production systems. +- **Associating Prompts with Traces**: Monitoring how changes in prompts affect production metrics. + +### Why Do I Need a Prompt Management System? + +A prompt management system enables everyone on the team—from product owners to subject matter experts—to collaborate in creating prompts. Additionally it helps you answer the following questions: + +- Which prompts have we tried? +- What were the outputs of these prompts? +- How do the evaluation results of these prompts compare? +- Which prompt was used for a specific generation in production? +- What was the effect of publishing the new version of this prompt in production? +- Who on the team made changes to a particular prompt in production? + +### What Is the Difference Between Prompt and Configuration Management? + +Agenta goes beyond prompt management to encompass the entire configuration of your LLM applications. + +**Prompts** are a special case of a **configuration**. A **prompt** includes the prompt template, the model, and the model parameters. However, a **configuration** of an LLM application can include additional parameters. +For instance, an LLM application using a chain of two prompts would have a configuration that includes the two prompts and their respective model parameters. Similarly, an application that includes a RAG pipeline would have a configuration that includes parameters such as `top_k` and `embedding`. + +Agenta enables you to version the entire `configuration` of the LLM app as a unit. 
This makes sense since there is a dependency between the parts of the configuration. For instance in a chain of two prompts, the changes of the first depend on the changes of the second. Therefore you need to version them together to ensure consistency and traceability. + +## Taxonomy of Terms and Concepts in agenta + +Below are the description to the main terms and concepts used in agenta. + + + +### Templates + +**Templates** are the workflows used by LLM-powered applications. Agenta comes with two default templates: + +- **Completion Application Template:** For single-prompt applications that generate text completions. +- **Chat Application Template:** For applications that handle conversational interactions. + +Agenta also allows you to create custom templates for your workflows using our SDK. Examples include: + +- Retrieval-Augmented Generation (RAG) Applications +- Chains of Multiple Prompts +- Agents Interacting with External APIs + +After creating a template, you can interact with it in the playground, run no-code evaluations, and publish versions all from the webUI. + +### Applications + +An **application** uses a **template** to solve a specific use case. For instance, an **application** could use the single-prompt **template** for tasks like: + +- **Tweet Generation:** Crafting engaging tweets based on input topics. +- **Article Summarization:** Condensing long articles into key points. + +### Variants + +Within each application, you can create **variants**. **Variants** are different configurations of the application, allowing you to experiment with and compare multiple approaches. For example, for the "tweet generation" application, you might create **variants** that: + +- Use different prompt phrasings. +- Adjust model parameters like temperature or maximum tokens. +- Incorporate different styles or tones (e.g., professional vs. casual). + +### Versions + +Every **variant** is **versioned** and immutable. 
When you make changes to a **variant**, a new **version** is created. Each **version** has a **commit id** that uniquely identifies it. + +### Endpoints + +**Endpoints** are the interfaces where your published variants are accessible. You can publish a **version** of a **variant** to an **endpoint**. Each **endpoint** has a user-defined environment name (e.g. development, staging, production) that specifies its context or stage. + +You can then integrate the **endpoint** into your codebase to fetch the configuration published on that **endpoint**. Additionally, you can directly call the **endpoint** containing the application running with that configuration. + +By default, applications come with three predefined environment names for **endpoints**: + +- **Development:** For initial testing and experimentation. +- **Staging:** For pre-production testing and quality assurance. +- **Production:** For live use with real users. + +When publishing a **variant** to an **endpoint**, the latest **version** of that **variant** gets published. Each **endpoint** points to a specific **version** of a **variant** (a certain **commit**). Updating the **variant** after publishing does not automatically update the **endpoint**. diff --git a/docs/docs/prompt_management/setting_up/creating_an_app.mdx b/docs/docs/prompt_management/02-creating-a-prompt.mdx similarity index 63% rename from docs/docs/prompt_management/setting_up/creating_an_app.mdx rename to docs/docs/prompt_management/02-creating-a-prompt.mdx index bf82cc359..5b0ae6e38 100644 --- a/docs/docs/prompt_management/setting_up/creating_an_app.mdx +++ b/docs/docs/prompt_management/02-creating-a-prompt.mdx @@ -1,22 +1,34 @@ --- -title: 'Creating an LLM App' +title: "Creating a Prompt" --- :::note -You can create applications in Agenta either from the web interface or from code. This guide will focus on creating an application using a template from the UI. 
You can read more about creating a custom application using code [here](/prompt_management/setting_up/custom_applications) +You can create applications in Agenta either from the web interface or from code. This guide will focus on creating an application using a template from the UI. You can read more about creating a custom application using code [here](/prompt_management/setting_up/custom_applications) ::: ## Step-by-step Guide 1. **Navigate to the main page**: This is where you can create a new application. - - + + 2. **Choose a template**: Currently, we offer templates for single prompt applications and chat applications. - - + + ### Single Prompt Application @@ -30,15 +42,28 @@ This template is based on the OpenAI specification and uses both the system prom While you could write the same application using only the user prompt or the system prompt, it is best to experiment with both approaches. Usually, the system-prompt is typically used for high-level instruction. - - + + ### Chat Application + Like the single prompt application, the chat application is based on the OpenAI specification and uses both the system prompt and user prompt, but it is designed for multi-turn applications like chatbots. - - + + ## Next steps -Now that you've created an application, you can learn how to do [prompt engineering in the playground](/prompt_management/prompt_engineering). \ No newline at end of file +Now that you've created an application, you can learn how to do [prompt engineering in the playground](/prompt_management/prompt_engineering). 
diff --git a/docs/docs/prompt_management/setting_up/custom_applications.mdx b/docs/docs/prompt_management/03-creating-a-custom-template.mdx similarity index 98% rename from docs/docs/prompt_management/setting_up/custom_applications.mdx rename to docs/docs/prompt_management/03-creating-a-custom-template.mdx index 244e948a4..a95573f73 100644 --- a/docs/docs/prompt_management/setting_up/custom_applications.mdx +++ b/docs/docs/prompt_management/03-creating-a-custom-template.mdx @@ -1,9 +1,9 @@ --- -title: "Creating Custom Applications" +title: "Creating Custom Template" description: "Learn how to use your custom application with Agenta" --- -Agenta comes with several pre-built template LLM applications for common use cases, such as single prompt and chatbot. However, you can also create your own custom application with Agenta. This could be a RAG application, a custom agent, a chain of prompts, or any custom logic. +Agenta comes with several pre-built template LLM applications for common use cases, such as single prompt and chatbot. However, you can also create your own custom application with Agenta. This could be a **RAG application**, a custom agent, a chain of prompts, or any custom logic. This guide will show you how to create a custom application and use it with Agenta. diff --git a/docs/docs/prompt_management/prompt_engineering.mdx b/docs/docs/prompt_management/04-using-the-playground.mdx similarity index 70% rename from docs/docs/prompt_management/prompt_engineering.mdx rename to docs/docs/prompt_management/04-using-the-playground.mdx index 68b0f795a..3fd10ef8a 100644 --- a/docs/docs/prompt_management/prompt_engineering.mdx +++ b/docs/docs/prompt_management/04-using-the-playground.mdx @@ -1,15 +1,15 @@ --- -title: 'Prompt Engineering' -description: 'Using the playground for prompt engineering.' 
+title: "Using the Playground" --- The agenta playground is a platform that lets you create, modify, and compare different prompts and configurations for your LLM application. - ## Prompt Templates + An LLM takes a prompt and returns a completion. To make the prompt reusable, we need to be able to dynamically modify it based on a set of inputs. A prompt template is a prompt that can be dynamically modified based on a set of inputs. For instance, a prompt to create a blog post might look like this: + ``` Write a blog post about {subject} ``` @@ -22,8 +22,14 @@ The LLM app templates in agenta use the f-string templating language, with promp To add new inputs to the LLM app, navigate to the playground and modify the inputs under "Modify Parameters." - - + + After adding an input, incorporate it into the prompt templates using the curly bracket syntax. @@ -31,8 +37,14 @@ After adding an input, incorporate it into the prompt templates using the curly You can create a new variant of an application by clicking on "Add Variant" in the "Side-by-side" view or the "+" tab in the "Tab view". - - + + You'll then see a window where you can select the source variant to use as a template and provide a new name for your variant. @@ -44,12 +56,24 @@ Fill in the inputs in the cards and click "Run" to test a variant dynamically. You can also load a test set to populate the playground with a set of inputs. Then You can click Run all to run all the inputs in the test set. - - + + ## Comparing variants side by side Click on the "Side-by-side" tab to compare variants. From the dropdowns, select the variants you wish to compare. This allows you to view the results of multiple variants simultaneously. When using a chat application, you can interact with different variants in parallel. 
- - + + diff --git a/docs/docs/prompt_management/deployment.mdx b/docs/docs/prompt_management/05-publishing-changes.mdx similarity index 56% rename from docs/docs/prompt_management/deployment.mdx rename to docs/docs/prompt_management/05-publishing-changes.mdx index 1630ec93b..744b90938 100644 --- a/docs/docs/prompt_management/deployment.mdx +++ b/docs/docs/prompt_management/05-publishing-changes.mdx @@ -1,12 +1,11 @@ --- -title: 'Deployment' -description: 'Learn how to integrate Agenta with your application' +title: "Publishing Changes" +description: "Learn how to integrate Agenta with your application" --- After using the playground to find a good configuration for your application, it's time to deploy the application. By deploying the application, you can integrate it with short code snippet in your applicatoin. You can later change the configuration from the UI without having to update the code. - Agenta provides a way to deploy an application to multiple environments: development, staging, and production. Each environment has its own unique configuration. ## Deploying an application to an environment: @@ -14,13 +13,25 @@ Agenta provides a way to deploy an application to multiple environments: develop 1. Navigate to the playground for the variant you want to deploy. 2. Click on 'Publish'. Make sure the variant is saved before deployment. - - + + 3. Select the environment and click on "Publish" - - + + The application is now deployed to the chosen environment and is accessible as an API endpoint. @@ -29,12 +40,24 @@ The application is now deployed to the chosen environment and is accessible as a 1. Go to the 'API endpoint' section. 2. Choose the environment where the application was deployed. - - + + 3. Here, you'll find the application code in various languages, which can be embedded in your application code. - - + + -Additionally, you can use the configuration in your application code without using Agenta deployment. 
This can be achieved by using the Python SDK or the REST API. More information can be found on the page 'Integrating Agenta with Your Application'.` \ No newline at end of file +Additionally, you can use the configuration in your application code without using Agenta deployment. This can be achieved by using the Python SDK or the REST API. More information can be found on the page 'Integrating Agenta with Your Application'.` diff --git a/docs/docs/prompt_management/integrating.mdx b/docs/docs/prompt_management/06-integrating-with-agenta.mdx similarity index 100% rename from docs/docs/prompt_management/integrating.mdx rename to docs/docs/prompt_management/06-integrating-with-agenta.mdx diff --git a/docs/docs/prompt_management/setting_up/_category_.json b/docs/docs/prompt_management/setting_up/_category_.json deleted file mode 100644 index 448647410..000000000 --- a/docs/docs/prompt_management/setting_up/_category_.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "position": 1, - "label": "Setting Up" -} diff --git a/docs/docs/prompt_management/setting_up/using_agenta_from_cli.mdx b/docs/docs/prompt_management/setting_up/using_agenta_from_cli.mdx deleted file mode 100644 index bc0f754be..000000000 --- a/docs/docs/prompt_management/setting_up/using_agenta_from_cli.mdx +++ /dev/null @@ -1,52 +0,0 @@ ---- -title: "Using Agenta from CLI" -description: "Create, experiment, and evaluate your applications all from the CLI" ---- - -Agenta was designed for use both from the CLI and from the web interface. This guide explains the basics of using Agenta from the CLI. For more details, refer to the [CLI developer guide](/reference/cli/quick-usage). - -## Installation - -The agenta CLI can be easily installed through pip: - -```bash -pip install -U agenta -``` - -## Creating an application - -1. Create a new project - -To create an application in Agenta, first, initialize an empty project. 
Run the following command in the folder containing your application code: - -```bash -agenta init -``` - -This will prompt you for the project name, the Agenta host, and the API key (if using the cloud or enterprise version). - -Running `agenta init` creates a blank project in Agenta and generates a config.toml file in that folder, which contains all the information about your project. - -2. Serve the first app variant - With the project created, we need to add the first app variant to it. - This can be done by running the following command: - `bash - agenta variant serve - ` - -This will create a new app variant in Agenta under the name filename.default. Here, filename is the name of the codebase containing the app logic, while default is a default configuration created for that codebase. Each new app variant created from the web interface or from the CLI will always have the name format `.`. - - Running this comand will [create a container for the application](/guides/how_does_agenta_work) with a REST API endpoint. This endpoint is what is used by the agenta web interface to communicate with the application. - -The CLI will also display the URL of the endpoint, which can be used to test the application. - -## Adding a multiple variants for one application - -You can add multiple app variants to a single application using the CLI. This is useful if you want to test different workflows for the same application (i.e. single prompt vs. chain of prompts). To do this, create a new python file containing the new logic, then execute the following command: - -```bash -agenta variant serve - -``` - -In the UI, you'll find the new variant under the same application, labeled `.default`. You can modify the configuration for this variant in the playground and create numerous variants based on it. 
diff --git a/docs/sidebars.ts b/docs/sidebars.ts index da050c745..d33132f4e 100644 --- a/docs/sidebars.ts +++ b/docs/sidebars.ts @@ -16,7 +16,7 @@ const sidebars: SidebarsConfig = { items: [{ type: "autogenerated", dirName: "getting_started" }], }, { - label: "Prompt Management & Engineering", + label: "Prompt Management", ...CATEGORY_UTILITIES, items: [{ type: "autogenerated", dirName: "prompt_management" }], }, diff --git a/docs/static/images/prompt_management/llm_lifecycle.png b/docs/static/images/prompt_management/llm_lifecycle.png new file mode 100644 index 000000000..0316057bb Binary files /dev/null and b/docs/static/images/prompt_management/llm_lifecycle.png differ diff --git a/docs/static/images/prompt_management/taxonomy_agenta.png b/docs/static/images/prompt_management/taxonomy_agenta.png new file mode 100644 index 000000000..c9f3793a7 Binary files /dev/null and b/docs/static/images/prompt_management/taxonomy_agenta.png differ