Comparing changes

base repository: evidentlyai/evidently
base: v0.4.37
head repository: evidentlyai/evidently
compare: main
Commits on Sep 6, 2024

  1. 4d83af5

Commits on Sep 7, 2024

  1. 9d7990b

Commits on Sep 9, 2024

  1. UI: fix biome (#1291)

    * Update .pre-commit-config.yaml
    
    * Update README.md
    
    * Update ui.yml
    DimaAmega authored Sep 9, 2024 (5410270)
  2. 29dca00

Commits on Sep 10, 2024

  1. Update NavigationProgress.tsx z-index (#1295)

    * Update NavigationProgress.tsx
    
    * commit UI assets
    DimaAmega authored Sep 10, 2024 (cdd39cf)

Commits on Sep 11, 2024

  1. dc562bc
  2. add base api model (#1298)

    mike0sv authored Sep 11, 2024 (5813560)

Commits on Sep 13, 2024

  1. Fixing minor typo (#1303)

    sapenov authored Sep 13, 2024 (05023a3)

Commits on Sep 15, 2024

  1. 68f56f7

Commits on Sep 18, 2024

  1. UUID7 (#1267)

    * UUID7
    
    * fixes and hopefully lint
    
    * fix mypy
    
    * fix tests
    
    * lint
    
    * validate new and old uuids
    
    * remove uuid7 requirement
    
    * remove more uuid4
    
    * serialization
    
    * change aliases to base uuid type
    
    * remove ser stuff
    
    * introduce new_id
    
    * fix
    
    * move
    mike0sv authored Sep 18, 2024 (40817e1)
  2. ALIES INVASION (#1301)

    * add alias_required
    
    * remove abc
    
    * fix tests
    
    * alias test WIP
    
    * alias test WIP
    
    * alias test WIP
    
    * alias test WIP
    
    * alias test WIP
    
    * alias test WIP
    
    * alias test WIP
    
    * alias test WIP
    
    * add aliases (#1302)
    
    * alias registries and tests
    
    * test classpaths exist
    
    * fix mypy
    
    * transitive aliases
    
    * fix lint
    
    * better error
    mike0sv authored Sep 18, 2024 (3a9e199)

Commits on Sep 23, 2024

  1. 59c57d6

Commits on Sep 25, 2024

  1. make ui/base async (#1307)

    mike0sv authored Sep 25, 2024 (9133cf6)
  2. 55f49ad

Commits on Sep 26, 2024

  1. 8472cbf

Commits on Sep 27, 2024

  1. Version 0.4.38

    emeli-dral committed Sep 27, 2024 (cd6cbfb)

Commits on Oct 5, 2024

  1. 42d836a

Commits on Oct 11, 2024

  1. run-snapshot-async (#1330)

    * add task_id to snapshot links
    ---------
    
    Co-authored-by: mike0sv <mike0sv@gmail.com>
    svetleo-evidently and mike0sv authored Oct 11, 2024 (1a3728d)

Commits on Oct 14, 2024

  1. 634568b

Commits on Oct 16, 2024

  1. adc7c51
  2. Feature/dataset generator (#1340)

    * base
    
    * i am a banana
    
    * fix example
    
    * generate_dataset_from_docs
    
    * wip
    
    * wip
    
    * wip
    
    * wip
    
    * a draft code for a RAG dataset generation
    
    * generate_dataset_from_docs
    
    * generate_dataset_from_docs
    
    * generate_dataset_from_docs
    
    * readable version
    
    * wip
    
    * wip
    
    * wip
    
    * generate_dataset_from_docs
    
    * prompts
    
    * generate_dataset_from_docs
    
    * generate_dataset_from_docs
    
    * fixes
    
    * fixes
    
    * generate_dataset_from_docs
    
    * chunks count
    
    * async
    
    * generate_dataset_from_docs
    
    * generate_dataset_from_docs
    
    * generate_dataset_from_docs
    
    * generate_dataset_from_docs
    
    * generate_dataset_from_docs
    
    * WIP
    
    * fix
    
    * rename
    
    * fix import
    
    * move system prompts into user
    
    * generate_dataset_from_docs
    
    * generate_dataset_from_docs
    
    * generate_dataset_from_docs
    
    * generate_dataset_from_docs
    
    * prompt function signature
    
    * function signature validation
    
    * requirements
    
    * requirements
    
    * requirements
    
    * requirements
    
    * requirements
    
    * requirements
    
    * lil cleanup
    
    * mypy
    
    * move, add splitter
    
    * fix example and deps
    
    * rename
    
    * lint
    
    * lint
    
    * audit
    
    * type aliases and audit
    
    * type aliases and sudit and llm util refactor and stuff
    
    * fix import
    
    * pip audit
    
    * pip audit
    
    * reg
    
    * pip audit
    
    * remove
    
    ---------
    
    Co-authored-by: Svetlana Popova <svetleo@evidentlyai.com>
    Co-authored-by: Emeli Dral <emeli.dral@gmail.com>
    3 people authored Oct 16, 2024 (8a05265)
  3. Remove pip-audit (#1345)

    Liraim authored Oct 16, 2024 (5c933ea)
  4. Add JSONSchemaMatch feature and descriptor (#1342)

    * Add JSONSchemaMatch() descriptor
    
    * Add feature for JSONSchemaMatch() descriptor
    
    * Tests for JSONSchemaMatch() descriptor
    
    * Update docs to include documentation for JSONSchemaMatch() descriptor
    
    * Reformat JSONSchemaMatch in feature registry
    
    ---------
    
    Co-authored-by: Trey Capps <treycapps@Treys-MacBook-Pro.local>
    trey-capps and Trey Capps authored Oct 16, 2024 (4bd1c13)

Commits on Oct 17, 2024

  1. IsValidJSON() descriptor (#1325)

    * isvalidjson descriptor
    
    * test cases
    
    * review comments
    
    * doc
    
    * review comments
    
    * review comments
    
    * review comments
    
    ---------
    
    Co-authored-by: Emeli Dral <emeli@evidentlyai.com>
    gagan-bhullar-tech and emeli-dral authored Oct 17, 2024 (9f2fbb8)
  2. aaf6423
  3. Add WordMatch + WordNoMatch descriptor (#1334)

    * initial implementation
    
    * Initial unit tests
    
    * Update test_words_feature.py
    
    * clean up
    
    * Remove extra input data
    
    * Initial implementation
    
    * Update type alias
    
    * Add WordNoMatch to init
    
    * add display name parameter
    
    * Add test case from original issue
    
    * Fix incorrect aliases
    
    * update default display name
    
    * add column check to tests
    
    * fix failing code analysis
    
    * fix double quote syntax issue
    
    ---------
    
    Co-authored-by: Emeli Dral <emeli@evidentlyai.com>
    jon-bown and emeli-dral authored Oct 17, 2024 (0d02c62)

Commits on Oct 18, 2024

  1. use new wrapper/prompt api (#1346)

    * use new wrapper/prompt api
    
    * wip
    
    * mypy
    
    * mypy
    
    * oops
    
    * test run report with judge
    mike0sv authored Oct 18, 2024 (5f87168)

Commits on Oct 22, 2024

  1. b61d96f
  2. d3e21fb
  3. 3106c3b
  4. 082f131
  5. edfa07a

Commits on Oct 23, 2024

  1. 3c3b9fb
  2. Update mypy version and rules.

    Liraim committed Oct 23, 2024 (e361740)
  3. 899c783
  4. 5628c8b
  5. 53d3b7b
  6. Make MetricRenderer generic.

    Liraim committed Oct 23, 2024 (46a71b3)
  7. 9ca4525
  8. 284ff4a
  9. 483e743
  10. e40e920
  11. 7b05262
  12. 9035eb2
  13. 35fbad6
  14. 4611385
  15. Make Recsys tests generic.

    Liraim committed Oct 23, 2024 (a845a3a)
  16. 06a1d26
  17. ef71527
  18. fa90a1f
Showing 563 changed files with 26,228 additions and 9,123 deletions.
8 changes: 7 additions & 1 deletion .github/share-actions/ui-node-pnpm-install/action.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
name: UI install
inputs:
args:
type: string
description: ""
required: false
default: ""
runs:
using: "composite"
steps:
@@ -15,5 +21,5 @@ runs:

- name: 📥 Install node dependencies
working-directory: ui
- run: pnpm i --frozen-lockfile --ignore-scripts
+ run: pnpm i --frozen-lockfile --ignore-scripts ${{ inputs.args }}
shell: bash
4 changes: 3 additions & 1 deletion .github/workflows/examples.yml
@@ -42,8 +42,10 @@ jobs:
if: matrix.minimal
run: pip install -r requirements.min.txt
- name: Prepare examples dependencies
- run: pip install catboost sentence-transformers
+ run: pip install catboost sentence-transformers openai
- name: Export examples
run: jupyter nbconvert --to python examples/*/*.ipynb --output-dir example_scripts
- name: Run examples
run: python example_test.py
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY_GIT_DEV }}
15 changes: 8 additions & 7 deletions .github/workflows/main.yml
@@ -98,6 +98,8 @@ jobs:
- changed_files
- prepare-cache-data
if: ${{ github.event.pull_request.draft == false && needs.changed_files.outputs.evidently_any_modified == 'true' }}
env:
EVIDENTLY_TEST_ENVIRONMENT: 1
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
@@ -115,22 +117,22 @@ jobs:
- name: Install minimal dependencies
run: pip install -r requirements.min.txt
- name: Install package
- run: pip install -e .[dev,spark,fsspec]
- - name: Run pip-audit
- run: pip-audit --ignore-vuln PYSEC-2024-48 --ignore-vuln GHSA-jw8x-6495-233v --ignore-vuln GHSA-4hq2-rpgc-r8r7
+ run: pip install -e .[dev,spark,fsspec,llm]
- name: Run Tests
run: python -m pytest --durations=50
test:
# The type of runner that the job will run on
name: Test ${{ matrix.os }} with py${{ matrix.python }}
env:
EVIDENTLY_TEST_ENVIRONMENT: 1
needs:
- linter
- prepare-cache-data
runs-on: ${{ matrix.os }}
if: github.event.pull_request.draft == false
strategy:
matrix:
- os: [ubuntu-22.04, windows-2022, macos-13]
+ os: [ubuntu-22.04, windows-2022, macos-14]
python: ["3.8", "3.9", "3.10", "3.11", "3.12"]
exclude:
- os: windows-latest
@@ -141,7 +143,6 @@ jobs:
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python }}
- architecture: "x64"
cache: "pip"
cache-dependency-path: setup.py
- uses: actions/cache@v4
@@ -155,7 +156,7 @@ jobs:
uses: ./.github/share-actions/get-bikes-dataset-cached

- name: Install package
- run: pip install -e .[dev,spark,fsspec]
+ run: pip install -e .[dev,spark,fsspec,llm]
- name: Run Tests
run: python -m pytest --durations=50

@@ -173,7 +174,7 @@ jobs:
cache: "pip"
cache-dependency-path: setup.py
- name: Install dependencies
- run: pip install -e ".[dev]"
+ run: pip install -e .
- name: Install wheel
run: pip install wheel
- name: Build package
31 changes: 15 additions & 16 deletions .github/workflows/ui.yml
@@ -57,8 +57,8 @@ jobs:
echo "One or more evidently_python file(s) has changed."
echo "List all the files that have changed: ${{ steps.changed-files.outputs.evidently_python_all_changed_and_modified_files }}"
- ui-type-check:
- name: UI type-check
+ ui-code-check:
+ name: UI code-check
runs-on: ubuntu-22.04
needs: changed_files
if: ${{ github.event.pull_request.draft == false && (needs.changed_files.outputs.ui_any_modified == 'true' || needs.changed_files.outputs.evidently_python_any_modified == 'true') }}
@@ -77,6 +77,10 @@ jobs:
working-directory: ui
run: pnpm type-check

- name: 🔬 Check code quality
working-directory: ui
run: pnpm code-check

ui-deps-analyze:
name: UI deps analyze
runs-on: ubuntu-22.04
@@ -147,26 +151,21 @@ jobs:
uses: ./.github/share-actions/ui-node-pnpm-install

- name: Install Playwright Browsers
- working-directory: ui
- run: pnpm dlx playwright@1.43.0 install --with-deps
+ working-directory: ui/service
+ run: pnpm exec playwright install --with-deps chromium

- - uses: actions/cache@v3
- id: cache-bikes-dataset
- env:
- cache-name: cache-bikes-dataset
- with:
- path: Bike-Sharing-Dataset.zip
- key: cache-bikes-dataset
- - name: Download test data
- if: ${{ steps.cache-bikes-dataset.outputs.cache-hit != 'true' }}
- run: curl -k https://archive.ics.uci.edu/static/public/275/bike+sharing+dataset.zip -o Bike-Sharing-Dataset.zip
+ - name: 🔍 Get bikes dataset cached
+ uses: ./.github/share-actions/get-bikes-dataset-cached

- name: Run UI
- run: EXPERIMENTAL_DETERMINISTIC_UUID="true" evidently ui --port 8000 --workspace workspace-for-visual-testing --demo-projects all &
+ env:
+ EXPERIMENTAL_DETERMINISTIC_UUID: "true"
+ EVIDENTLY_TEST_ENVIRONMENT: 1
+ run: evidently ui --port 8000 --workspace workspace-for-visual-testing --demo-projects all &

- name: Wait UI to be ready to test
working-directory: ui/service
- run: pnpm wait-on tcp:127.0.0.1:8000 -t 200000
+ run: pnpm wait-on tcp:127.0.0.1:8000 -t 6m

- name: Run Service Playwright tests
working-directory: ui/service
9 changes: 5 additions & 4 deletions .pre-commit-config.yaml
@@ -19,7 +19,8 @@ repos:
args: [--exit-non-zero-on-fix, --fix]
- id: ruff-format

- # - repo: https://github.com/pre-commit/mirrors-prettier
- # rev: "v3.0.3"
- # hooks:
- # - id: prettier
+ - repo: https://github.com/biomejs/pre-commit
+ rev: "v0.4.0"
+ hooks:
+ - id: biome-check
+ additional_dependencies: ["@biomejs/biome@1.8.3"]
53 changes: 53 additions & 0 deletions biome.jsonc
@@ -0,0 +1,53 @@
{
"files": {
// `endpoints.d.ts` is autogenerated
// `JsonParser.ts` copied from https://github.com/douglascrockford/JSON-js
"ignore": [
"ui/packages/evidently-ui-lib/src/**/JsonParser.ts",
"ui/packages/evidently-ui-lib/src/**/endpoints.d.ts",
"ui/packages/evidently-ui-lib/.tsc-dts/**"
],
"include": ["ui/*/src/**", "ui/packages/*/src/**"]
},
"formatter": {
"enabled": true,
"formatWithErrors": false,
"indentStyle": "space",
"indentWidth": 2,
"lineEnding": "lf",
"lineWidth": 100,
"attributePosition": "auto"
},
"css": {
"linter": {
"enabled": true
},
"formatter": {
"enabled": true
}
},
"javascript": {
"formatter": {
"arrowParentheses": "always",
"bracketSameLine": false,
"bracketSpacing": true,
"jsxQuoteStyle": "single",
"quoteStyle": "single",
"quoteProperties": "asNeeded",
"semicolons": "asNeeded",
"trailingCommas": "none"
}
},
"json": {
"formatter": {
"trailingCommas": "none"
}
},
"organizeImports": { "enabled": true },
"linter": {
"enabled": true,
"rules": {
"recommended": true
}
}
}
5 changes: 5 additions & 0 deletions docs/book/README.md
@@ -1,3 +1,8 @@

{% hint style="info" %}
**You are looking at the old Evidently documentation**: Check the newer version [here](https://docs.evidentlyai.com/introduction).
{% endhint %}

Evidently helps evaluate, test, and monitor data and ML-powered systems.
* Predictive tasks: classification, regression, ranking, recommendations.
* Generative tasks: chatbots, RAGs, Q&A, summarization.
8 changes: 5 additions & 3 deletions docs/book/SUMMARY.md
@@ -1,5 +1,6 @@
# Table of contents

* [New DOCS](https://docs.evidentlyai.com/)
* [What is Evidently?](README.md)
* [Get Started](get-started/README.md)
* [Evidently Cloud](get-started/quickstart-cloud.md)
@@ -18,7 +19,7 @@
* [Regression Performance](presets/reg-performance.md)
* [Classification Performance](presets/class-performance.md)
* [NoTargetPerformance](presets/no-target-performance.md)
- * [Text Overview](presets/text-overview.md)
+ * [Text Evals](presets/text-overview.md)
* [Recommender System](presets/recsys.md)
* [Tutorials and Examples](examples/README.md)
* [All Tutorials](examples/examples.md)
@@ -80,11 +81,12 @@
* [Feature importance in data drift](customization/feature-importance.md)
* [Text evals with LLM-as-judge](customization/llm_as_a_judge.md)
* [Text evals with HuggingFace](customization/huggingface_descriptor.md)
* [Add a custom text descriptor](customization/add-custom-descriptor.md)
* [Add a custom drift method](customization/add-custom-drift-method.md)
* [Add a custom Metric or Test](customization/add-custom-metric-or-test.md)
* [Customize JSON output](customization/json-dict-output.md)
* [Show raw data in Reports](customization/report-data-aggregation.md)
* [Add text comments to Reports](customization/text-comments.md)
* [Add a custom drift method](customization/add-custom-drift-method.md)
* [Add a custom Metric or Test](customization/add-custom-metric-or-test.md)
* [Change color schema](customization/options-for-color-schema.md)
* [How-to guides](how-to-guides/README.md)

2 changes: 1 addition & 1 deletion docs/book/api-reference/evidently.calculations.md
@@ -277,7 +277,7 @@ Update dataset by predictions type:
set predicted_labels column by threshold
- - (multy label classification) if predictions is a list and its length is greater than 2
+ - (multi label classification) if predictions is a list and its length is greater than 2
set predicted_labels from probability values in columns by prediction column
110 changes: 110 additions & 0 deletions docs/book/customization/add-custom-descriptor.md
@@ -0,0 +1,110 @@
---
description: How to add custom text descriptors.
---

You can implement custom row-level evaluations for text data that you will later use just like any other descriptor across Metrics and Tests. You can implement descriptors that use a single column or two columns.

Note that if you want to use LLM-based evaluations, you can write custom prompts using [LLM judge templates](llm_as_a_judge.md).

# Code example

Refer to a How-to example:

{% embed url="https://github.com/evidentlyai/evidently/blob/main/examples/how_to_questions/how_to_use_llm_judge_template.ipynb" %}

# Custom descriptors

Imports:

```python
import pandas as pd

from evidently.report import Report
from evidently.metrics import ColumnSummaryMetric
from evidently.descriptors import CustomColumnEval, CustomPairColumnEval
```

## Single column descriptor

You can create a custom descriptor that will take a single column from your dataset and run a certain evaluation for each row.

**Implement your evaluation as a Python function**. It will take a pandas Series as input and return a transformed Series.

Here, the `is_empty_string_callable` function takes a column of strings and returns an "EMPTY" or "NON EMPTY" outcome for each.

```python
def is_empty_string_callable(val1):
    return pd.Series(["EMPTY" if val == "" else "NON EMPTY" for val in val1], index=val1.index)
```
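As a quick sanity check, you can run the function directly on a pandas Series before wrapping it into a descriptor. The sample data below is a hypothetical toy example, not part of the original guide:

```python
import pandas as pd

def is_empty_string_callable(val1):
    # Label each value as "EMPTY" or "NON EMPTY", preserving the original index
    return pd.Series(["EMPTY" if val == "" else "NON EMPTY" for val in val1], index=val1.index)

responses = pd.Series(["All good!", "", "See the docs."], index=[10, 11, 12])
labels = is_empty_string_callable(responses)
print(labels.tolist())  # ['NON EMPTY', 'EMPTY', 'NON EMPTY']
```

Because the returned Series keeps the input index, the per-row results line up with the original dataframe rows.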

**Create a custom descriptor**. Create an instance of the `CustomColumnEval` class to wrap the evaluation logic into an object that you can later use to process specific dataset input.

```python
empty_string = CustomColumnEval(
    func=is_empty_string_callable,
    feature_type="cat",
    display_name="Empty response"
)
```

Where:
* `func: Callable[[pd.Series], pd.Series]` is a function that returns a transformed pandas Series.
* `display_name: str` is the new descriptor's name that will appear in Reports and Test Suites.
* `feature_type` is the type of descriptor that the function returns (`cat` for categorical, `num` for numerical).

**Apply the new descriptor**. To create a Report with a new Descriptor, pass it as a `column_name` to the `ColumnSummaryMetric`. This will compute the new descriptor for all rows in the specified column and summarize its distribution:

```python
report = Report(metrics=[
    ColumnSummaryMetric(column_name=empty_string.on("response")),
])
```

Run the Report on your `df` dataframe as usual:

```python
report.run(reference_data=None,
           current_data=df)
```

## Double column descriptor

You can create a custom descriptor that takes two columns from your dataset and runs a certain evaluation for each row (for example, for pairwise evaluators).

**Implement your evaluation as a Python function**. Here, the `exact_match_callable` function takes two columns and checks whether each pair of values is the same, returning "MATCH" if they are equal and "MISMATCH" if they are not.

```python
def exact_match_callable(val1, val2):
    return pd.Series(["MATCH" if val else "MISMATCH" for val in val1 == val2])
```
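To see the elementwise comparison at work, the function can be called directly on two small Series. The sample values below are a hypothetical illustration, not from the original doc:

```python
import pandas as pd

def exact_match_callable(val1, val2):
    # val1 == val2 produces an elementwise boolean Series; map True/False to labels
    return pd.Series(["MATCH" if val else "MISMATCH" for val in val1 == val2])

responses = pd.Series(["Paris", "42", "blue"])
references = pd.Series(["Paris", "41", "blue"])
print(exact_match_callable(responses, references).tolist())  # ['MATCH', 'MISMATCH', 'MATCH']
```

Note that pandas aligns the two Series by index before comparing, so both columns should share the same index, as they do when taken from one dataframe.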

**Create a custom descriptor**. Create an instance of the `CustomPairColumnEval` class to wrap the evaluation logic into an object that you can later use to process two named columns in a dataset.

```python
exact_match = CustomPairColumnEval(
    func=exact_match_callable,
    first_column="response",
    second_column="question",
    feature_type="cat",
    display_name="Exact match between response and question"
)
```

Where:

* `func: Callable[[pd.Series, pd.Series], pd.Series]` is a function that returns a transformed pandas Series after evaluating two columns.
* `first_column: str` is the name of the first column to be passed into the function.
* `second_column: str` is the name of the second column to be passed into the function.
* `display_name: str` is the new descriptor's name that will appear in Reports and Test Suites.
* `feature_type` is the type of descriptor that the function returns (`cat` for categorical, `num` for numerical).

**Apply the new descriptor**. To create a Report with a new Descriptor, pass it as a `column_name` to the `ColumnSummaryMetric`. This will compute the new descriptor for all rows in the dataset and summarize its distribution:

```python
report = Report(metrics=[
    ColumnSummaryMetric(column_name=exact_match.as_column())
])
```

Run the Report on your `df` dataframe as usual:

```python
report.run(reference_data=None,
           current_data=df)
```