Merge branch 'develop' into feat/shortcuts-improvements

argilla-io · Aug 1, 2023 · d714754 · d714754
2 parents 5432e57 + ef1eb16
commit d714754
Show file tree

Hide file tree

Showing 148 changed files with 2,366 additions and 2,308 deletions.
diff --git a/.github/workflows/build-python-package.yml b/.github/workflows/build-python-package.yml
@@ -9,7 +9,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout Code 🛎
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
       - name: Cache pip 👜
         uses: actions/cache@v3
         env:

diff --git a/.github/workflows/check-repo-files.yml b/.github/workflows/check-repo-files.yml
@@ -19,7 +19,7 @@ jobs:
       buildChanges: ${{ steps.path_filter.outputs.buildChanges }}
     steps:
       - name: Checkout Code 🛎
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
       - name: Check affected files
         uses: dorny/paths-filter@v2
         id: path_filter

diff --git a/.github/workflows/package.yml b/.github/workflows/package.yml
@@ -5,6 +5,8 @@ concurrency:
   cancel-in-progress: true
 
 on:
+  workflow_dispatch:
+
   release:
     types:
       - "published"
@@ -13,10 +15,7 @@ on:
     branches:
       - "main"
       - "develop"
-      - "integration"
       - "releases/**"
-      - "feature/**"
-      - "feat/**"
 
   pull_request:
     branches:
@@ -25,6 +24,7 @@ on:
       - "releases/**"
       - "feature/**"
       - "feat/**"
+      - "fix/**"
 
 jobs:
 
@@ -53,18 +53,23 @@ jobs:
         - searchEngineDockerImage: docker.elastic.co/elasticsearch/elasticsearch:8.8.2
           searchEngineDockerEnv: '{"discovery.type": "single-node", "xpack.security.enabled": "false"}'
           coverageReport: coverage-elasticsearch-8.8.2
+          runsOn: extended-runner
         - searchEngineDockerImage: docker.elastic.co/elasticsearch/elasticsearch:8.0.1
           searchEngineDockerEnv: '{"discovery.type": "single-node", "xpack.security.enabled": "false"}'
           coverageReport: coverage-elasticsearch-8.0.1
+          runsOn: extended-runner
         - searchEngineDockerImage: docker.elastic.co/elasticsearch/elasticsearch:7.17.11
           searchEngineDockerEnv: '{"discovery.type": "single-node", "xpack.security.enabled": "false"}'
           coverageReport: coverage-elasticsearch-7.17.11
+          runsOn: ubuntu-latest
         - searchEngineDockerImage: opensearchproject/opensearch:2.4.1
           searchEngineDockerEnv: '{"discovery.type": "single-node", "plugins.security.disabled": "true"}'
           coverageReport: coverage-opensearch-2.4.1
+          runsOn: ubuntu-latest
         - searchEngineDockerImage: opensearchproject/opensearch:1.3.11
           searchEngineDockerEnv: '{"discovery.type": "single-node", "plugins.security.disabled": "true"}'
           coverageReport: coverage-opensearch-1.3.11
+          runsOn: ubuntu-latest
     name: Run base tests
     uses: ./.github/workflows/run-python-tests.yml
     needs:  check_repo_files
@@ -73,6 +78,7 @@ jobs:
       searchEngineDockerImage: ${{ matrix.searchEngineDockerImage }}
       searchEngineDockerEnv: ${{ matrix.searchEngineDockerEnv }}
       coverageReport: coverage
+      runsOn: ${{ matrix.runsOn }}
       pytestArgs: |
         --ignore=tests/training \
         --ignore=tests/client/feedback/training \
@@ -87,6 +93,7 @@ jobs:
     if: needs.check_repo_files.outputs.pythonChanges == 'true'
     # continue-on-error: true
     with:
+      runsOn: extended-runner
       coverageReport: coverage-extra
       pytestArgs: |
         tests/training \
@@ -103,7 +110,7 @@ jobs:
       - run_tests_extra
     steps:
       - name: Checkout Code 🛎
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
       - uses: actions/download-artifact@v3
       - name: Copy all reports
         run: find coverage-report*/ -name "*.xml" -exec mv '{}' . \;
@@ -126,12 +133,10 @@ jobs:
       - build_python_package
       - run_tests
       - run_tests_extra
-      - deployable_check
     if: |
-      always() &&
-      needs.deployable_check.outputs.isDeployable == 'true' &&
-      needs.run_tests.result != 'failure' &&
-      needs.run_tests_extra.result != 'failure'
+      !cancelled() &&
+      !contains(needs.*.result, 'failure') &&
+      !contains(needs.*.result, 'cancelled')
     with:
       download-python-package: true
       image-name: argilla/argilla-server
@@ -145,7 +150,7 @@ jobs:
     uses: ./.github/workflows/build-push-docker.yml
     needs: build_server_docker_image
     if: |
-      always() &&
+      !cancelled() &&
       needs.build_server_docker_image.result == 'success'
     with:
       download-python-package: false
@@ -171,7 +176,7 @@ jobs:
         shell: bash -l {0}
     steps:
       - name: Checkout Code 🛎
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
       - name: Download python package
         uses: actions/download-artifact@v2
         with:

diff --git a/.github/workflows/run-python-tests.yml b/.github/workflows/run-python-tests.yml
@@ -3,6 +3,10 @@ name: Run Argilla python tests
 on:
   workflow_call:
     inputs:
+      runsOn:
+        required: false
+        type: string
+        default: extended-runner
       pytestArgs:
         description: "Provide extra args to pytest command line"
         required: true
@@ -28,7 +32,7 @@ env:
 jobs:
   run-python-tests:
     name: Argilla python tests
-    runs-on: extended-runner
+    runs-on: ${{ inputs.runsOn }}
     services:
       search_engine:
         image: ${{ inputs.searchEngineDockerImage }}
@@ -42,7 +46,7 @@ jobs:
       COVERAGE_REPORT: ${{ inputs.coverageReport }}
     steps:
       - name: Checkout Code 🛎
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
       - name: Setup Conda Env 🐍
         uses: conda-incubator/setup-miniconda@v2
         with:

diff --git a/.github/workflows/tutorials.yml b/.github/workflows/tutorials.yml
@@ -12,7 +12,7 @@ jobs:
         shell: bash -l {0}
     steps:
       - name: Checkout Code 🛎
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
       - name: Setup Rubrix
         run: |
           sed -i 's/rubrix:latest/rubrix:master/' docker-compose.yaml
@@ -39,7 +39,7 @@ jobs:
         shell: bash -l {0}
     steps:
       - name: Checkout Code 🛎
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
       - name: Setup Rubrix
         run: |
           sed -i 's/rubrix:latest/rubrix:master/' docker-compose.yaml
@@ -66,7 +66,7 @@ jobs:
         shell: bash -l {0}
     steps:
       - name: Checkout Code 🛎
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
       - name: Setup Rubrix
         run: |
           sed -i 's/rubrix:latest/rubrix:master/' docker-compose.yaml
@@ -93,7 +93,7 @@ jobs:
         shell: bash -l {0}
     steps:
       - name: Checkout Code 🛎
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
       - name: Setup Rubrix
         run: |
           sed -i 's/rubrix:latest/rubrix:master/' docker-compose.yaml
@@ -120,7 +120,7 @@ jobs:
         shell: bash -l {0}
     steps:
       - name: Checkout Code 🛎
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
       - name: Setup Rubrix
         run: |
           sed -i 's/rubrix:latest/rubrix:master/' docker-compose.yaml
@@ -147,7 +147,7 @@ jobs:
         shell: bash -l {0}
     steps:
       - name: Checkout Code 🛎
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
       - name: Setup Rubrix
         run: |
           sed -i 's/rubrix:latest/rubrix:master/' docker-compose.yaml

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -20,13 +20,13 @@ repos:
           # - --remove-header
 
   - repo: https://github.com/psf/black
-    rev: 23.3.0
+    rev: 23.7.0
     hooks:
       - id: black
         additional_dependencies: ["typer==0.7.0"]
 
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.0.276
+    rev: v0.0.280
     hooks:
       # Simulate isort via (the much faster) ruff
       - id: ruff

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -16,6 +16,21 @@ These are the section headers that we use:
 
 ## [Unreleased]
 
+### Changed
+
+- Improved efficiency of weak labeling when dataset contains vectors ([#3444](https://github.com/argilla-io/argilla/pull/3444)).
+- Added `ArgillaDatasetMixin` to detach the Argilla-related functionality from the `FeedbackDataset` ([#3427](https://github.com/argilla-io/argilla/pull/3427)).
+- Moved `FeedbackDataset`-related `pydantic.BaseModel` schemas to `argilla.client.feedback.schemas`, so that they are better structured, more scalable, and easier to maintain ([#3427](https://github.com/argilla-io/argilla/pull/3427)).
+- Updated the CLI to use an async database connection ([#3450](https://github.com/argilla-io/argilla/pull/3450)).
+- Updated the alembic code to apply migrations using the async database engine ([#3450](https://github.com/argilla-io/argilla/pull/3450)).
+- Limited rating question values to the range [1, 10] (Closes [#3451](https://github.com/argilla-io/argilla/issues/3451)).
+
+## [1.13.2](https://github.com/argilla-io/argilla/compare/v1.13.1...v1.13.2)
+
+### Fixed
+
+- The `suggestion_type_enum` ENUM data type created in PostgreSQL didn't have any value ([#3445](https://github.com/argilla-io/argilla/pull/3445)).
+
 ## [1.13.1](https://github.com/argilla-io/argilla/compare/v1.13.0...v1.13.1)
 
 ### Fixed

diff --git a/docs/_source/getting_started/installation/deployments/huggingface-spaces.md b/docs/_source/getting_started/installation/deployments/huggingface-spaces.md
@@ -20,7 +20,7 @@ You can deploy Argilla on Spaces with just a few clicks:
     <img src="https://huggingface.co/datasets/huggingface/badges/raw/main/deploy-to-spaces-lg.svg" />
 </a>
 
-You need to define the **Owner** (your personal account or an organization), a **Space name**, and the **Visibility**. To interact with the Argilla app with Python, you need to setup the visibility to `Public`. If you plan to use the Space frequently or handle large datasets for data labeling and feedback collection, upgrading the hardware with a more powerful CPU and increased RAM can enhance performance.
+You need to define the **Owner** (your personal account or an organization), a **Space name**, and the **Visibility**. To interact with the Argilla app with Python, you need to set the visibility to `Public`. If you plan to use the Space frequently or handle large datasets for data labeling and feedback collection, upgrading the hardware with a more powerful CPU and increased RAM can enhance performance.
 
 <div class="flex justify-center">
 <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/hub/spaces-argilla-new-space.png"/>
@@ -51,15 +51,15 @@ Once Argilla is running, you can use the UI with the Direct URL you'll find in t
 
 If everything went well, you are ready to use the Argilla Python client from an IDE such as Colab, Jupyter, or VS Code.
 
-If you want to a quick step-by-step example, keep reading. If you want an end-to-end tutorial, go to this [tutorial and use Colab or Jupyter](https://docs.argilla.io/en/latest/tutorials/notebooks/training-textclassification-setfit-fewshot.html).
+If you want a quick step-by-step example, keep reading. If you want an end-to-end tutorial, go to this [tutorial and use Colab or Jupyter](https://docs.argilla.io/en/latest/tutorials/notebooks/training-textclassification-setfit-fewshot.html).
 
-First we need to pip install `datasets` and `argilla` on Colab or your local machine:
+First, we need to pip install `datasets` and `argilla` on Colab or your local machine:
 
 ```bash
 pip install datasets argilla
 ```
 
-Then, you can read the example dataset using the `datasets` library. This dataset is a CSV file uploaded to the Hub using the drag and drop feature.
+Then, you can read the example dataset using the `datasets` library. This dataset is a CSV file uploaded to the Hub using the drag-and-drop feature.
 
 ```python
 from datasets import load_dataset
@@ -151,7 +151,7 @@ The usernames, passwords, and API keys to upload, read, update, and delete datas
   variable. The API key you choose can be any string of your choice and you can check an online generator if you like.
 - `ADMIN_USERNAME`: The admin username to log in Argilla. The default admin username is `admin`. By setting up
   a custom username you can use your own username to login into the app.
-- `ADMIN_PASSWORD`: This sets a custom password for login into the app with the `argilla` username. The default
+- `ADMIN_PASSWORD`: This sets a custom password for logging in to the app with the `admin` username. The default
   password is `12345678`. By setting up a custom password you can use your own password to login into the app.
 - `ADMIN_API_KEY`: Argilla provides a Python library to interact with the app (read, write, and update data, log model
   predictions, etc.). If you don't set this variable, the library and your app will use the default API key

diff --git a/docs/_source/guides/llms/practical_guides/create_dataset.md b/docs/_source/guides/llms/practical_guides/create_dataset.md
@@ -50,7 +50,7 @@ You can define your questions using the Python SDK and set up the following conf
 
 The following arguments apply to specific question types:
 
-- `values`: In the `RatingQuestion` this will be any list of unique integers that represent the options that annotators can choose from. It doesn't matter whether these are positive, negative, sequential or not. In the `RankingQuestion`, values will be a list of strings with the options they will need to rank. If you'd like the text of the options to be different in the UI and internally, you can pass a dictionary instead where the key is the internal name and the value the text to display in the UI.
+- `values`: In the `RatingQuestion` this will be a list of unique integers that represent the options that annotators can choose from. These values must be within the range [1, 10]. In the `RankingQuestion`, values will be a list of strings with the options they will need to rank. If you'd like the text of the options to be different in the UI and internally, you can pass a dictionary instead where the key is the internal name and the value the text to display in the UI.
 - `labels`: In `LabelQuestion` and `MultiLabelQuestion` this is a list of strings with the options for these questions. If you'd like the text of the labels to be different in the UI and internally, you can pass a dictionary instead where the key is the internal name and the value the text to display in the UI.
 - `visible_labels` (optional): In `LabelQuestion` and `MultiLabelQuestion` this is the number of labels that will be visible in the UI. By default, the UI will show 20 labels and collapse the rest. Set your preferred number to change this limit or set `visible_labels=None` to show all options.
 - `use_markdown` (optional): In `TextQuestion` define whether the field should render markdown text. Defaults to `False`.

diff --git a/pyproject.toml b/pyproject.toml
@@ -183,6 +183,7 @@ exclude = [
     "node_modules",
     "venv",
 ]
+line-length = 120
 
 [tool.ruff.per-file-ignores]
 # Ignore imported but unused;

diff --git a/src/argilla/__main__.py b/src/argilla/__main__.py
@@ -14,11 +14,10 @@
 #  limitations under the License.
 
 
-import typer
+from argilla.tasks import database_app, server_app, training_app, users_app
+from argilla.tasks.async_typer import AsyncTyper
 
-from .tasks import database_app, server_app, training_app, users_app
-
-app = typer.Typer(rich_help_panel=True, help="Argilla CLI", no_args_is_help=True)
+app = AsyncTyper(rich_help_panel=True, help="Argilla CLI", no_args_is_help=True)
 
 app.add_typer(users_app, name="users")
 app.add_typer(database_app, name="database")

diff --git a/src/argilla/client/api.py b/src/argilla/client/api.py
@@ -20,10 +20,7 @@
 
 from argilla.client.client import Argilla
 from argilla.client.datasets import Dataset
-from argilla.client.models import (  # TODO Remove TextGenerationRecord
-    BulkResponse,
-    Record,
-)
+from argilla.client.models import BulkResponse, Record  # TODO Remove TextGenerationRecord
 from argilla.client.sdk.commons import errors
 from argilla.client.sdk.v1.datasets.api import list_datasets as list_datasets_api_v1
 from argilla.client.sdk.workspaces.api import list_workspaces as list_workspaces_api_v0
@@ -340,11 +337,7 @@ def load(
         raise e
 
 
-def copy(
-    dataset: str,
-    name_of_copy: str,
-    workspace: str = None,
-):
+def copy(dataset: str, name_of_copy: str, workspace: str = None):
     """
     Creates a copy of a dataset including its tags and metadata