From 5b71ae1d188e8c0ebb29754e48442248d31b57aa Mon Sep 17 00:00:00 2001 From: kkim-labelbox <99684749+kkim-labelbox@users.noreply.github.com> Date: Wed, 5 Apr 2023 21:07:13 -0700 Subject: [PATCH] Release 3.43.0 (#1044) Co-authored-by: Ibrahim Muhammad Co-authored-by: Kevin Kim Co-authored-by: Val Brodsky Co-authored-by: Val Brodsky Co-authored-by: Klaus Opreschko Co-authored-by: Klaus Opreschko Co-authored-by: mnoszczak <99751601+mnoszczak@users.noreply.github.com> Co-authored-by: Andrea Ovalle <74880762+ovalle15@users.noreply.github.com> --- CHANGELOG.md | 20 + CONTRIB.md | 90 +- docs/source/conf.py | 2 +- examples/annotation_import/audio.ipynb | 529 ++++++++++ .../annotation_import/conversational.ipynb | 147 ++- examples/annotation_import/dicom.ipynb | 534 ++++++++++ examples/annotation_import/html.ipynb | 529 ++++++++++ examples/annotation_import/image.ipynb | 910 ++++++++++-------- examples/annotation_import/pdf.ipynb | 667 ++++++++++--- examples/annotation_import/text.ipynb | 189 +++- examples/annotation_import/video.ipynb | 330 +++++-- .../prediction_upload/video_predictions.ipynb | 82 +- .../queue_management.ipynb | 165 ++-- labelbox/__init__.py | 3 +- labelbox/data/annotation_types/__init__.py | 6 + labelbox/data/annotation_types/annotation.py | 36 +- .../data/annotation_types/base_annotation.py | 18 + .../classification/classification.py | 27 + .../annotation_types/geometry/__init__.py | 2 + .../annotation_types/geometry/rectangle.py | 27 + labelbox/data/annotation_types/label.py | 6 +- .../data/annotation_types/relationship.py | 18 + labelbox/data/annotation_types/video.py | 16 +- labelbox/data/metrics/group.py | 4 +- labelbox/data/serialization/ndjson/base.py | 4 +- .../serialization/ndjson/classification.py | 124 ++- .../data/serialization/ndjson/converter.py | 1 - labelbox/data/serialization/ndjson/label.py | 152 ++- labelbox/data/serialization/ndjson/objects.py | 167 +++- .../data/serialization/ndjson/relationship.py | 44 + labelbox/schema/data_row.py 
| 10 +- labelbox/schema/dataset.py | 10 +- labelbox/schema/export_params.py | 4 +- labelbox/schema/slice.py | 8 +- .../classification/test_classification.py | 2 +- .../assets/ndjson/classification_import.json | 4 +- .../classification_import_global_key.json | 4 +- .../classification_import_name_only.json | 4 +- .../custom_confusion_matrix_import.json | 58 +- .../assets/ndjson/custom_scalar_import.json | 38 +- tests/data/assets/ndjson/nested_import.json | 6 +- .../ndjson/nested_import_name_only.json | 6 +- .../assets/ndjson/relationship_import.json | 42 + tests/data/assets/ndjson/video_import.json | 258 +++-- .../ndjson/video_import_global_key.json | 258 +++-- .../assets/ndjson/video_import_name_only.json | 258 +++-- .../serialization/ndjson/test_checklist.py | 297 +++++- tests/data/serialization/ndjson/test_dicom.py | 19 +- .../serialization/ndjson/test_document.py | 41 +- .../ndjson/test_export_video_objects.py | 18 +- tests/data/serialization/ndjson/test_radio.py | 84 +- .../serialization/ndjson/test_relationship.py | 41 + tests/data/serialization/ndjson/test_text.py | 17 +- tests/data/serialization/ndjson/test_video.py | 485 ++++++++++ .../integration/annotation_import/conftest.py | 64 +- .../test_bulk_import_request.py | 70 +- .../test_conversation_import.py | 14 +- .../annotation_import/test_data_types.py | 172 ++++ .../annotation_import/test_label_import.py | 22 +- .../test_mea_prediction_import.py | 41 +- .../annotation_import/test_model_run.py | 73 +- .../test_upsert_prediction_import.py | 24 +- tests/unit/test_mal_import.py | 2 +- 63 files changed, 5727 insertions(+), 1576 deletions(-) create mode 100644 examples/annotation_import/audio.ipynb create mode 100644 examples/annotation_import/dicom.ipynb create mode 100644 examples/annotation_import/html.ipynb create mode 100644 labelbox/data/annotation_types/base_annotation.py create mode 100644 labelbox/data/annotation_types/relationship.py create mode 100644 labelbox/data/serialization/ndjson/relationship.py 
create mode 100644 tests/data/assets/ndjson/relationship_import.json create mode 100644 tests/data/serialization/ndjson/test_relationship.py diff --git a/CHANGELOG.md b/CHANGELOG.md index d6fc583cb..d4bed1b78 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,25 @@ # Changelog +# Version 3.43.0 (2023-04-05) + +## Added +* Nested object classifications to `VideoObjectAnnotation` +* Relationship Annotation Types +* Added `project_ids` and `model_run_ids` to params in all export_v2 functions + +## Fixed +* VideoMaskAnnotation annotation import + +## Notebooks +* Added DICOM annotation import notebook +* Added audio annotation import notebook +* Added HTML annotation import notebook +* Added relationship examples to annotation import notebooks +* Added global video classification example +* Added nested classification examples +* Added video mask example +* Added global key and LPOs to queue management notebook + # Version 3.42.0 (2023-03-22) ## Added diff --git a/CONTRIB.md b/CONTRIB.md index d59a0cc67..77122ed79 100644 --- a/CONTRIB.md +++ b/CONTRIB.md @@ -1,9 +1,25 @@ # Labelbox Python SDK Contribution Guide +## Contribution Guidelines +Thank you for expressing your interest in contributing to the Labelbox SDK. +To ensure that your contribution aligns with our guidelines, please carefully +review the following considerations before proceeding: + +* For feature requests, we recommend consulting with Labelbox support or + creating a [Github Issue](https://github.com/Labelbox/labelbox-python/issues) on our repository. +* We can only accept general solutions that address common issues rather than solutions + designed for specific use cases. Acceptable contributions include simple bug fixes and + improvements to functions within the schema/ package. +* Please ensure that any new libraries are compliant with the Apache license that governs the Labelbox SDK. 
+* Ensure that you update any relevant docstrings and comments within your code + ## Repository Organization The SDK source (excluding tests and support tools) is organized into the following packages/modules: +* `data/` package contains code that maps annotations (labels or pre-labels) to + Python objects, as well as serialization and deserialization tools for converting + between NDJson and Annotation Types. * `orm/` package contains code that supports the general mapping of Labelbox data to Python objects. This includes base classes, attribute (field and relationship) classes, generic GraphQL queries etc. @@ -25,58 +41,52 @@ following packages/modules: * Approved PRs are merged to the `develop` branch. * The `develop` branch is merged to `master` on each release. -## Commits +## Formatting Before making a commit, to automatically adhere to our formatting standards, install and activate [pre-commit](https://pre-commit.com/) +```shell +pip install pre-commit +pre-commit install +``` +After the above, running `git commit ...` will attempt to fix formatting, +and make necessary changes to files. You will then need to stage those files again. + +You may also manually format your code by running the following: +```shell +yapf tests labelbox -i --verbose --recursive --parallel --style "google" +``` -After the above, running `git commit ...` will attempt to fix formatting. If -there was formatted needed, you will need to re-add and re-commit before pushing. ## Testing -Currently, the SDK functionality is tested using integration tests. These tests -communicate with a Labelbox server (by default the staging server) and are in -that sense not self-contained. Besides, that they are organized like unit test -and are based on the `pytest` library. +Currently, the SDK functionality is tested using unit and integration tests. +The integration tests communicate with a Labelbox server (by default the staging server) +and are in that sense not self-contained. 
+ +Please consult the "Testing" section in the README for more details on how to test. -To execute tests you will need to provide an API key for the server you're using +Additionally, to execute tests you will need to provide an API key for the server you're using for testing (staging by default) in the `LABELBOX_TEST_API_KEY` environment variable. For more info see [Labelbox API key docs](https://labelbox.helpdocs.io/docs/api/getting-started). -To pass tests, code must be formatted. If pre-commit was not installed, -you will need to use the following command: - -```shell -yapf tests labelbox -i --verbose --recursive --parallel --style "google" -``` ## Release Steps -Each release should follow the following steps: - -1. Update the Python SDK package version in `REPO_ROOT/setup.py` -2. Make sure the `CHANGELOG.md` contains appropriate info -3. Commit these changes and tag the commit in Git as `vX.Y` -4. Merge `develop` to `master` (fast-forward only). -5. Create a GitHub release. -6. This will kick off a Github Actions workflow that will: - - Build the library in the [standard way](https://packaging.python.org/tutorials/packaging-projects/#generating-distribution-archives) - - Upload the distribution archives in the [standard way](https://packaging.python.org/tutorials/packaging-projects/#uploading-the-distribution-archives) - - with credentials for the `labelbox` PyPI user. - - ## Running Jupyter Notebooks - - We have plenty of good samples in the _examples_ directory and using them for testing can help us increase our productivity. One way to use jupyter notebooks is to run the jupyter server locally (another way is to use a VSC plugin, not documented here). It works really fast. - - Make sure your notebook will use your source code: - 1. `ipython profile create` - 2. `ipython locate` - will show where the config file is. This is the config file used by the jupyter server, since it runs via ipython - 3. 
Open the file (this should be ipython_config.py and it is usually located in ~/.ipython/profile_default) and add the following line of code: - ``` - c.InteractiveShellApp.exec_lines = [ - 'import sys; sys.path.insert(0, "")' - ] - ``` - 4. Go to the root of your project and run `jupyter notebook` to start the server +Please consult the Labelbox team for releasing your contributions + +## Running Jupyter Notebooks + +We have plenty of good samples in the _examples_ directory and using them for testing can help us increase our productivity. One way to use jupyter notebooks is to run the jupyter server locally (another way is to use a VSC plugin, not documented here). It works really fast. + +Make sure your notebook will use your source code: +1. `ipython profile create` +2. `ipython locate` - will show where the config file is. This is the config file used by the jupyter server, since it runs via ipython +3. Open the file (this should be ipython_config.py and it is usually located in ~/.ipython/profile_default) and add the following line of code: +``` +c.InteractiveShellApp.exec_lines = [ + 'import sys; sys.path.insert(0, "")' +] +``` +4. 
Go to the root of your project and run `jupyter notebook` to start the server diff --git a/docs/source/conf.py b/docs/source/conf.py index ee9eec618..0b5e790cc 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -21,7 +21,7 @@ copyright = '2021, Labelbox' author = 'Labelbox' -release = '3.42.0' +release = '3.43.0' # -- General configuration --------------------------------------------------- diff --git a/examples/annotation_import/audio.ipynb b/examples/annotation_import/audio.ipynb new file mode 100644 index 000000000..b3bb1c94c --- /dev/null +++ b/examples/annotation_import/audio.ipynb @@ -0,0 +1,529 @@ +{ + "nbformat": 4, + "nbformat_minor": 5, + "metadata": {}, + "cells": [ + { + "metadata": {}, + "source": [ + "\n", + " \n", + "" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# Audio Annotation Import\n", + "* This notebook will provide examples of each supported annotation type for audio assets, and also cover MAL and Label Import methods:\n", + "\n", + "Supported annotations that can be uploaded through the SDK\n", + "\n", + "* Classification Radio \n", + "* Classification Checklist \n", + "* Classification Free Text \n", + "\n", + "**Not** supported annotations\n", + "\n", + "* Bounding box\n", + "* NER\n", + "* Polygon \n", + "* Point\n", + "* Polyline \n", + "* Segmentation Mask\n", + "\n", + "MAL and Label Import:\n", + "\n", + "* Model-assisted labeling - used to provide pre-annotated data for your labelers. This will enable a reduction in the total amount of time to properly label your assets. Model-assisted labeling does not submit the labels automatically, and will need to be reviewed by a labeler for submission.\n", + "* Label Import - used to provide ground truth labels. 
These can in turn be used and compared against prediction labels, or used as benchmarks to see how your labelers are doing.\n", + "\n" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "* For information on what types of annotations are supported per data type, refer to this documentation:\n", + " * https://docs.labelbox.com/docs/model-assisted-labeling#option-1-import-via-python-annotation-types-recommended" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "* Notes:\n", + " * Wait until the import job is complete before opening the Editor to make sure all annotations are imported properly." + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "!pip install -q 'labelbox[data]'" + ], + "cell_type": "code", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.0.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" + ] + } + ], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "# Setup" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "import labelbox as lb\n", + "import uuid\n", + "import labelbox.types as lb_types" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "# Replace with your API key\n", + "Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# Add your api key\n", + "API_KEY = \"\"\n", + "client = lb.Client(api_key=API_KEY)" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + 
"source": [ + "## Supported annotations for Audio" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "##### Classification free text #####\n", + "\n", + "text_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"text_audio\",\n", + " value=lb_types.Text(answer=\"free text audio annotation\"),\n", + ")\n", + "\n", + "text_annotation_ndjson = {\n", + " 'name': 'text_audio',\n", + " 'answer': 'free text audio annotation',\n", + "}" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "##### Checklist Classification ####### \n", + "\n", + "checklist_annotation= lb_types.ClassificationAnnotation(\n", + " name=\"checklist_audio\",\n", + " value=lb_types.Checklist(\n", + " answer = [\n", + " lb_types.ClassificationAnswer(\n", + " name = \"first_checklist_answer\"\n", + " ), \n", + " lb_types.ClassificationAnswer(\n", + " name = \"second_checklist_answer\"\n", + " )\n", + " ]\n", + " ),\n", + " )\n", + "\n", + "\n", + "checklist_annotation_ndjson = {\n", + " 'name': 'checklist_audio',\n", + " 'answers': [\n", + " {'name': 'first_checklist_answer'},\n", + " {'name': 'second_checklist_answer'}\n", + " ],\n", + "}" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "######## Radio Classification ######\n", + "\n", + "radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"radio_audio\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"second_radio_answer\")))\n", + "\n", + "radio_annotation_ndjson = {\n", + " 'name': 'radio_audio',\n", + " 'answer': {\n", + " 'name': 'first_radio_answer'\n", + " },\n", + "}" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Upload Annotations - putting it all together " + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "## Step 1: Import data rows into 
Catalog" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# Create one Labelbox dataset\n", + "\n", + "global_key = \"sample-audio-1.mp3\"\n", + "\n", + "asset = {\n", + " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/audio-sample-data/sample-audio-1.mp3\",\n", + " \"global_key\": global_key\n", + "}\n", + "\n", + "dataset = client.create_dataset(name=\"audio_annotation_import_demo_dataset\")\n", + "task = dataset.create_data_rows([asset])\n", + "task.wait_till_done()\n", + "print(\"Errors:\", task.errors)\n", + "print(\"Failed data rows: \", task.failed_data_rows)" + ], + "cell_type": "code", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Errors: None\n", + "Failed data rows: None\n" + ] + } + ], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Step 2: Create/select an ontology\n", + "\n", + "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the `name` fields in your annotations to ensure the correct feature schemas are matched.\n", + "\n", + "For example, when we create the text annotation, we provided the `name` as `text_audio`. Now, when we setup our ontology, we must ensure that the name of the tool is also `text_audio`. The same alignment must hold true for the other tools and classifications we create in our ontology." 
+ ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "ontology_builder = lb.OntologyBuilder(\n", + " classifications=[ \n", + " lb.Classification( \n", + " class_type=lb.Classification.Type.TEXT,\n", + " name=\"text_audio\"), \n", + " lb.Classification( \n", + " class_type=lb.Classification.Type.CHECKLIST, \n", + " name=\"checklist_audio\", \n", + " options=[\n", + " lb.Option(value=\"first_checklist_answer\"),\n", + " lb.Option(value=\"second_checklist_answer\") \n", + " ]\n", + " ), \n", + " lb.Classification( \n", + " class_type=lb.Classification.Type.RADIO, \n", + " name=\"radio_audio\", \n", + " options=[\n", + " lb.Option(value=\"first_radio_answer\"),\n", + " lb.Option(value=\"second_radio_answer\")\n", + " ]\n", + " )\n", + " ]\n", + ")\n", + "\n", + "ontology = client.create_ontology(\"Ontology Audio Annotations\", ontology_builder.asdict(), media_type=lb.MediaType.Audio)" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "\n", + "## Step 3: Create a labeling project\n", + "Connect the ontology to the labeling project" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# Create Labelbox project\n", + "project = client.create_project(name=\"audio_project\", \n", + " media_type=lb.MediaType.Audio)\n", + "\n", + "# Setup your ontology \n", + "project.setup_editor(ontology) # Connect your ontology and editor to your project" + ], + "cell_type": "code", + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Default createProject behavior will soon be adjusted to prefer batch projects. 
Pass in `queue_mode` parameter explicitly to opt-out for the time being.\n" + ] + } + ], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Step 4: Send a batch of data rows to the project" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# Setup Batches and Ontology\n", + "\n", + "# Create a batch to send to your MAL project\n", + "batch = project.create_batch(\n", + " \"first-batch-audio-demo\", # Each batch in a project must have a unique name\n", + " global_keys=[global_key], # Paginated collection of data row objects, list of data row ids or global keys\n", + " priority=5 # priority between 1(Highest) - 5(lowest)\n", + ")\n", + "\n", + "print(\"Batch: \", batch)" + ], + "cell_type": "code", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Batch: \n" + ] + } + ], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Step 5: Create the annotations payload\n", + "Create the annotations payload using the snippets of code above\n", + "\n", + "Labelbox supports two formats for the annotations payload: NDJSON and Python Annotation types." + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "#### Python annotation\n", + "Here we create the complete label payload of annotations using only the Python annotation format. There is one annotation for each reference to an annotation that we created. 
" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "label = []\n", + "label.append(\n", + " lb_types.Label(\n", + " data=lb_types.AudioData(\n", + " global_key=global_key\n", + " ),\n", + " annotations=[\n", + " text_annotation,\n", + " checklist_annotation,\n", + " radio_annotation\n", + " ]\n", + " )\n", + ")" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "### NDJSON annotations \n", + "Here we create the complete label NDJSON payload of annotations only using NDJSON format. There is one annotation for each reference to an annotation that we created [above](https://colab.research.google.com/drive/1rFv-VvHUBbzFYamz6nSMRJz1mEg6Ukqq#scrollTo=3umnTd-MfI0o&line=1&uniqifier=1)." + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "label_ndjson = []\n", + "for annotations in [text_annotation_ndjson,\n", + " checklist_annotation_ndjson,\n", + " radio_annotation_ndjson]:\n", + " annotations.update({\n", + " 'dataRow': {\n", + " 'globalKey': global_key\n", + " }\n", + " })\n", + " label_ndjson.append(annotations)" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "### Step 6: Upload annotations to a project as pre-labels or complete labels" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "#### Model Assisted Labeling (MAL)\n", + "For the purpose of this tutorial, only run one of the label_ndjson annotation type tools at a time (NDJSON or Annotation types). 
Delete the previous labels before uploading labels that use the 2nd method (ndjson)" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# Upload our label using Model-Assisted Labeling\n", + "upload_job = lb.MALPredictionImport.create_from_objects(\n", + " client = client, \n", + " project_id = project.uid, \n", + " name=f\"mal_job-{str(uuid.uuid4())}\", \n", + " predictions=label)\n", + "\n", + "upload_job.wait_until_done();\n", + "print(\"Errors:\", upload_job.errors)\n", + "print(\"Status of uploads: \", upload_job.statuses)" + ], + "cell_type": "code", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Errors: []\n", + "Status of uploads: [{'uuid': '40aea601-206f-491c-8bcc-675205dbb351', 'dataRow': {'id': 'clfsl56ww5uyi078ldbojeqby', 'globalKey': 'sample-audio-1.mp3'}, 'status': 'SUCCESS'}, {'uuid': '51c43a0e-a54b-4ee3-94c1-7ccdebe81f98', 'dataRow': {'id': 'clfsl56ww5uyi078ldbojeqby', 'globalKey': 'sample-audio-1.mp3'}, 'status': 'SUCCESS'}, {'uuid': '380273f0-4ed4-4d0a-959a-06659f5edf88', 'dataRow': {'id': 'clfsl56ww5uyi078ldbojeqby', 'globalKey': 'sample-audio-1.mp3'}, 'status': 'SUCCESS'}]\n" + ] + } + ], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "#### Label Import" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# Upload label for this data row in project \n", + "upload_job = lb.LabelImport.create_from_objects(\n", + " client = client, \n", + " project_id = project.uid, \n", + " name=\"label_import_job\"+str(uuid.uuid4()), \n", + " labels=label)\n", + "\n", + "upload_job.wait_until_done();\n", + "print(\"Errors:\", upload_job.errors)\n", + "print(\"Status of uploads: \", upload_job.statuses)" + ], + "cell_type": "code", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Errors: []\n", + "Status of uploads: [{'uuid': '1e555dd0-3916-4cfb-97be-79f9607da01a', 'dataRow': {'id': 'clfsl56ww5uyi078ldbojeqby', 'globalKey': 
'sample-audio-1.mp3'}, 'status': 'SUCCESS'}, {'uuid': 'fe805388-d313-45ea-b3bc-1a7f5ffa0980', 'dataRow': {'id': 'clfsl56ww5uyi078ldbojeqby', 'globalKey': 'sample-audio-1.mp3'}, 'status': 'SUCCESS'}, {'uuid': '83cea6f0-acdf-4a2c-afff-197bac9bdb01', 'dataRow': {'id': 'clfsl56ww5uyi078ldbojeqby', 'globalKey': 'sample-audio-1.mp3'}, 'status': 'SUCCESS'}]\n" + ] + } + ], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "### Optional deletions for cleanup " + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# project.delete()\n", + "# dataset.delete()" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + } + ] +} \ No newline at end of file diff --git a/examples/annotation_import/conversational.ipynb b/examples/annotation_import/conversational.ipynb index 8d9b2cb67..448e19cbc 100644 --- a/examples/annotation_import/conversational.ipynb +++ b/examples/annotation_import/conversational.ipynb @@ -75,7 +75,7 @@ { "metadata": {}, "source": [ - "!pip install -q 'labelbox[data]'" + "!pip install -q \"labelbox[data]\"" ], "cell_type": "code", "outputs": [], @@ -171,9 +171,9 @@ "\n", "\n", "text_annotation_ndjson = {\n", - " 'name': 'text_convo',\n", - " 'answer': 'the answer to the text questions right here',\n", - " 'messageId': \"0\"\n", + " \"name\": \"text_convo\",\n", + " \"answer\": \"the answer to the text questions right here\",\n", + " \"messageId\": \"0\"\n", "}" ], "cell_type": "code", @@ -186,7 +186,7 @@ "##### Checklist Classification ####### \n", "\n", "checklist_annotation= lb_types.ClassificationAnnotation(\n", - " name=\"checklist_convo\", # must match your ontology feature's name\n", + " name=\"checklist_convo\", # must match your ontology feature\"s name\n", " value=lb_types.Checklist(\n", " answer = [\n", " lb_types.ClassificationAnswer(\n", @@ -202,12 +202,12 @@ "\n", "\n", "checklist_annotation_ndjson = {\n", - " 'name': 'checklist_convo',\n", - " 'answers': [\n", - " {'name': 
'first_checklist_answer'},\n", - " {'name': 'second_checklist_answer'}\n", + " \"name\": \"checklist_convo\",\n", + " \"answers\": [\n", + " {\"name\": \"first_checklist_answer\"},\n", + " {\"name\": \"second_checklist_answer\"}\n", " ],\n", - " 'messageId': '2'\n", + " \"messageId\": \"2\"\n", "}" ], "cell_type": "code", @@ -220,15 +220,15 @@ "######## Radio Classification ######\n", "\n", "radio_annotation = lb_types.ClassificationAnnotation(\n", - " name='radio_convo', \n", - " value=lb_types.Radio(answer = lb_types.ClassificationAnswer(name = 'first_radio_answer')),\n", + " name=\"radio_convo\", \n", + " value=lb_types.Radio(answer = lb_types.ClassificationAnswer(name = \"first_radio_answer\")),\n", " message_id=\"0\"\n", ")\n", "\n", "radio_annotation_ndjson = {\n", - " 'name': 'radio_convo',\n", - " 'answer': {\n", - " 'name': 'first_radio_answer'\n", + " \"name\": \"radio_convo\",\n", + " \"answer\": {\n", + " \"name\": \"first_radio_answer\"\n", " },\n", " \"messageId\": \"0\",\n", "}" @@ -237,6 +237,71 @@ "outputs": [], "execution_count": null }, + { + "metadata": {}, + "source": [ + "####### Relationships ########## \n", + "ner_source = lb_types.ObjectAnnotation(\n", + " name=\"ner\",\n", + " value=lb_types.ConversationEntity(\n", + " start=16,\n", + " end=26,\n", + " message_id=\"4\"\n", + " )\n", + ")\n", + "ner_target = lb_types.ObjectAnnotation(\n", + " name=\"ner\",\n", + " value=lb_types.ConversationEntity(\n", + " start=29, \n", + " end=34, \n", + " message_id=\"4\"\n", + " )\n", + ")\n", + "\n", + "ner_relationship = lb_types.RelationshipAnnotation(\n", + " name=\"relationship\",\n", + " value=lb_types.Relationship(\n", + " source=ner_source,\n", + " target=ner_target,\n", + " type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n", + " ))\n", + "\n", + "uuid_source = str(uuid.uuid4())\n", + "uuid_target = str(uuid.uuid4())\n", + "\n", + "ner_source_ndjson = { \n", + " \"uuid\": uuid_source, \n", + " \"name\": \"ner\",\n", + " \"location\": { \n", + " 
\"start\": 16, \n", + " \"end\": 26 \n", + " },\n", + " \"messageId\": \"4\"\n", + " }\n", + "\n", + "ner_target_ndjson = { \n", + " \"uuid\": uuid_target,\n", + " \"name\": \"ner\",\n", + " \"location\": { \n", + " \"start\": 29, \n", + " \"end\": 34\n", + " },\n", + " \"messageId\": \"4\"\n", + " }\n", + "\n", + "ner_relationship_annotation_ndjson = {\n", + " \"name\": \"relationship\", \n", + " \"relationship\": {\n", + " \"source\": uuid_source,\n", + " \"target\": uuid_target,\n", + " \"type\": \"bidirectional\"\n", + " }\n", + "}" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, { "metadata": {}, "source": [ @@ -289,9 +354,9 @@ "source": [ "ontology_builder = lb.OntologyBuilder(\n", " tools=[ \n", - " lb.Tool( # NER tool given the name \"ner\"\n", - " tool=lb.Tool.Type.NER, \n", - " name=\"ner\")], \n", + " lb.Tool(tool=lb.Tool.Type.NER,name=\"ner\"),\n", + " lb.Tool(tool=lb.Tool.Type.RELATIONSHIP,name=\"relationship\")\n", + " ], \n", " classifications=[ \n", " lb.Classification( \n", " class_type=lb.Classification.Type.TEXT,\n", @@ -318,7 +383,7 @@ " ]\n", ")\n", "\n", - "ontology = client.create_ontology(\"Ontology Conversation Annotations\", ontology_builder.asdict(), media_type=lb.MediaType.Conversational)" + "ontology = client.create_ontology(\"Ontology Conversation Annotations\", ontology_builder.asdict())" ], "cell_type": "code", "outputs": [], @@ -403,7 +468,10 @@ " ner_annotation,\n", " text_annotation,\n", " checklist_annotation,\n", - " radio_annotation\n", + " radio_annotation,\n", + " ner_source,\n", + " ner_target,\n", + " ner_relationship\n", " ]\n", " )\n", ")" @@ -424,13 +492,18 @@ "metadata": {}, "source": [ "label_ndjson = []\n", - "for annotations in [ner_annotation_ndjson,\n", - " text_annotation_ndjson,\n", - " checklist_annotation_ndjson,\n", - " radio_annotation_ndjson]:\n", + "for annotations in [\n", + " ner_annotation_ndjson,\n", + " text_annotation_ndjson,\n", + " checklist_annotation_ndjson,\n", + " 
radio_annotation_ndjson,\n", + " ner_source_ndjson,\n", + " ner_target_ndjson,\n", + " ner_relationship_annotation_ndjson,\n", + " ]:\n", " annotations.update({\n", - " 'dataRow': {\n", - " 'globalKey': global_key\n", + " \"dataRow\": {\n", + " \"globalKey\": global_key\n", " }\n", " })\n", " label_ndjson.append(annotations)" @@ -464,7 +537,7 @@ " name=f\"mal_job-{str(uuid.uuid4())}\", \n", " predictions=label)\n", "\n", - "upload_job.wait_until_done();\n", + "upload_job.wait_until_done()\n", "print(\"Errors:\", upload_job.errors)\n", "print(\"Status of uploads: \", upload_job.statuses)" ], @@ -483,15 +556,19 @@ "metadata": {}, "source": [ "# Upload label for this data row in project \n", - "upload_job = lb.LabelImport.create_from_objects(\n", - " client = client, \n", - " project_id = project.uid, \n", - " name=\"label_import_job\"+str(uuid.uuid4()), \n", - " labels=label)\n", + "# Uncomment this code when excluding relationships from label import\n", + "# Relationships are not currently supported for label import\n", "\n", - "upload_job.wait_until_done();\n", - "print(\"Errors:\", upload_job.errors)\n", - "print(\"Status of uploads: \", upload_job.statuses)" + "\n", + "# upload_job = lb.LabelImport.create_from_objects(\n", + "# client = client, \n", + "# project_id = project.uid, \n", + "# name=\"label_import_job\"+str(uuid.uuid4()), \n", + "# labels=label)\n", + "\n", + "# upload_job.wait_until_done();\n", + "# print(\"Errors:\", upload_job.errors)\n", + "# print(\"Status of uploads: \", upload_job.statuses)" ], "cell_type": "code", "outputs": [], diff --git a/examples/annotation_import/dicom.ipynb b/examples/annotation_import/dicom.ipynb new file mode 100644 index 000000000..a456d3c2f --- /dev/null +++ b/examples/annotation_import/dicom.ipynb @@ -0,0 +1,534 @@ +{ + "nbformat": 4, + "nbformat_minor": 5, + "metadata": {}, + "cells": [ + { + "metadata": {}, + "source": [ + "\n", + " \n", + "" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ 
+ "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + " " + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# DICOM Annotation Import\n", + "\n", + "* Annotations must be created and uploaded using NDJSON\n", + "* Supported annotations that can be uploaded through the SDK:\n", + " * Polyline\n", + " * Segmentation masks \n", + "* **NOT** supported:\n", + " * Bounding box\n", + " * Point\n", + " * Polygons\n", + " * Free form text classifications\n", + " * Radio classifications \n", + " * Checklist classifications" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "### Setup" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "!pip install -q 'labelbox[data]'" + ], + "cell_type": "code", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.0.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" + ] + } + ], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "import labelbox as lb\n", + "import labelbox.types as lb_types\n", + "import uuid" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "### Replace with your API key \n", + "Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# Add your api key\n", + "API_KEY=None\n", + "client = lb.Client(api_key=API_KEY)" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Supported annotations for DICOM\n" + ], + "cell_type": "markdown" + }, + { + 
"metadata": {}, + "source": [ + "######## Polyline ########\n", + "polyline_annotation = [\n", + " lb_types.DICOMObjectAnnotation(\n", + " name=\"line_dicom\",\n", + " group_key=lb_types.GroupKey.AXIAL,\n", + " frame=1,\n", + " value=lb_types.Line(points=[\n", + " lb_types.Point(x=10, y=10),\n", + " lb_types.Point(x=200, y=20),\n", + " lb_types.Point(x=250, y=250)\n", + " ]),\n", + " segment_index=0,\n", + " keyframe=True,\n", + " ),\n", + " lb_types.DICOMObjectAnnotation(\n", + " name=\"line_dicom\",\n", + " group_key=lb_types.GroupKey.AXIAL,\n", + " frame=20,\n", + " value=lb_types.Line(points=[\n", + " lb_types.Point(x=10, y=10),\n", + " lb_types.Point(x=200, y=10),\n", + " lb_types.Point(x=300, y=300)\n", + " ]),\n", + " segment_index=1,\n", + " keyframe=True,\n", + " ), \n", + "]\n", + "\n", + "polyline_annotation_ndjson = {\n", + " 'name': 'line_dicom',\n", + " 'groupKey': 'axial', # should be 'axial', 'sagittal', or 'coronal'\n", + " 'segments': [\n", + " {\n", + " 'keyframes': [{\n", + " 'frame': 1,\n", + " 'line': [\n", + " {'x': 10, 'y': 10},\n", + " {'x': 200, 'y': 20},\n", + " {'x': 250, 'y': 250},\n", + " ]\n", + " }]},\n", + " {\n", + " 'keyframes' : [{\n", + " 'frame': 20,\n", + " 'line': [\n", + " {'x': 10, 'y': 10},\n", + " {'x': 200, 'y': 10},\n", + " {'x': 300, 'y': 300},\n", + " ]\n", + " }]}\n", + " ],\n", + "}" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "######## Segmentation Masks ########\n", + "\n", + "mask_annotation = [\n", + " lb_types.DICOMMaskAnnotation(\n", + " group_key='axial',\n", + " frames=[\n", + " lb_types.MaskFrame(\n", + " index=1,\n", + " instance_uri=\"https://storage.googleapis.com/labelbox-datasets/dicom-sample-data/sample-mask-1.png\"\n", + " ),\n", + " lb_types.MaskFrame(\n", + " index=5,\n", + " instance_uri=\"https://storage.googleapis.com/labelbox-datasets/dicom-sample-data/sample-mask-1.png\"\n", + " )\n", + " ],\n", + " instances=[\n", + " 
lb_types.MaskInstance(\n", + " color_rgb=(255, 255, 255),\n", + " name=\"segmentation_mask_dicom\"\n", + " )\n", + " ])\n", + "]\n", + "\n", + "mask_annotation_ndjson = {\n", + " 'groupKey': 'axial',\n", + " 'masks': {\n", + " 'frames': [{\n", + " 'index': 1,\n", + " 'instanceURI': \"https://storage.googleapis.com/labelbox-datasets/dicom-sample-data/sample-mask-1.png\"\n", + " }, {\n", + " 'index': 5,\n", + " 'instanceURI': \"https://storage.googleapis.com/labelbox-datasets/dicom-sample-data/sample-mask-1.png\"\n", + " }],\n", + " 'instances': [\n", + " {\n", + " 'colorRGB': (255, 255, 255),\n", + " 'name': 'segmentation_mask_dicom'\n", + " }\n", + " ]\n", + " },\n", + "}\n" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Upload Annotations - putting it all together" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "### Step 1: Import data rows into Catalog" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "global_key = \"sample-dicom-1.dcm\"\n", + "asset = {\n", + " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/dicom-sample-data/sample-dicom-1.dcm\", \n", + " \"global_key\": global_key,\n", + "}\n", + "\n", + "dataset = client.create_dataset(name=\"dicom_demo_dataset\")\n", + "task = dataset.create_data_rows([asset])\n", + "task.wait_till_done()\n", + "print(\"Errors :\",task.errors)\n", + "print(\"Failed data rows:\" ,task.failed_data_rows)" + ], + "cell_type": "code", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Errors : None\n", + "Failed data rows: None\n" + ] + } + ], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "### Step 2: Create/select an ontology\n", + "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the `name` fields in 
your annotations to ensure the correct feature schemas are matched.\n", + "\n", + "For example, when we create the line annotation above, we provided the `name` as `line_dicom`. Now, when we setup our ontology, we must ensure that the name of my line tool is also `line_dicom`. The same alignment must hold true for the other tools and classifications we create in our ontology.\n", + "\n", + "\n", + "[Documentation for reference ](https://docs.labelbox.com/reference/import-text-annotations)" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "ontology_builder = lb.OntologyBuilder(\n", + " tools=[\n", + " lb.Tool(tool=lb.Tool.Type.RASTER_SEGMENTATION, name=\"segmentation_mask_dicom\"),\n", + " lb.Tool(tool=lb.Tool.Type.LINE, name=\"line_dicom\"),\n", + " ]\n", + ")\n", + "\n", + "ontology = client.create_ontology(\"Ontology DICOM Annotations\", ontology_builder.asdict(), media_type=lb.MediaType.Dicom)" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "### Step 3: Create a labeling project \n", + "Connect the ontology to the labeling project." + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# Project defaults to batch mode with benchmark quality settings if this argument is not provided\n", + "# Queue mode will be deprecated once dataset mode is deprecated\n", + "\n", + "project = client.create_project(name=\"dicom_project_demo\", media_type=lb.MediaType.Dicom)\n", + "\n", + "## connect ontology to your project\n", + "project.setup_editor(ontology)" + ], + "cell_type": "code", + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Default createProject behavior will soon be adjusted to prefer batch projects. 
Pass in `queue_mode` parameter explicitly to opt-out for the time being.\n" + ] + } + ], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "### Step 4: Send a batch of data rows to the project" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# Create batches\n", + "\n", + "# Create a batch to send to your MAL project\n", + "batch = project.create_batch(\n", + " \"first-batch-dicom-demo\", # Each batch in a project must have a unique name\n", + " global_keys=[global_key], # a list of data row objects, data row ids or global keys\n", + " priority=5 # priority between 1(Highest) - 5(lowest)\n", + ")\n", + "\n", + "print(\"Batch: \", batch)" + ], + "cell_type": "code", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Batch: \n" + ] + } + ], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "### Step 5: Create the annotations payload \n", + "Create the annotations payload using the snippets of code above.\n", + "\n", + "Labelbox supports two formats for the annotations payload: NDJSON and Python Annotation types." + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "#### Python Annotation Types" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "\n", + "annotations_list = polyline_annotation + mask_annotation\n", + "labels = [\n", + " lb_types.Label(\n", + " data=lb_types.DicomData(global_key=global_key),\n", + " annotations=annotations_list\n", + " )\n", + "]" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "#### NDJSON annotations\n", + "Here we create the complete `label_ndjson` payload of annotations. There is one annotation for each *reference to an annotation* that we created above." 
+ ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "label_ndjson = []\n", + "\n", + "for annotation in [\n", + " polyline_annotation_ndjson, \n", + " mask_annotation_ndjson\n", + "]: \n", + " annotation.update({\n", + " 'dataRow': {\n", + " 'globalKey': global_key\n", + " }\n", + " })\n", + " label_ndjson.append(annotation)\n" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "### Step 6: Upload annotations to a project as pre-labels or completed labels\n", + "For the purpose of this tutorial only run one of the label imports at once, otherwise the previous import might get overwritten." + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "#### Model-Assisted Labeling (MAL)" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# Upload MAL label for this data row in project\n", + "upload_job_mal = lb.MALPredictionImport.create_from_objects(\n", + " client = client, \n", + " project_id = project.uid, \n", + " name=\"mal_import_job-\" + str(uuid.uuid4()), \n", + " predictions=labels)\n", + "\n", + "upload_job_mal.wait_until_done();\n", + "print(\"Errors:\", upload_job_mal.errors)\n", + "print(\"Status of uploads: \", upload_job_mal.statuses)\n", + "print(\" \")" + ], + "cell_type": "code", + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/ibrahim/workspace/labelbox-python/labelbox/data/serialization/ndjson/label.py:177: UserWarning: Nested classifications are not currently supported\n", + " for video object annotations\n", + " and will not import alongside the object annotations.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Errors: []\n", + "Status of uploads: [{'uuid': 'd193960f-1c7b-4493-87d1-8a55b2405748', 'dataRow': {'id': 'clftu3umv0384075r5poqey9o', 'globalKey': 'sample-dicom-1.dcm'}, 'status': 'SUCCESS'}, {'uuid': 
'8d0c454e-3385-44fc-97db-0516db3e7dc1', 'dataRow': {'id': 'clftu3umv0384075r5poqey9o', 'globalKey': 'sample-dicom-1.dcm'}, 'status': 'SUCCESS'}]\n", + " \n" + ] + } + ], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "#### Label Import" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "upload_job_label_import = lb.LabelImport.create_from_objects(\n", + " client = client,\n", + " project_id = project.uid, \n", + " name = \"label_import_job-\" + str(uuid.uuid4()),\n", + " labels=labels\n", + ")\n", + "\n", + "upload_job_label_import.wait_until_done()\n", + "print(\"Errors:\", upload_job_label_import.errors)\n", + "print(\"Status of uploads: \", upload_job_label_import.statuses)\n", + "print(\" \")" + ], + "cell_type": "code", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Errors: []\n", + "Status of uploads: [{'uuid': '1cc42044-4f36-45af-8b05-da6c8dd5bf37', 'dataRow': {'id': 'clftu3umv0384075r5poqey9o', 'globalKey': 'sample-dicom-1.dcm'}, 'status': 'SUCCESS'}, {'uuid': '005e5d45-b3c2-49f5-bf82-aef760de7826', 'dataRow': {'id': 'clftu3umv0384075r5poqey9o', 'globalKey': 'sample-dicom-1.dcm'}, 'status': 'SUCCESS'}]\n", + " \n" + ] + } + ], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "### Optional deletions for cleanup" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# Delete Project\n", + "# project.delete()\n", + "# dataset.delete()\n" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + } + ] +} \ No newline at end of file diff --git a/examples/annotation_import/html.ipynb b/examples/annotation_import/html.ipynb new file mode 100644 index 000000000..98ca23896 --- /dev/null +++ b/examples/annotation_import/html.ipynb @@ -0,0 +1,529 @@ +{ + "nbformat": 4, + "nbformat_minor": 5, + "metadata": {}, + "cells": [ + { + "metadata": {}, + "source": [ + "\n", + " \n", + "" + ], + "cell_type": "markdown" + }, + { + "metadata": 
{}, + "source": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# HTML Annotation Import\n", + "* This notebook will provide examples of each supported annotation type for HTML assets, and also cover MAL and Label Import methods:\n", + "\n", + "Supported annotations that can be uploaded through the SDK\n", + "\n", + "* Classification Radio \n", + "* Classification Checklist \n", + "* Classification Free Text \n", + "\n", + "**Not** supported annotations\n", + "\n", + "* Bounding box\n", + "* NER\n", + "* Polygon \n", + "* Point\n", + "* Polyline \n", + "* Segmentation Mask\n", + "\n", + "MAL and Label Import:\n", + "\n", + "* Model-assisted labeling - used to provide pre-annotated data for your labelers. This will enable a reduction in the total amount of time to properly label your assets. Model-assisted labeling does not submit the labels automatically, and will need to be reviewed by a labeler for submission.\n", + "* Label Import - used to provide ground truth labels. These can in turn be used and compared against prediction labels, or used as benchmarks to see how your labelers are doing.\n", + "\n" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "* For information on what types of annotations are supported per data type, refer to this documentation:\n", + " * https://docs.labelbox.com/docs/model-assisted-labeling#option-1-import-via-python-annotation-types-recommended" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "* Notes:\n", + " * Wait until the import job is complete before opening the Editor to make sure all annotations are imported properly." 
+ ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "!pip install -q 'labelbox[data]'" + ], + "cell_type": "code", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.0.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" + ] + } + ], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "# Setup" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "import labelbox as lb\n", + "import uuid\n", + "import labelbox.types as lb_types" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "# Replace with your API key\n", + "Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# Add your api key\n", + "API_KEY = \"\"\n", + "client = lb.Client(api_key=API_KEY)" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Supported annotations for HTML" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "##### Classification free text #####\n", + "\n", + "text_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"text_html\",\n", + " value=lb_types.Text(answer=\"sample text\"),\n", + ")\n", + "\n", + "text_annotation_ndjson = {\n", + " 'name': 'text_html',\n", + " 'answer': 'sample text',\n", + "}" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "##### Checklist Classification ####### \n", + "\n", + "checklist_annotation= 
lb_types.ClassificationAnnotation(\n", + " name=\"checklist_html\", # must match your ontology feature's name\n", + " value=lb_types.Checklist(\n", + " answer = [\n", + " lb_types.ClassificationAnswer(\n", + " name = \"first_checklist_answer\"\n", + " ), \n", + " lb_types.ClassificationAnswer(\n", + " name = \"second_checklist_answer\"\n", + " )\n", + " ]\n", + " ),\n", + " )\n", + "\n", + "\n", + "checklist_annotation_ndjson = {\n", + " 'name': 'checklist_html',\n", + " 'answers': [\n", + " {'name': 'first_checklist_answer'},\n", + " {'name': 'second_checklist_answer'}\n", + " ],\n", + "}" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "######## Radio Classification ######\n", + "\n", + "radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"radio_html\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"second_radio_answer\")))\n", + "\n", + "radio_annotation_ndjson = {\n", + " 'name': 'radio_html',\n", + " 'answer': {\n", + " 'name': 'first_radio_answer'\n", + " },\n", + "}" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Upload Annotations - putting it all together " + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "## Step 1: Import data rows into Catalog" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# Create one Labelbox dataset\n", + "\n", + "global_key = \"sample_html_1.html\"\n", + "\n", + "asset = {\n", + " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/html_sample_data/sample_html_1.html\",\n", + " \"global_key\": global_key\n", + "}\n", + "\n", + "dataset = client.create_dataset(name=\"html_annotation_import_demo_dataset\")\n", + "task = dataset.create_data_rows([asset])\n", + "task.wait_till_done()\n", + "print(\"Errors:\", task.errors)\n", + "print(\"Failed data rows: \", task.failed_data_rows)" + ], 
+ "cell_type": "code", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Errors: None\n", + "Failed data rows: None\n" + ] + } + ], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Step 2: Create/select an ontology\n", + "\n", + "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the `name` fields in your annotations to ensure the correct feature schemas are matched.\n", + "\n", + "For example, when we created the text annotation, we provided the `name` as `text_html`. Now, when we set up our ontology, we must ensure that the name of the classification is also `text_html`. The same alignment must hold true for the other tools and classifications we create in our ontology." + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "ontology_builder = lb.OntologyBuilder(\n", + " classifications=[ \n", + " lb.Classification( \n", + " class_type=lb.Classification.Type.TEXT,\n", + " name=\"text_html\"), \n", + " lb.Classification( \n", + " class_type=lb.Classification.Type.CHECKLIST, \n", + " name=\"checklist_html\", \n", + " options=[\n", + " lb.Option(value=\"first_checklist_answer\"),\n", + " lb.Option(value=\"second_checklist_answer\") \n", + " ]\n", + " ), \n", + " lb.Classification( \n", + " class_type=lb.Classification.Type.RADIO, \n", + " name=\"radio_html\", \n", + " options=[\n", + " lb.Option(value=\"first_radio_answer\"),\n", + " lb.Option(value=\"second_radio_answer\")\n", + " ]\n", + " )\n", + " ]\n", + ")\n", + "\n", + "ontology = client.create_ontology(\"Ontology HTML Annotations\", ontology_builder.asdict(), media_type=lb.MediaType.Html)" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "\n", + "## Step 3: Create a labeling project\n", + "Connect the ontology to the labeling project" + ], + 
"cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# Create Labelbox project\n", + "project = client.create_project(name=\"html_project\", \n", + " media_type=lb.MediaType.Html)\n", + "\n", + "# Setup your ontology \n", + "project.setup_editor(ontology) # Connect your ontology and editor to your project" + ], + "cell_type": "code", + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Default createProject behavior will soon be adjusted to prefer batch projects. Pass in `queue_mode` parameter explicitly to opt-out for the time being.\n" + ] + } + ], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Step 4: Send a batch of data rows to the project" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# Setup Batches and Ontology\n", + "\n", + "# Create a batch to send to your MAL project\n", + "batch = project.create_batch(\n", + " \"first-batch-html-demo\", # Each batch in a project must have a unique name\n", + " global_keys=[global_key], # Paginated collection of data row objects, list of data row ids or global keys\n", + " priority=5 # priority between 1(Highest) - 5(lowest)\n", + ")\n", + "\n", + "print(\"Batch: \", batch)" + ], + "cell_type": "code", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Batch: \n" + ] + } + ], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Step 5: Create the annotations payload\n", + "Create the annotations payload using the snippets of code above\n", + "\n", + "Labelbox supports two formats for the annotations payload: NDJSON and Python Annotation types." + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "#### Python annotation\n", + "Here we create the complete label payload of annotations using only the Python annotation format. There is one annotation for each reference to an annotation that we created. 
" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "label = []\n", + "label.append(\n", + " lb_types.Label(\n", + " data=lb_types.HTMLData(\n", + " global_key=global_key\n", + " ),\n", + " annotations=[\n", + " text_annotation,\n", + " checklist_annotation,\n", + " radio_annotation\n", + " ]\n", + " )\n", + ")" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "### NDJSON annotations \n", + "Here we create the complete label NDJSON payload of annotations using only the NDJSON format. There is one annotation for each reference to an annotation that we created [above](https://colab.research.google.com/drive/1rFv-VvHUBbzFYamz6nSMRJz1mEg6Ukqq#scrollTo=3umnTd-MfI0o&line=1&uniqifier=1)." + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "label_ndjson = []\n", + "for annotations in [text_annotation_ndjson,\n", + " checklist_annotation_ndjson,\n", + " radio_annotation_ndjson]:\n", + " annotations.update({\n", + " 'dataRow': {\n", + " 'globalKey': global_key\n", + " }\n", + " })\n", + " label_ndjson.append(annotations)" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "### Step 6: Upload annotations to a project as pre-labels or complete labels" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "#### Model Assisted Labeling (MAL)\n", + "For the purpose of this tutorial, only run one of the label_ndjson annotation type tools at a time (NDJSON or Annotation types). 
Delete the previous labels before uploading labels that use the 2nd method (ndjson)" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# Upload our label using Model-Assisted Labeling\n", + "upload_job = lb.MALPredictionImport.create_from_objects(\n", + " client = client, \n", + " project_id = project.uid, \n", + " name=f\"mal_job-{str(uuid.uuid4())}\", \n", + " predictions=label)\n", + "\n", + "upload_job.wait_until_done();\n", + "print(\"Errors:\", upload_job.errors)\n", + "print(\"Status of uploads: \", upload_job.statuses)" + ], + "cell_type": "code", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Errors: []\n", + "Status of uploads: [{'uuid': 'cd188c7e-167f-48ce-98e6-571f255ba319', 'dataRow': {'id': 'clfslgefk0um4079rfyajcf18', 'globalKey': 'sample_html_1.html'}, 'status': 'SUCCESS'}, {'uuid': '3153214d-831c-4aa4-bf95-280d657cfe51', 'dataRow': {'id': 'clfslgefk0um4079rfyajcf18', 'globalKey': 'sample_html_1.html'}, 'status': 'SUCCESS'}, {'uuid': '297a9f08-30b4-427f-b8f0-d1139709b97d', 'dataRow': {'id': 'clfslgefk0um4079rfyajcf18', 'globalKey': 'sample_html_1.html'}, 'status': 'SUCCESS'}]\n" + ] + } + ], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "#### Label Import" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# Upload label for this data row in project \n", + "upload_job = lb.LabelImport.create_from_objects(\n", + " client = client, \n", + " project_id = project.uid, \n", + " name=\"label_import_job\"+str(uuid.uuid4()), \n", + " labels=label)\n", + "\n", + "upload_job.wait_until_done();\n", + "print(\"Errors:\", upload_job.errors)\n", + "print(\"Status of uploads: \", upload_job.statuses)" + ], + "cell_type": "code", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Errors: []\n", + "Status of uploads: [{'uuid': 'f1c3e6b6-1e7e-41ca-abd5-a0fd919082a6', 'dataRow': {'id': 'clfslgefk0um4079rfyajcf18', 'globalKey': 
'sample_html_1.html'}, 'status': 'SUCCESS'}, {'uuid': '44714fd1-e8a8-4da4-aea9-b71392ac78a4', 'dataRow': {'id': 'clfslgefk0um4079rfyajcf18', 'globalKey': 'sample_html_1.html'}, 'status': 'SUCCESS'}, {'uuid': '9d520664-9565-46e7-8b0b-c0615158d004', 'dataRow': {'id': 'clfslgefk0um4079rfyajcf18', 'globalKey': 'sample_html_1.html'}, 'status': 'SUCCESS'}]\n" + ] + } + ], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "### Optional deletions for cleanup " + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# project.delete()\n", + "# dataset.delete()" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + } + ] +} \ No newline at end of file diff --git a/examples/annotation_import/image.ipynb b/examples/annotation_import/image.ipynb index 597e9aea0..0bffe9e51 100644 --- a/examples/annotation_import/image.ipynb +++ b/examples/annotation_import/image.ipynb @@ -33,20 +33,21 @@ { "metadata": {}, "source": [ - "# Image Annotation Import\n", + "# Image annotation import\n", "This notebook will provide examples of each supported annotation type for image assets. \n", "\n", "### [Model-assisted labeling (MAL)](https://docs.labelbox.com/v4/docs/model-assisted-labeling)\n", + "\n", "* This workflow allows you to import computer-generated predictions (or simply annotations created outside of Labelbox) as pre-labels on an asset. \n", - "The imported annotations will be pre-populated in the labeling editor. However, in order to convert the pre-labels to real annotations, a human labeler will still need to open the Data Row in the Editor and submit it. This functionality is designed to speed up human labeling.\n", "\n", + "The imported annotations will be pre-populated in the labeling editor. However, in order to convert the pre-labels to real annotations, a human labeler will still need to open the Data Row in the Editor and submit it. 
This functionality is designed to speed up human labeling.\n", "\n", "### [Import ground truth](https://docs.labelbox.com/v4/docs/import-ground-truth)\n", - "* This workflow functionality allows you to bulk import your ground truth annotations from an external or third-party labeling system into Labelbox Annotate. Using the label import API to import external data is a useful way to consolidate and migrate all annotations into Labelbox as a single source of truth.\n", "\n", + "* This workflow functionality allows you to bulk import your ground truth annotations from an external or third-party labeling system into Labelbox Annotate. Using the label import API to import external data is a useful way to consolidate and migrate all annotations into Labelbox as a single source of truth.\n", "\n", - "### Python Annotation types vs NDJSON\n", - "**Python Annotation Type (recommended)**\n", + "### Python annotation types vs NDJSON\n", + "**Python annotation type (recommended)**\n", "- Provides a seamless transition between third-party platforms, machine learning pipelines, and Labelbox.\n", "\n", "- Allows you to build annotations locally with local file paths, numpy arrays, or URLs\n", @@ -67,14 +68,14 @@ { "metadata": {}, "source": [ - "## Imports" + "## Setup" ], "cell_type": "markdown" }, { "metadata": {}, "source": [ - "!pip install -q 'labelbox[data]'" + "!pip install -q \"labelbox[data]\"" ], "cell_type": "code", "outputs": [], @@ -83,10 +84,10 @@ { "metadata": {}, "source": [ - "import labelbox as lb\n", - "import labelbox.types as lb_types\n", "import uuid\n", - "import numpy as np\n" + "import numpy as np\n", + "import labelbox as lb\n", + "import labelbox.types as lb_types" ], "cell_type": "code", "outputs": [], @@ -95,7 +96,7 @@ { "metadata": {}, "source": [ - "## Replace with your API KEY\n", + "## Replace with your API key\n", "\n", "Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)" ], @@ -114,34 +115,33 @@ { "metadata": {}, 
"source": [ - "## Supported Annotations for Image\n" + "## Supported annotations for image\n" ], "cell_type": "markdown" }, { "metadata": {}, "source": [ - "### Supported Annotation Types" + "### Radio classification" ], "cell_type": "markdown" }, { "metadata": {}, "source": [ - "########### Radio Classification ###########\n", - "\n", "# Python annotation\n", "radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"radio_question\", \n", - " value=lb_types.Radio(answer = lb_types.ClassificationAnswer(name = \"second_radio_answer\"))\n", - ")\n", - "\n", + " name=\"radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"second_radio_answer\")))\n", "\n", "# NDJSON\n", "radio_annotation_ndjson = {\n", - " 'name': 'radio_question',\n", - " 'answer': {'name': 'second_radio_answer'}\n", - "} " + " \"name\": \"radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"second_radio_answer\"\n", + " }\n", + "}" ], "cell_type": "code", "outputs": [], @@ -150,30 +150,78 @@ { "metadata": {}, "source": [ - "########## Nested Radio and Checklist Classification is only supported with NDJSON tools##########\n", - "\n", - "# NDJSON \n", + "### Nested radio and checklist classification\n", + "> **NOTE:** \n", + "> Only supported with NDJSON" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(\n", + " answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(\n", + " answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\"\n", + " )\n", + " )\n", + " )\n", + " ]\n", + " )\n", + " )\n", + ")\n", + "# NDJSON\n", "nested_radio_annotation_ndjson = {\n", - " \"name\": \"nested_radio_question\",\n", - " 
\"answer\": {\"name\": \"first_radio_answer\"},\n", - " \"classifications\" : [\n", - " {'name': 'sub_radio_question', 'answer': {'name': 'first_sub_radio_answer'}}\n", - " ]\n", + " \"name\": \"nested_radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_radio_answer\"\n", + " },\n", + " \"classifications\": [{\n", + " \"name\": \"sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_radio_answer\"\n", + " }\n", + " }]\n", "}\n", "\n", "\n", + "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " value=lb_types.Checklist(\n", + " answer=[lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(\n", + " answer=[lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\"\n", + " )]\n", + " ))\n", + " ]\n", + " )]\n", + " )\n", + ")\n", + "\n", "nested_checklist_annotation_ndjson = {\n", - " \"name\": \"nested_checklist_question\",\n", - " \"answer\": [{\n", - " \"name\": \"first_checklist_answer\", \n", - " \"classifications\" : [\n", - " {\n", - " \"name\": \"sub_checklist_question\", \n", - " \"answer\": {\"name\": \"first_sub_checklist_answer\"}\n", - " } \n", - " ] \n", - " }]\n", - "}\n" + " \"name\": \"nested_checklist_question\",\n", + " \"answer\": [{\n", + " \"name\": \"first_checklist_answer\",\n", + " \"classifications\": [{\n", + " \"name\": \"sub_checklist_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_checklist_answer\"\n", + " }\n", + " }]\n", + " }]\n", + "}" ], "cell_type": "code", "outputs": [], @@ -182,30 +230,29 @@ { "metadata": {}, "source": [ - "############ Checklist question ############\n", - "\n", - "# Python Annotations\n", - "checklist_annotation= lb_types.ClassificationAnnotation(\n", - " name=\"checklist_question\", # must match your ontology feature's 
name\n", - " value=lb_types.Checklist(\n", - " answer = [\n", - " lb_types.ClassificationAnswer(\n", - " name = \"first_checklist_answer\"\n", - " ), \n", - " lb_types.ClassificationAnswer(\n", - " name = \"second_checklist_answer\"\n", - " )\n", - " ]\n", - " )\n", - " )\n", + "### Checklist question" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# Python annotation\n", + "checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"checklist_question\", # must match your ontology feature\"s name\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\")\n", + " ]))\n", "\n", "# NDJSON\n", "checklist_annotation_ndjson = {\n", - " 'name': 'checklist_question',\n", - " 'answer': [\n", - " {'name': 'first_checklist_answer'},\n", - " {'name': 'second_checklist_answer'}\n", - " ]\n", + " \"name\": \"checklist_question\",\n", + " \"answer\": [{\n", + " \"name\": \"first_checklist_answer\"\n", + " }, {\n", + " \"name\": \"second_checklist_answer\"\n", + " }]\n", "}" ], "cell_type": "code", @@ -215,19 +262,22 @@ { "metadata": {}, "source": [ - "############# Free text Classification #############\n", - "\n", + "### Free form text classification" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ "# Python annotation\n", "text_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"free_text\", # must match your ontology feature's name\n", - " value=lb_types.Text(answer=\"sample text\")\n", - ")\n", - "\n", + " name=\"free_text\", # must match your ontology feature\"s name\n", + " value=lb_types.Text(answer=\"sample text\"))\n", "\n", "# NDJSON\n", "text_annotation_ndjson = {\n", - " 'name': 'free_text',\n", - " 'answer': 'sample text',\n", + " \"name\": \"free_text\",\n", + " \"answer\": \"sample text\",\n", "}" ], "cell_type": "code", @@ -237,28 +287,31 @@ { "metadata": {}, 
"source": [ - "####### Bounding box #######\n", - "\n", - "\n", - "# Python Annotation \n", + "### Bounding box" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# Python annotation\n", "bbox_annotation = lb_types.ObjectAnnotation(\n", - " name = \"bounding_box\", # must match your ontology feature's name\n", - " value = lb_types.Rectangle(\n", - " start=lb_types.Point(x=20, y=15), # Top left\n", - " end=lb_types.Point(x=25, y=30), # Bottom right\n", - " )\n", - ")\n", + " name=\"bounding_box\", # must match your ontology feature\"s name\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=1690, y=977), # x = left, y = top \n", + " end=lb_types.Point(x=1915, y=1307), # x= left + width , y = top + height\n", + " ))\n", "\n", - "#NDJSON \n", + "# NDJSON\n", "bbox_annotation_ndjson = {\n", - " 'name': 'bounding_box',\n", - " 'bbox': {\n", - " \"top\": 977,\n", - " \"left\": 1690,\n", - " \"height\": 330,\n", - " \"width\": 225\n", - " }\n", - "}\n" + " \"name\": \"bounding_box\",\n", + " \"bbox\": {\n", + " \"top\": 977,\n", + " \"left\": 1690,\n", + " \"height\": 330,\n", + " \"width\": 225\n", + " }\n", + "}" ], "cell_type": "code", "outputs": [], @@ -267,37 +320,42 @@ { "metadata": {}, "source": [ - "# Bounding box with nested classification\n", + "### Bounding box with nested classification" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# Python annotation\n", "bbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n", " name=\"bbox_with_radio_subclass\",\n", " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=10, y=20), # Top left\n", - " end=lb_types.Point(x=25, y=30), # Bottom right\n", + " start=lb_types.Point(x=541, y=933), # x = left, y = top \n", + " end=lb_types.Point(x=871, y=1124), # x= left + width , y = top + height\n", " ),\n", " classifications=[\n", - " \tlb_types.ClassificationAnnotation(\n", - " \tname=\"sub_radio_question\",\n", - " 
\t\tvalue=lb_types.Radio(answer=lb_types.ClassificationAnswer(name=\"first_sub_radio_answer\"))\n", - " )\n", - " ]\n", - ")\n", - "\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\")))\n", + " ])\n", "\n", "## NDJSON\n", "bbox_with_radio_subclass_ndjson = {\n", - " \"name\": \"bbox_with_radio_subclass\", \n", + " \"name\": \"bbox_with_radio_subclass\",\n", " \"classifications\": [{\n", " \"name\": \"sub_radio_question\",\n", - " \"answer\": \n", - " { \"name\":\"first_sub_radio_answer\" }\n", - " \n", + " \"answer\": {\n", + " \"name\": \"first_sub_radio_answer\"\n", + " }\n", " }],\n", " \"bbox\": {\n", - " \"top\": 933,\n", - " \"left\": 541,\n", - " \"height\": 191,\n", - " \"width\": 330\n", - " }\n", + " \"top\": 933,\n", + " \"left\": 541,\n", + " \"height\": 191,\n", + " \"width\": 330\n", + " }\n", "}" ], "cell_type": "code", @@ -306,47 +364,56 @@ }, { "metadata": {}, - "source": [], + "source": [ + "### Polygon" + ], "cell_type": "markdown" }, { "metadata": {}, "source": [ - "########## Polygon ##########\n", - "# Python AnotationTypes \n", + "# Python annotation\n", "polygon_annotation = lb_types.ObjectAnnotation(\n", - " name = \"polygon\", # must match your ontology feature's name \n", - " value=lb_types.Polygon( # Coordinates for the verticies of your polygon\n", - " points=[lb_types.Point(x=1489.581,y=183.934), lb_types.Point(x=2278.306,y=256.885), lb_types.Point(x=2428.197,y=200.437), lb_types.Point(x=2560.0,y=335.419),\n", - " lb_types.Point(x=2557.386,y=503.165), lb_types.Point(x=2320.596,y=503.103), lb_types.Point(x=2156.083, y=628.943), lb_types.Point(x=2161.111,y=785.519),\n", - " lb_types.Point(x=2002.115, y=894.647), lb_types.Point(x=1838.456,y=877.874), lb_types.Point(x=1436.53,y=874.636), lb_types.Point(x=1411.403,y=758.579),\n", - " lb_types.Point(x=1353.853,y=751.74), lb_types.Point(x=1345.264, 
y=453.461), lb_types.Point(x=1426.011,y=421.129)]\n", - " ),\n", - ")\n", - "\n", - "\n", + " name=\"polygon\", # must match your ontology feature\"s name \n", + " value=lb_types.Polygon( # Coordinates for the vertices of your polygon\n", + " points=[\n", + " lb_types.Point(x=1489.581, y=183.934),\n", + " lb_types.Point(x=2278.306, y=256.885),\n", + " lb_types.Point(x=2428.197, y=200.437),\n", + " lb_types.Point(x=2560.0, y=335.419),\n", + " lb_types.Point(x=2557.386, y=503.165),\n", + " lb_types.Point(x=2320.596, y=503.103),\n", + " lb_types.Point(x=2156.083, y=628.943),\n", + " lb_types.Point(x=2161.111, y=785.519),\n", + " lb_types.Point(x=2002.115, y=894.647),\n", + " lb_types.Point(x=1838.456, y=877.874),\n", + " lb_types.Point(x=1436.53, y=874.636),\n", + " lb_types.Point(x=1411.403, y=758.579),\n", + " lb_types.Point(x=1353.853, y=751.74),\n", + " lb_types.Point(x=1345.264, y=453.461),\n", + " lb_types.Point(x=1426.011, y=421.129)\n", + " ]))\n", "\n", "# NDJSON\n", - "\n", "polygon_annotation_ndjson = {\n", - " 'name': 'polygon',\n", - " 'polygon': [\n", - " {'x': 1489.581, 'y': 183.934},\n", - " {'x': 2278.306, 'y': 256.885},\n", - " {'x': 2428.197, 'y': 200.437},\n", - " {'x': 2560.0, 'y': 335.419},\n", - " {'x': 2557.386, 'y': 503.165},\n", - " {'x': 2320.596, 'y': 503.103},\n", - " {'x': 2156.083, 'y': 628.943},\n", - " {'x': 2161.111, 'y': 785.519},\n", - " {'x': 2002.115, 'y': 894.647},\n", - " {'x': 1838.456, 'y': 877.874},\n", - " {'x': 1436.53, 'y': 874.636},\n", - " {'x': 1411.403, 'y': 758.579},\n", - " {'x': 1353.853, 'y': 751.74},\n", - " {'x': 1345.264, 'y': 453.461},\n", - " {'x': 1426.011, 'y': 421.129},\n", - " {'x': 1489.581, 'y': 183.934}\n", + " \"name\": \"polygon\",\n", + " \"polygon\": [\n", + " {\"x\": 1489.581, \"y\": 183.934},\n", + " {\"x\": 2278.306, \"y\": 256.885},\n", + " {\"x\": 2428.197, \"y\": 200.437},\n", + " {\"x\": 2560.0, \"y\": 335.419},\n", + " {\"x\": 2557.386, \"y\": 503.165},\n", + " {\"x\": 2320.596, \"y\": 
503.103},\n", + " {\"x\": 2156.083, \"y\": 628.943},\n", + " {\"x\": 2161.111, \"y\": 785.519},\n", + " {\"x\": 2002.115, \"y\": 894.647},\n", + " {\"x\": 1838.456, \"y\": 877.874},\n", + " {\"x\": 1436.53, \"y\": 874.636},\n", + " {\"x\": 1411.403, \"y\": 758.579},\n", + " {\"x\": 1353.853, \"y\": 751.74},\n", + " {\"x\": 1345.264, \"y\": 453.461},\n", + " {\"x\": 1426.011, \"y\": 421.129},\n", + " {\"x\": 1489.581, \"y\": 183.934}\n", " ]\n", "}" ], @@ -357,39 +424,41 @@ { "metadata": {}, "source": [ - "######### Mask #########\n", - "\n", - "\n", - "# Python \n", + "### Mask" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ "# Identifying what values in the numpy array correspond to the mask annotation\n", "color = (0, 0, 0)\n", "\n", "# convert a polygon to mask\n", - "im_height, im_width = 100,100 #need to provide the height and width of image.\n", + "im_height, im_width = 100,100 # need to provide the height and width of image\n", "mask_data = lb_types.MaskData(arr=\n", " polygon_annotation.value.draw(height=im_height,width=im_width,color=color))\n", "\n", "# convert a 2D array to 3D array\n", - "arr_2d = np.zeros((100,100), dtype='uint8')\n", + "arr_2d = np.zeros((100,100), dtype=\"uint8\")\n", "mask_data = lb_types.MaskData.from_2D_arr(arr_2d)\n", "\n", - "# a 3D array where 3rd axis is RGB values.\n", - "mask_data = lb_types.MaskData(arr= np.zeros([400,450,3],dtype='uint8'))\n", + "# a 3D array where 3rd axis is RGB values\n", + "mask_data = lb_types.MaskData(arr=np.zeros([400,450,3],dtype=\"uint8\"))\n", "\n", + "# Python annotation\n", "mask_annotation = lb_types.ObjectAnnotation(\n", - " name = \"mask\", # must match your ontology feature's name\n", + " name = \"mask\", # must match your ontology feature\"s name\n", " value=lb_types.Mask(mask=mask_data, color=color),\n", ")\n", "\n", - "\n", "# NDJSON\n", "mask_annotation_ndjson = {\n", - " 'name': 'mask',\n", - " 'classifications': [],\n", - " 'mask': {'instanceURI': 
'https://storage.labelbox.com/cjhfn5y6s0pk507024nz1ocys%2F1d60856c-59b7-3060-2754-83f7e93e0d01-1?Expires=1666901963361&KeyName=labelbox-assets-key-3&Signature=t-2s2DB4YjFuWEFak0wxYqfBfZA',\n", - " 'colorRGB': (0, 0, 0)}\n", - "}\n", - "\n" + " \"name\": \"mask\",\n", + " \"classifications\": [],\n", + " \"mask\": {\"instanceURI\": \"https://storage.labelbox.com/cjhfn5y6s0pk507024nz1ocys%2F1d60856c-59b7-3060-2754-83f7e93e0d01-1?Expires=1666901963361&KeyName=labelbox-assets-key-3&Signature=t-2s2DB4YjFuWEFak0wxYqfBfZA\",\n", + " \"colorRGB\": (0, 0, 0)}\n", + "}" ], "cell_type": "code", "outputs": [], @@ -398,19 +467,27 @@ { "metadata": {}, "source": [ - "######## Point Annotation ########\n", - "\n", - "# Python Annotation\n", + "### Point" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# Python annotation\n", "point_annotation = lb_types.ObjectAnnotation(\n", - " name = \"point\", # must match your ontology feature's name\n", - " value = lb_types.Point(x=1166.606, y=1441.768),\n", + " name=\"point\", # must match your ontology feature\"s name\n", + " value=lb_types.Point(x=1166.606, y=1441.768),\n", ")\n", "\n", "# NDJSON\n", "point_annotation_ndjson = {\n", - " 'name': 'point',\n", - " 'classifications': [],\n", - " 'point': {'x': 1166.606, 'y': 1441.768}\n", + " \"name\": \"point\",\n", + " \"classifications\": [],\n", + " \"point\": {\n", + " \"x\": 1166.606,\n", + " \"y\": 1441.768\n", + " }\n", "}" ], "cell_type": "code", @@ -420,50 +497,70 @@ { "metadata": {}, "source": [ - "###### Polygon ######\n", - "\n", - "\n", - "# Python Annotation \n", - "\n", + "### Polyline" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# Python annotation\n", "polyline_annotation = lb_types.ObjectAnnotation(\n", - " name = \"polyline\", # must match your ontology feature's name\n", - " value=lb_types.Line( # Coordinates for the keypoints in your polyline\n", - " points=[lb_types.Point(x=2534.353, y=249.471), 
lb_types.Point(x=2429.492, y=182.092), lb_types.Point(x=2294.322, y=221.962), lb_types.Point(x=2224.491, y=180.463), lb_types.Point(x=2136.123, y=204.716),\n", - " lb_types.Point(x=1712.247, y=173.949), lb_types.Point(x=1703.838, y=84.438), lb_types.Point(x=1579.772, y=82.61), lb_types.Point(x=1583.442, y=167.552),\n", - " lb_types.Point(x=1478.869, y=164.903), lb_types.Point(x=1418.941, y=318.149), lb_types.Point(x=1243.128, y=400.815), lb_types.Point(x=1022.067, y=319.007),\n", - " lb_types.Point(x=892.367, y=379.216), lb_types.Point(x=670.273, y=364.408), lb_types.Point(x=613.114, y=288.16), lb_types.Point(x=377.559, y=238.251),\n", - " lb_types.Point(x=368.087, y=185.064), lb_types.Point(x=246.557, y=167.286), lb_types.Point(x=236.648, y=285.61), lb_types.Point(x=90.929, y=326.412)]\n", - " ),\n", + " name=\"polyline\", # must match your ontology feature\"s name\n", + " value=lb_types.Line( # Coordinates for the keypoints in your polyline\n", + " points=[\n", + " lb_types.Point(x=2534.353, y=249.471),\n", + " lb_types.Point(x=2429.492, y=182.092),\n", + " lb_types.Point(x=2294.322, y=221.962),\n", + " lb_types.Point(x=2224.491, y=180.463),\n", + " lb_types.Point(x=2136.123, y=204.716),\n", + " lb_types.Point(x=1712.247, y=173.949),\n", + " lb_types.Point(x=1703.838, y=84.438),\n", + " lb_types.Point(x=1579.772, y=82.61),\n", + " lb_types.Point(x=1583.442, y=167.552),\n", + " lb_types.Point(x=1478.869, y=164.903),\n", + " lb_types.Point(x=1418.941, y=318.149),\n", + " lb_types.Point(x=1243.128, y=400.815),\n", + " lb_types.Point(x=1022.067, y=319.007),\n", + " lb_types.Point(x=892.367, y=379.216),\n", + " lb_types.Point(x=670.273, y=364.408),\n", + " lb_types.Point(x=613.114, y=288.16),\n", + " lb_types.Point(x=377.559, y=238.251),\n", + " lb_types.Point(x=368.087, y=185.064),\n", + " lb_types.Point(x=246.557, y=167.286),\n", + " lb_types.Point(x=236.648, y=285.61),\n", + " lb_types.Point(x=90.929, y=326.412)\n", + " ]),\n", ")\n", "\n", "# NDJSON\n", 
"polyline_annotation_ndjson = {\n", - " 'name': 'polyline',\n", - " 'classifications': [],\n", - " 'line': [\n", - " {'x': 2534.353, 'y': 249.471},\n", - " {'x': 2429.492, 'y': 182.092},\n", - " {'x': 2294.322, 'y': 221.962},\n", - " {'x': 2224.491, 'y': 180.463},\n", - " {'x': 2136.123, 'y': 204.716},\n", - " {'x': 1712.247, 'y': 173.949},\n", - " {'x': 1703.838, 'y': 84.438},\n", - " {'x': 1579.772, 'y': 82.61},\n", - " {'x': 1583.442, 'y': 167.552},\n", - " {'x': 1478.869, 'y': 164.903},\n", - " {'x': 1418.941, 'y': 318.149},\n", - " {'x': 1243.128, 'y': 400.815},\n", - " {'x': 1022.067, 'y': 319.007},\n", - " {'x': 892.367, 'y': 379.216},\n", - " {'x': 670.273, 'y': 364.408},\n", - " {'x': 613.114, 'y': 288.16},\n", - " {'x': 377.559, 'y': 238.251},\n", - " {'x': 368.087, 'y': 185.064},\n", - " {'x': 246.557, 'y': 167.286},\n", - " {'x': 236.648, 'y': 285.61},\n", - " {'x': 90.929, 'y': 326.412}\n", + " \"name\": \"polyline\",\n", + " \"classifications\": [],\n", + " \"line\": [\n", + " {\"x\": 2534.353, \"y\": 249.471},\n", + " {\"x\": 2429.492, \"y\": 182.092},\n", + " {\"x\": 2294.322, \"y\": 221.962},\n", + " {\"x\": 2224.491, \"y\": 180.463},\n", + " {\"x\": 2136.123, \"y\": 204.716},\n", + " {\"x\": 1712.247, \"y\": 173.949},\n", + " {\"x\": 1703.838, \"y\": 84.438},\n", + " {\"x\": 1579.772, \"y\": 82.61},\n", + " {\"x\": 1583.442, \"y\": 167.552},\n", + " {\"x\": 1478.869, \"y\": 164.903},\n", + " {\"x\": 1418.941, \"y\": 318.149},\n", + " {\"x\": 1243.128, \"y\": 400.815},\n", + " {\"x\": 1022.067, \"y\": 319.007},\n", + " {\"x\": 892.367, \"y\": 379.216},\n", + " {\"x\": 670.273, \"y\": 364.408},\n", + " {\"x\": 613.114, \"y\": 288.16},\n", + " {\"x\": 377.559, \"y\": 238.251},\n", + " {\"x\": 368.087, \"y\": 185.064},\n", + " {\"x\": 246.557, \"y\": 167.286},\n", + " {\"x\": 236.648, \"y\": 285.61},\n", + " {\"x\": 90.929, \"y\": 326.412}\n", " ]\n", - "}\n" + "}" ], "cell_type": "code", "outputs": [], @@ -472,42 +569,74 @@ { "metadata": {}, 
"source": [ - "##### Relationship ##### \n", - "# only supported with NDJson and for MAL imports \n", + "### Relationship\n", + "> **NOTE:** \n", + "> Only supported for MAL imports" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# Python Annotation\n", + "bbox_source = lb_types.ObjectAnnotation(\n", + " name=\"bounding_box\",\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=2096, y=1264),\n", + " end=lb_types.Point(x=2240, y=1689),\n", + " ),\n", + ")\n", + "\n", + "bbox_target = lb_types.ObjectAnnotation(\n", + " name=\"bounding_box\",\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=2272, y=1346),\n", + " end=lb_types.Point(x=2416, y=1704),\n", + " ),\n", + ")\n", + "\n", + "relationship = lb_types.RelationshipAnnotation(\n", + " name=\"relationship\",\n", + " value=lb_types.Relationship(\n", + " source=bbox_source,\n", + " target=bbox_target,\n", + " type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n", + " ))\n", "\n", + "## Only supported for MAL imports \n", "uuid_source = str(uuid.uuid4())\n", "uuid_target = str(uuid.uuid4())\n", "\n", - "bbox_source = {\n", - " 'name': 'bounding_box',\n", - " 'uuid': uuid_source,\n", - " 'bbox': {\n", - " \"top\": 177,\n", - " \"left\": 690,\n", - " \"height\": 130,\n", - " \"width\": 125\n", - " }\n", + "bbox_source_ndjson = {\n", + " \"uuid\": uuid_source,\n", + " \"name\": \"bounding_box\",\n", + " \"bbox\": {\n", + " \"top\": 1264.0,\n", + " \"left\": 2096.0,\n", + " \"height\": 425.0,\n", + " \"width\": 144.0\n", + " }\n", "}\n", "\n", - "bbox_target = {\n", - " 'name': 'bounding_box',\n", - " 'uuid': uuid_target,\n", - " 'bbox': {\n", - " \"top\": 277,\n", - " \"left\": 590,\n", - " \"height\": 230,\n", - " \"width\": 325\n", - " }\n", + "bbox_target_ndjson = {\n", + " \"uuid\": uuid_target,\n", + " \"name\": \"bounding_box\",\n", + " \"bbox\": {\n", + " \"top\": 1346.0,\n", + " \"left\": 2272.0,\n", + " \"height\": 358.0,\n", + " \"width\": 144.0\n", + " 
}\n", "}\n", "\n", - "relationship_annotation_ndjson = {\n", - " \"name\": \"relationship\", \n", + "relationship_ndjson = {\n", + " \"name\": \"relationship\",\n", " \"relationship\": {\n", - " \"source\": uuid_source,\n", - " \"target\": uuid_target,\n", - " \"type\": \"unidirectional\"\n", + " \"source\": uuid_source,\n", + " \"target\": uuid_target,\n", + " \"type\": \"unidirectional\"\n", " }\n", - "}\n" + "}" ], "cell_type": "code", "outputs": [], @@ -516,14 +645,14 @@ { "metadata": {}, "source": [ - "# Upload Annotations - putting it all together\n" + "# Uploading annotations - putting it all together\n" ], "cell_type": "markdown" }, { "metadata": {}, "source": [ - "## Step 1: Import data rows into Catalog\n", + "## Step 1: Import data rows into catalog\n", "\n" ], "cell_type": "markdown" @@ -532,19 +661,21 @@ "metadata": {}, "source": [ "# send a sample image as batch to the project\n", - "global_key=\"2560px-Kitano_Street_Kobe01s5s41102.jpeg\"\n", + "global_key = \"2560px-Kitano_Street_Kobe01s5s41102.jpeg\"\n", "\n", "test_img_url = {\n", - " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/2560px-Kitano_Street_Kobe01s5s4110.jpeg\" ,\n", - " \"global_key\": global_key\n", + " \"row_data\":\n", + " \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/2560px-Kitano_Street_Kobe01s5s4110.jpeg\",\n", + " \"global_key\":\n", + " global_key\n", "}\n", - " \n", "\n", - "dataset = client.create_dataset(name=\"demo_dataset_img\")\n", + "dataset = client.create_dataset(name=\"image-demo-dataset\")\n", "task = dataset.create_data_rows([test_img_url])\n", "task.wait_till_done()\n", - "print(\"Errors:\",task.errors)\n", - "print(\"Failed data rows:\", task.failed_data_rows)" + "\n", + "print(f\"Errors: {task.errors}\")\n", + "print(f\"Failed data rows: {task.failed_data_rows}\")" ], "cell_type": "code", "outputs": [], @@ -553,12 +684,10 @@ { "metadata": {}, "source": [ - "## Step 2: Create/select an Ontology\n", - "Your 
project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the `name` fields in your annotations to ensure the correct feature schemas are matched.\n", + "## Step 2: Create/select an ontology\n", + "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the `name` fields in your annotations to ensure the correct feature schemas are matched.\n", "\n", - "For example, when we create the bounding box annotation above, we provided the `name` as `bounding_box`. Now, when we setup our ontology, we must ensure that the name of my bounding box tool is also `bounding_box`. The same alignment must hold true for the other tools and classifications we create in our ontology.\n", - "\n", - "\n" + "For example, when we created the bounding box annotation above, we provided the `name` as `bounding_box`. Now, when we set up our ontology, we must ensure that the name of the bounding box tool is also `bounding_box`. The same alignment must hold true for the other tools and classifications we create in our ontology." 
], "cell_type": "markdown" }, @@ -566,93 +695,67 @@ "metadata": {}, "source": [ "ontology_builder = lb.OntologyBuilder(\n", - " classifications=[ # List of Classification objects\n", - " lb.Classification( \n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"radio_question\", \n", - " options=[\n", - " lb.Option(value=\"first_radio_answer\"),\n", - " lb.Option(value=\"second_radio_answer\")\n", - " ]\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"checklist_question\", \n", - " options=[\n", - " lb.Option(value=\"first_checklist_answer\"),\n", - " lb.Option(value=\"second_checklist_answer\")\n", - " ]\n", - " ), \n", - " lb.Classification( \n", - " class_type=lb.Classification.Type.TEXT,\n", - " name=\"free_text\"\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"nested_radio_question\",\n", - " options=[\n", - " lb.Option(\"first_radio_answer\",\n", - " options=[\n", + " classifications=[ # List of Classification objects\n", + " lb.Classification(class_type=lb.Classification.Type.RADIO,\n", + " name=\"radio_question\",\n", + " options=[\n", + " lb.Option(value=\"first_radio_answer\"),\n", + " lb.Option(value=\"second_radio_answer\")\n", + " ]),\n", + " lb.Classification(class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"checklist_question\",\n", + " options=[\n", + " lb.Option(value=\"first_checklist_answer\"),\n", + " lb.Option(value=\"second_checklist_answer\")\n", + " ]),\n", + " lb.Classification(class_type=lb.Classification.Type.TEXT,\n", + " name=\"free_text\"),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"nested_radio_question\",\n", + " options=[\n", + " lb.Option(\"first_radio_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"sub_radio_question\",\n", + " options=[lb.Option(\"first_sub_radio_answer\")])\n", + " 
])\n", + " ]),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"nested_checklist_question\",\n", + " options=[\n", + " lb.Option(\n", + " \"first_checklist_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"sub_checklist_question\",\n", + " options=[lb.Option(\"first_sub_checklist_answer\")])\n", + " ])\n", + " ]),\n", + " ],\n", + " tools=[ # List of Tool objects\n", + " lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n", + " lb.Tool(tool=lb.Tool.Type.BBOX,\n", + " name=\"bbox_with_radio_subclass\",\n", + " classifications=[\n", " lb.Classification(\n", " class_type=lb.Classification.Type.RADIO,\n", " name=\"sub_radio_question\",\n", - " options=[lb.Option(\"first_sub_radio_answer\")]\n", - " )\n", - " ]\n", - " )\n", - " ] \n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"nested_checklist_question\",\n", - " options=[\n", - " lb.Option(\"first_checklist_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"sub_checklist_question\", \n", - " options=[lb.Option(\"first_sub_checklist_answer\")]\n", - " )\n", - " ]\n", - " )\n", - " ]\n", - " ), \n", - " ],\n", - " tools=[ # List of Tool objects\n", - " lb.Tool( \n", - " tool=lb.Tool.Type.BBOX,\n", - " name=\"bounding_box\"), \n", - " lb.Tool( \n", - " tool=lb.Tool.Type.BBOX,\n", - " name=\"bbox_with_radio_subclass\",\n", - " classifications=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", - " options=[\n", - " lb.Option(value=\"first_sub_radio_answer\")\n", - " ]\n", - " ),\n", - " ]\n", - " ), \n", - " lb.Tool( \n", - " tool=lb.Tool.Type.POLYGON,\n", - " name=\"polygon\"),\n", - " lb.Tool( \n", - " tool=lb.Tool.Type.SEGMENTATION,\n", - " name=\"mask\"),\n", - " \t lb.Tool( \n", - " 
tool=lb.Tool.Type.POINT,\n", - " name=\"point\"), \n", - " lb.Tool( \n", - " tool=lb.Tool.Type.LINE,\n", - " name=\"polyline\"),\n", - " lb.Tool( \n", - " tool=lb.Tool.Type.RELATIONSHIP,\n", - " name=\"relationship\")]\n", - ")\n", - "\n", - "ontology = client.create_ontology(\"Image Prediction Import Demo\", ontology_builder.asdict(), media_type=lb.MediaType.Image)" + " options=[lb.Option(value=\"first_sub_radio_answer\")]),\n", + " ]),\n", + " lb.Tool(tool=lb.Tool.Type.POLYGON, name=\"polygon\"),\n", + " lb.Tool(tool=lb.Tool.Type.SEGMENTATION, name=\"mask\"),\n", + " lb.Tool(tool=lb.Tool.Type.POINT, name=\"point\"),\n", + " lb.Tool(tool=lb.Tool.Type.LINE, name=\"polyline\"),\n", + " lb.Tool(tool=lb.Tool.Type.RELATIONSHIP, name=\"relationship\")\n", + " ])\n", + "\n", + "ontology = client.create_ontology(\"Image Prediction Import Demo\",\n", + " ontology_builder.asdict(),\n", + " media_type=lb.MediaType.Image)" ], "cell_type": "code", "outputs": [], @@ -662,7 +765,6 @@ "metadata": {}, "source": [ "## Step 3: Create a labeling project\n", - "\n", "Connect the ontology to the labeling project" ], "cell_type": "markdown" @@ -670,14 +772,13 @@ { "metadata": {}, "source": [ + "# Project defaults to batch mode with benchmark quality settings if this argument is not provided\n", + "# Queue mode will be deprecated once dataset mode is deprecated\n", + "project = client.create_project(name=\"image-demo-project\",\n", + " media_type=lb.MediaType.Image,\n", + " queue_mode=lb.QueueMode.Batch)\n", "\n", - "# create a project and configure the ontology \n", - "project = client.create_project(\n", - " name=\"annotations_import_project_demo\",\n", - " media_type=lb.MediaType.Image,\n", - " queue_mode=lb.QueueMode.Batch)\n", - "\n", - "project.setup_editor(ontology) # Connect your ontology and editor to your MAL project" + "project.setup_editor(ontology)" ], "cell_type": "code", "outputs": [], @@ -694,11 +795,14 @@ "metadata": {}, "source": [ "batch = project.create_batch(\n", - " 
\"Initial batch2\", # name of the batch\n", - " global_keys=[global_key], # Paginated collection of data row objects, list of data row ids or global keys\n", - " priority=1 # priority between 1-5\n", + " \"image-demo-batch\", # each batch in a project must have a unique name\n", + " global_keys=[\n", + " global_key\n", + " ], # paginated collection of data row objects, list of data row ids or global keys\n", + " priority=1 # priority between 1(highest) - 5(lowest)\n", ")\n", - "print(\"Batch\", batch)" + "\n", + "print(f\"Batch: {batch}\")" ], "cell_type": "code", "outputs": [], @@ -718,7 +822,7 @@ { "metadata": {}, "source": [ - "### Python Annotations\n", + "### Python annotations\n", "\n", "Here we create the complete label ndjson payload of annotations only using python annotation format. There is one annotation for each reference to an annotation that we created." ], @@ -727,26 +831,24 @@ { "metadata": {}, "source": [ - "# create a Label\n", - "\n", - "label = []\n", - "\n", - "label.append(\n", - " lb_types.Label(\n", - " data=lb_types.ImageData(\n", - " global_key=global_key),\n", - " annotations = [\n", - " checklist_annotation, \n", - " text_annotation,\n", - " bbox_annotation, \n", - " bbox_with_radio_subclass_annotation, \n", - " polygon_annotation, \n", - " mask_annotation, \n", - " point_annotation, \n", - " polyline_annotation\n", - " ]\n", - " )\n", - ")\n" + "labels = []\n", + "annotations = [\n", + " radio_annotation,\n", + " checklist_annotation,\n", + " text_annotation,\n", + " bbox_annotation,\n", + " bbox_with_radio_subclass_annotation,\n", + " polygon_annotation,\n", + " mask_annotation,\n", + " point_annotation,\n", + " polyline_annotation,\n", + " bbox_source,\n", + " bbox_target,\n", + " relationship,\n", + "]\n", + "labels.append(\n", + " lb_types.Label(data=lb_types.ImageData(global_key=global_key),\n", + " annotations=annotations))" ], "cell_type": "code", "outputs": [], @@ -763,26 +865,30 @@ { "metadata": {}, "source": [ - "label_ndjson 
= []\n", - "for annotation in [radio_annotation_ndjson, \n", - " checklist_annotation_ndjson, \n", - " text_annotation_ndjson,\n", - " bbox_annotation_ndjson, \n", - " bbox_with_radio_subclass_ndjson, \n", - " polygon_annotation_ndjson, \n", - " mask_annotation_ndjson, \n", - " point_annotation_ndjson, \n", - " polyline_annotation_ndjson,\n", - " nested_radio_annotation_ndjson,\n", - " nested_checklist_annotation_ndjson,\n", - " relationship_annotation_ndjson, ## Only supported for MAL imports \n", - " bbox_source,\n", - " bbox_target\n", - " ]:\n", - " annotation.update({\n", - " 'dataRow': {'globalKey':global_key},\n", - " })\n", - " label_ndjson.append(annotation)\n" + "ndjson_labels = []\n", + "annotations = [\n", + " radio_annotation_ndjson,\n", + " nested_radio_annotation_ndjson,\n", + " nested_checklist_annotation_ndjson,\n", + " checklist_annotation_ndjson,\n", + " text_annotation_ndjson,\n", + " bbox_annotation_ndjson,\n", + " bbox_with_radio_subclass_ndjson,\n", + " polygon_annotation_ndjson,\n", + " mask_annotation_ndjson,\n", + " point_annotation_ndjson,\n", + " polyline_annotation_ndjson,\n", + " bbox_source_ndjson,\n", + " bbox_target_ndjson,\n", + " relationship_ndjson, ## Only supported for MAL imports \n", + "]\n", + "for annotation in annotations:\n", + " annotation.update({\n", + " \"dataRow\": {\n", + " \"globalKey\": global_key\n", + " },\n", + " })\n", + " ndjson_labels.append(annotation)" ], "cell_type": "code", "outputs": [], @@ -791,31 +897,32 @@ { "metadata": {}, "source": [ - "## Step 6: Upload annotations to a project as pre-labels or complete labels" + "## Step 6: Upload annotations to a project as pre-labels or ground truth\n", + "For the purposes of this tutorial, import only one of the annotation payloads at a time (NDJSON or Python annotation types)." 
], "cell_type": "markdown" }, { "metadata": {}, "source": [ - "### Model-Assisted Labeling\n", - "For the purpose of this tutorial only run one of the label_ndjosn annotation type tools at the time (NDJSON or Annotation types). Delete the previous labels before uploading labels that use the 2nd method (ndjson)\n" + "Option A: Upload to a labeling project as pre-labels (MAL)" ], "cell_type": "markdown" }, { "metadata": {}, "source": [ - "# Upload MAL label for this data row in project\n", + "# upload MAL labels for this data row in project\n", "upload_job = lb.MALPredictionImport.create_from_objects(\n", - " client = client, \n", - " project_id = project.uid, \n", - " name=\"mal_job\"+str(uuid.uuid4()), \n", - " predictions=label_ndjson)\n", + " client=client,\n", + " project_id=project.uid,\n", + " name=\"mal_job\" + str(uuid.uuid4()),\n", + " predictions=labels\n", + ")\n", + "upload_job.wait_until_done()\n", "\n", - "print(\"Errors:\", upload_job.errors)\n", - "print(\"Status of uploads: \", upload_job.statuses)\n", - "print(\" \")" + "print(f\"Errors: {upload_job.errors}\", )\n", + "print(f\"Status of uploads: {upload_job.statuses}\")" ], "cell_type": "code", "outputs": [], @@ -824,23 +931,26 @@ { "metadata": {}, "source": [ - "### Label Import\n" + "Option B: Upload to a labeling project using ground truth" ], "cell_type": "markdown" }, { "metadata": {}, "source": [ + "# Uncomment if relationships are not being imported. \n", + "# Relationships will be supported for label import in the near future. 
\n", + "\n", "# Upload label for this data row in project\n", - "upload_job = lb.LabelImport.create_from_objects(\n", - " client = client, \n", - " project_id = project.uid, \n", - " name=\"label_import_job\"+str(uuid.uuid4()), \n", - " labels=label_ndjson)\n", - "\n", - "print(\"Errors:\", upload_job.errors)\n", - "print(\"Status of uploads: \", upload_job.statuses)\n", - "print(\" \")" + "# upload_job = lb.LabelImport.create_from_objects(\n", + "# client = client, \n", + "# project_id = project.uid, \n", + "# name=\"label_import_job\"+str(uuid.uuid4()), \n", + "# labels=ndjson_labels)\n", + "\n", + "# print(\"Errors:\", upload_job.errors)\n", + "# print(\"Status of uploads: \", upload_job.statuses)\n", + "# print(\" \")" ], "cell_type": "code", "outputs": [], diff --git a/examples/annotation_import/pdf.ipynb b/examples/annotation_import/pdf.ipynb index 92bb2337d..be4b0ef69 100644 --- a/examples/annotation_import/pdf.ipynb +++ b/examples/annotation_import/pdf.ipynb @@ -41,10 +41,12 @@ "Supported annotations for PDF assets \n", "\n", "*Annotation types*\n", - "- Checklist classification \n", - "- Radio classifications \n", + "- Checklist classification (including nested classifications)\n", + "- Radio classifications (including nested classifications)\n", "- Free text classifications\n", + "- Bounding box\n", "- Entities\n", + "- Relationships (only supported for MAL imports)\n", "\n", "\n", "*NDJson*\n", @@ -52,7 +54,8 @@ "- Radio classifications (including nested classifications)\n", "- Free text classifications\n", "- Bounding box \n", - "- Entities " + "- Entities \n", + "- Relationships (only supported for MAL imports)" ], "cell_type": "markdown" }, @@ -66,7 +69,7 @@ { "metadata": {}, "source": [ - "!pip install -q 'labelbox[data]'" + "!pip install -q \"labelbox[data]\"" ], "cell_type": "code", "outputs": [], @@ -162,8 +165,8 @@ ")\n", "# NDJSON\n", "radio_annotation_ndjson = {\n", - " 'name': 'radio_question',\n", - " 'answer': {'name': 
'first_radio_answer'}\n", + " \"name\": \"radio_question\",\n", + " \"answer\": {\"name\": \"first_radio_answer\"}\n", "}" ], "cell_type": "code", @@ -187,10 +190,10 @@ "\n", "# NDJSON\n", "checklist_annotation_ndjson = {\n", - " 'name': 'checklist_question',\n", - " 'answer': [\n", - " {'name': 'first_checklist_answer'},\n", - " {'name': 'second_checklist_answer'}\n", + " \"name\": \"checklist_question\",\n", + " \"answer\": [\n", + " {\"name\": \"first_checklist_answer\"},\n", + " {\"name\": \"second_checklist_answer\"}\n", " ]\n", "}" ], @@ -203,16 +206,26 @@ "source": [ "############ Bounding Box ###########\n", "\n", + "bbox_annotation = lb_types.ObjectAnnotation(\n", + " name=\"bounding_box\", # must match your ontology feature's name\n", + " value=lb_types.DocumentRectangle(\n", + " start=lb_types.Point(x=102.771, y=135.3), # x = left, y = top \n", + " end=lb_types.Point(x=518.571, y=245.143), # x= left + width , y = top + height\n", + " page=0,\n", + " unit=lb_types.RectangleUnit.POINTS\n", + " )\n", + " )\n", + "\n", "bbox_annotation_ndjson = {\n", - " 'name': 'bounding_box',\n", - " 'bbox': {\n", - " \"top\": 42.799,\n", - " \"left\": 86.498,\n", - " \"height\": 141.911,\n", - " \"width\": 303.195\n", + " \"name\": \"bounding_box\",\n", + " \"bbox\": {\n", + " \"top\": 135.3,\n", + " \"left\": 102.771,\n", + " \"height\": 109.843,\n", + " \"width\": 415.8\n", " },\n", - " 'page': 0,\n", - " 'unit': \"POINTS\"\n", + " \"page\": 0,\n", + " \"unit\": \"POINTS\"\n", "}" ], "cell_type": "code", @@ -222,7 +235,25 @@ { "metadata": {}, "source": [ - "# ############ nested classifications ###########\n", + "# ############ global nested classifications ###########\n", + "\n", + "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " value=lb_types.Checklist(\n", + " answer=[lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " classifications=[\n", + "
lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(\n", + " answer=[lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\"\n", + " )]\n", + " ))\n", + " ]\n", + " )]\n", + " )\n", + ")\n", "\n", "nested_checklist_annotation_ndjson = {\n", " \"name\": \"nested_checklist_question\",\n", @@ -237,13 +268,33 @@ " }]\n", "}\n", "\n", + "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(\n", + " answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(\n", + " answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\"\n", + " )\n", + " )\n", + " )\n", + " ]\n", + " )\n", + " )\n", + ")\n", + "\n", + "\n", "nested_radio_annotation_ndjson = {\n", - " 'name': 'nested_radio_question',\n", - " 'answer': {\n", - " 'name': 'first_radio_answer',\n", - " 'classifications': [{\n", - " 'name':'sub_radio_question',\n", - " 'answer': { 'name' : 'first_sub_radio_answer'}\n", + " \"name\": \"nested_radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_radio_answer\",\n", + " \"classifications\": [{\n", + " \"name\":\"sub_radio_question\",\n", + " \"answer\": { \"name\" : \"first_sub_radio_answer\"}\n", " }]\n", " }\n", "}\n", @@ -259,14 +310,14 @@ "############## Classification Free-form text ############## \n", "\n", "text_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"free_text\", # must match your ontology feature's name\n", + " name=\"free_text\", # must match your ontology feature's name\n", " value=lb_types.Text(answer=\"sample text\")\n", ")\n", "\n", "\n", "text_annotation_ndjson = {\n", - " 'name': 'free_text',\n", - " 'answer': 'sample text'\n", + " \"name\": \"free_text\",\n", + " \"answer\": \"sample text\"\n", "}" ],
"cell_type": "code", @@ -278,22 +329,61 @@ "source": [ "######### BBOX with nested classifications #########\n", "\n", + "bbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n", + " name=\"bbox_with_radio_subclass\",\n", + " value=lb_types.DocumentRectangle(\n", + " start=lb_types.Point(x=317.271, y=226.757), # x = left, y = top \n", + " end=lb_types.Point(x=566.657, y=420.986), # x= left + width , y = top + height\n", + " unit=lb_types.RectangleUnit.POINTS,\n", + " page=1\n", + " ),\n", + " classifications=[\n", + " \tlb_types.ClassificationAnnotation(\n", + " \tname=\"sub_radio_question\",\n", + " \t\tvalue=lb_types.Radio(\n", + " answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"second_sub_radio_question\",\n", + " value=lb_types.Radio(\n", + " answer=lb_types.ClassificationAnswer(\n", + " name=\"second_sub_radio_answer\"\n", + " )\n", + " )\n", + " )\n", + " ]\n", + " )\n", + " )\n", + " )\n", + " ]\n", + ")\n", + "\n", "bbox_with_radio_subclass_annotation_ndjson = {\n", - " 'name': 'bbox_with_radio_subclass',\n", - " 'classifications': [\n", + " \"name\": \"bbox_with_radio_subclass\",\n", + " \"classifications\": [\n", " {\n", - " 'name': 'sub_radio_question',\n", - " 'answer': {'name': 'first_sub_radio_answer'}\n", + " \"name\": \"sub_radio_question\", \n", + " \"answer\": {\n", + " \"name\": \"first_sub_radio_answer\", \n", + " \"classifications\": [\n", + " {\n", + " \"name\": \"second_sub_radio_question\", \n", + " \"answer\": {\n", + " \"name\": \"second_sub_radio_answer\"}\n", + " }\n", + " ]\n", + " }\n", " }\n", " ],\n", - " 'bbox': {\n", - " \"top\": 214.894,\n", - " \"left\": 189.215,\n", - " \"height\": 264,\n", - " \"width\": 240.573\n", + " \"bbox\": {\n", + " \"top\": 226.757,\n", + " \"left\": 317.271,\n", + " \"height\": 194.229,\n", + " \"width\": 249.386\n", " },\n", - " 'page': 1,\n", - " 'unit': \"POINTS\"\n", + " 
\"page\": 1,\n", + " \"unit\": \"POINTS\"\n", "}" ], "cell_type": "code", @@ -305,21 +395,45 @@ "source": [ "############ NER with nested classifications ######## \n", "\n", + "\n", + "ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n", + " name=\"ner_with_checklist_subclass\",\n", + " value=lb_types.DocumentEntity(\n", + " name=\"ner_with_checklist_subclass\",\n", + " text_selections=[\n", + " lb_types.DocumentTextSelection(\n", + " token_ids=[],\n", + " group_id=\"\",\n", + " page=1\n", + " )\n", + " ]\n", + " ),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(\n", + " answer=[lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")]\n", + " )\n", + " )\n", + " ]\n", + ")\n", + "\n", + "\n", "ner_with_checklist_subclass_annotation_ndjson = {\n", - " 'name': 'ner_with_checklist_subclass',\n", - " 'classifications':[\n", + " \"name\": \"ner_with_checklist_subclass\",\n", + " \"classifications\":[\n", " {\n", - " 'name': 'sub_checklist_question',\n", - " 'answer': [{'name': 'first_sub_checklist_answer'}] \n", + " \"name\": \"sub_checklist_question\",\n", + " \"answer\": [{\"name\": \"first_sub_checklist_answer\"}] \n", " }\n", " ],\n", - " 'textSelections': [\n", + " \"textSelections\": [\n", " {\n", " \"tokenIds\": [\n", - " \"\",\n", + " \"\"\n", " ],\n", " \"groupId\": \"\",\n", - " \"page\": 1,\n", + " \"page\": 1\n", " }\n", " ] \n", "}\n", @@ -329,6 +443,170 @@ "outputs": [], "execution_count": null }, + { + "metadata": {}, + "source": [ + "######### Relationships ########## \n", + "entity_source = lb_types.ObjectAnnotation(\n", + " name=\"named_entity\",\n", + " value= lb_types.DocumentEntity(\n", + " name=\"named_entity\",\n", + " textSelections=[\n", + " lb_types.DocumentTextSelection(\n", + " token_ids=[],\n", + " group_id=\"\",\n", + " page=1\n", + " )\n", + " ]\n", + " )\n", + ")\n", + "\n", + "entity_target = 
lb_types.ObjectAnnotation(\n", + " name=\"named_entity\",\n", + " value=lb_types.DocumentEntity(\n", + " name=\"named_entity\",\n", + " textSelections=[\n", + " lb_types.DocumentTextSelection(\n", + " token_ids=[],\n", + " group_id=\"\",\n", + " page=1\n", + " )\n", + " ]\n", + " )\n", + ")\n", + "\n", + "entity_relationship = lb_types.RelationshipAnnotation(\n", + " name=\"relationship\",\n", + " value=lb_types.Relationship(\n", + " source=entity_source,\n", + " target=entity_target,\n", + " type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n", + " ))\n", + "\n", + "## Only supported for MAL imports \n", + "uuid_source = str(uuid.uuid4())\n", + "uuid_target = str(uuid.uuid4())\n", + "\n", + "entity_source_ndjson = {\n", + " \"name\": \"named_entity\",\n", + " \"uuid\": uuid_source,\n", + " \"textSelections\": [\n", + " {\n", + " \"tokenIds\": [\n", + " \"\"\n", + " ],\n", + " \"groupId\": \"\",\n", + " \"page\": 1\n", + " }\n", + " ]\n", + " \n", + "}\n", + "\n", + "entity_target_ndjson = {\n", + " \"name\": \"named_entity\",\n", + " \"uuid\": uuid_target,\n", + " \"textSelections\": [\n", + " {\n", + " \"tokenIds\": [\n", + " \"\"\n", + " ],\n", + " \"groupId\": \"\",\n", + " \"page\": 1\n", + " }\n", + " ]\n", + "}\n", + "ner_relationship_annotation_ndjson = {\n", + " \"name\": \"relationship\", \n", + " \"relationship\": {\n", + " \"source\": uuid_source,\n", + " \"target\": uuid_target,\n", + " \"type\": \"unidirectional\"\n", + " }\n", + "}\n", + "\n", + "\n" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "######### BBOX with relationships #############\n", + "# Python Annotation\n", + "bbox_source = lb_types.ObjectAnnotation(\n", + " name=\"bounding_box\",\n", + " value=lb_types.DocumentRectangle(\n", + " start=lb_types.Point(x=188.257, y=68.875), # x = left, y = top \n", + " end=lb_types.Point(x=270.907, y=149.556), # x = left + width , y = top + height\n", + " 
unit=lb_types.RectangleUnit.POINTS,\n", + " page=1\n", + " ),\n", + ")\n", + "\n", + "bbox_target = lb_types.ObjectAnnotation(\n", + " name=\"bounding_box\",\n", + " value=lb_types.DocumentRectangle(\n", + " start=lb_types.Point(x=96.424, y=66.251),\n", + " end=lb_types.Point(x=179.074, y=146.932),\n", + " unit=lb_types.RectangleUnit.POINTS,\n", + " page=1\n", + " ),\n", + ")\n", + "\n", + "bbox_relationship = lb_types.RelationshipAnnotation(\n", + " name=\"relationship\",\n", + " value=lb_types.Relationship(\n", + " source=bbox_source,\n", + " target=bbox_target,\n", + " type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n", + " ))\n", + "\n", + "\n", + "## Only supported for MAL imports \n", + "uuid_source_2 = str(uuid.uuid4())\n", + "uuid_target_2 = str(uuid.uuid4())\n", + "\n", + "bbox_source_ndjson = {\n", + " \"name\": \"bounding_box\",\n", + " \"uuid\": uuid_source_2,\n", + " \"bbox\": {\n", + " \"top\": 68.875,\n", + " \"left\": 188.257,\n", + " \"height\": 80.681,\n", + " \"width\": 82.65\n", + " },\n", + " \"page\": 1,\n", + " \"unit\": \"POINTS\"\n", + "}\n", + "\n", + "bbox_target_ndjson = {\n", + " \"name\": \"bounding_box\",\n", + " \"uuid\": uuid_target_2,\n", + " \"bbox\": {\n", + " \"top\": 66.251,\n", + " \"left\": 96.424,\n", + " \"height\": 80.681,\n", + " \"width\": 82.65\n", + " },\n", + " \"page\": 1,\n", + " \"unit\": \"POINTS\"\n", + "}\n", + "\n", + "bbox_relationship_annotation_ndjson = {\n", + " \"name\": \"relationship\", \n", + " \"relationship\": {\n", + " \"source\": uuid_source_2,\n", + " \"target\": uuid_target_2,\n", + " \"type\": \"unidirectional\"\n", + " }\n", + "}" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, { "metadata": {}, "source": [ @@ -417,10 +695,9 @@ " name=\"sub_radio_question\",\n", " options=[lb.Option(\"first_sub_radio_answer\")]\n", " )\n", - " ]\n", - " )\n", - " ] \n", - " ),\n", + " ])\n", + " ]\n", + " ),\n", " lb.Classification(\n", " 
class_type=lb.Classification.Type.CHECKLIST,\n", " name=\"nested_checklist_question\",\n", @@ -433,45 +710,43 @@ " name=\"sub_checklist_question\", \n", " options=[lb.Option(\"first_sub_checklist_answer\")]\n", " )\n", - " ]\n", - " )\n", + " ])\n", " ]\n", " ), \n", " ],\n", " tools=[ # List of Tool objects\n", - " lb.Tool( \n", - " tool=lb.Tool.Type.BBOX,\n", - " name=\"bounding_box\"), \n", - " lb.Tool(\n", - " tool=lb.Tool.Type.NER, \n", - " name=\"named_entity\"),\n", - " lb.Tool(\n", - " tool=lb.Tool.Type.NER, \n", - " name=\"ner_with_checklist_subclass\",\n", - " classifications=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"sub_checklist_question\",\n", - " options=[\n", - " lb.Option(value=\"first_sub_checklist_answer\")\n", - " ]\n", - " )\n", - " ]\n", - " ),\n", - " lb.Tool( \n", - " tool=lb.Tool.Type.BBOX,\n", - " name=\"bbox_with_radio_subclass\",\n", - " classifications=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", + " lb.Tool( tool=lb.Tool.Type.BBOX,name=\"bounding_box\"), \n", + " lb.Tool(tool=lb.Tool.Type.NER, name=\"named_entity\"),\n", + " lb.Tool(tool=lb.Tool.Type.RELATIONSHIP,name=\"relationship\"),\n", + " lb.Tool(tool=lb.Tool.Type.NER,\n", + " name=\"ner_with_checklist_subclass\",\n", + " classifications=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"sub_checklist_question\",\n", " options=[\n", - " lb.Option(value=\"first_sub_radio_answer\")\n", + " lb.Option(value=\"first_sub_checklist_answer\")\n", " ]\n", " )\n", - " ]\n", - " )\n", - " ]\n", + " ]),\n", + " lb.Tool( tool=lb.Tool.Type.BBOX,\n", + " name=\"bbox_with_radio_subclass\",\n", + " classifications=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"sub_radio_question\",\n", + " options=[\n", + " lb.Option(\n", + " value=\"first_sub_radio_answer\" ,\n", + " 
options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"second_sub_radio_question\",\n", + " options=[lb.Option(\"second_sub_radio_answer\")]\n", + " )]\n", + " )]\n", + " )]\n", + " )]\n", ")\n", "\n", "ontology = client.create_ontology(\"Document Annotation Import Demo\",\n", @@ -537,7 +812,10 @@ { "metadata": {}, "source": [ - "##### First, we need to populate the text selections for Entity annotations" + "##### Step 5.1: First, we need to populate the text selections for Entity annotations\n", + "To learn how to generate a text layer for your documents please refer to the following repositories/files: \n", + "https://github.com/Labelbox/PDF-OCR-Transform-CLI/blob/main/src/scripts/gcloud/gcp-vision-to-lb-text-layer.py\n", + "https://github.com/Labelbox/PDF-OCR-Transform-CLI/blob/main/src/scripts/adobe/adobe-ocr-to-lb-text-layer.py\n" ], "cell_type": "markdown" }, @@ -547,65 +825,144 @@ "import requests\n", "import json\n", "\n", - "\n", - "# To learn how to generate a text layer for your documents please refer to the following repositories/files: \n", - "# https://github.com/Labelbox/PDF-OCR-Transform-CLI/blob/main/src/scripts/gcloud/gcp-vision-to-lb-text-layer.py\n", - "# https://github.com/Labelbox/PDF-OCR-Transform-CLI/blob/main/src/scripts/adobe/adobe-ocr-to-lb-text-layer.py\n", + "# Helper method\n", + "def update_text_selections(annotation, group_id, list_tokens, page):\n", + " return annotation.update({\n", + " \"textSelections\": [\n", + " {\n", + " \"groupId\": group_id,\n", + " \"tokenIds\": list_tokens,\n", + " \"page\": page\n", + " }\n", + " ]\n", + " })\n", + " \n", "\n", "text_layer = \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483-lb-textlayer.json\"\n", "\n", "# Fetch the content of the text layer\n", "res = requests.get(text_layer) \n", "\n", - "\n", + "# Phrases that we want to annotate, obtained from the text layer url\n", + "content_phrases =
[\"Metal-insulator (MI) transitions have been one of the\" , \n", + " \"T. Sasaki,* N. Yoneyama, and N. Kobayashi\", \n", + " \"Organic charge transfer salts based on the donor\",\n", + " \"the experimental investigations on this issue have not\"]\n", "\n", "# Parse the text layer\n", "text_selections = []\n", + "text_selections_ner = []\n", + "text_selections_source = []\n", + "text_selections_target = []\n", + "\n", "for obj in json.loads(res.text):\n", - " for group in obj['groups']: \n", - " # Find the text group that we are interested in annotating\n", - " if group['content'] == \"Metal-insulator (MI) transitions have been one of the\":\n", - " # We now need all the tokens associated with each word in this text group\n", - " list_tokens = [x['id'] for x in group['tokens']]\n", - " # build text selections for Annotation Types\n", - " document_text_selection = lb_types.DocumentTextSelection(groupId=group['id'], tokenIds=list_tokens, page=1)\n", + " for group in obj[\"groups\"]:\n", + " if group[\"content\"] == content_phrases[0]:\n", + " list_tokens = [x[\"id\"] for x in group[\"tokens\"]]\n", + " # build text selections for Python Annotation Types\n", + " document_text_selection = lb_types.DocumentTextSelection(groupId=group[\"id\"], tokenIds=list_tokens, page=1)\n", " text_selections.append(document_text_selection)\n", - " # build text selection for the NDJson annotation\n", - " entities_annotations_ndjson.update(\n", - " {\n", - " \"textSelections\": [\n", - " {\n", - " \"groupId\": group['id'], #id associated with the group of words\n", - " \"tokenIds\": list_tokens, #id associated with each word in a sentence group\n", - " \"page\": 1,\n", - " }\n", - " ]}\n", - " )\n", - " if group['content'] == \"T. Sasaki,* N. Yoneyama, and N. 
Kobayashi\":\n", - " list_tokens_2 = [x['id'] for x in group['tokens']]\n", - " ner_with_checklist_subclass_annotation_ndjson.update(\n", - " {\n", - " \"textSelections\": [\n", - " {\n", - " \"groupId\": group['id'], #id associated with the group of words\n", - " \"tokenIds\": list_tokens_2, #id associated with each word in a sentence group\n", - " \"page\": 1,\n", - " }\n", - " ]\n", - " }\n", + " # build text selection for the NDJson annotations\n", + " update_text_selections(annotation=entities_annotations_ndjson,\n", + " group_id=group[\"id\"], # id representing group of words \n", + " list_tokens=list_tokens, # ids representing individual words from the group\n", + " page=1)\n", + " if group[\"content\"] == content_phrases[1]:\n", + " list_tokens_2 = [x[\"id\"] for x in group[\"tokens\"]]\n", + " # build text selections for Python Annotation Types\n", + " ner_text_selection = lb_types.DocumentTextSelection(groupId=group[\"id\"], tokenIds=list_tokens_2, page=1)\n", + " text_selections_ner.append(ner_text_selection)\n", + " # build text selection for the NDJson annotations\n", + " update_text_selections(annotation=ner_with_checklist_subclass_annotation_ndjson,\n", + " group_id=group[\"id\"], # id representing group of words \n", + " list_tokens=list_tokens_2, # ids representing individual words from the group\n", + " page=1)\n", + " if group[\"content\"] == content_phrases[2]:\n", + " relationship_source = [x[\"id\"] for x in group[\"tokens\"]]\n", + " # build text selections for Python Annotation Types\n", + " text_selection_entity_source = lb_types.DocumentTextSelection(groupId=group[\"id\"], tokenIds=relationship_source, page=1)\n", + " text_selections_source.append(text_selection_entity_source)\n", + " # build text selection for the NDJson annotations\n", + " update_text_selections(annotation=entity_source_ndjson,\n", + " group_id=group[\"id\"], # id representing group of words \n", + " list_tokens=relationship_source, # ids representing individual words 
from the group\n", + " page=1)\n", + " if group[\"content\"] == content_phrases[3]:\n", + " relationship_target = [x[\"id\"] for x in group[\"tokens\"]]\n", + " # build text selections for Python Annotation Types\n", + " text_selection_entity_target = lb_types.DocumentTextSelection(group_id=group[\"id\"], tokenIds=relationship_target, page=1)\n", + " text_selections_target.append(text_selection_entity_target)\n", + " # build text selections for the NDJson annotations\n", + " update_text_selections(annotation=entity_target_ndjson,\n", + " group_id=group[\"id\"], # id representing group of words \n", + " list_tokens=relationship_target, # ids representing individual words from the group\n", + " page=1)\n" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "Re-write the python annotations to include text selections (only required for python annotation types)" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "#re-write the entity annotation with text selections \n", + "entities_annotation_document_entity = lb_types.DocumentEntity(name=\"named_entity\", textSelections = text_selections)\n", + "entities_annotation = lb_types.ObjectAnnotation(name=\"named_entity\",value=entities_annotation_document_entity)\n", + "\n", + "# re-write the entity annotation + subclassification with text selections \n", + "classifications = [\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(\n", + " answer=[lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")]\n", " )\n", - " \n", - "# re-write the entity annotation with text selections (annotation types)\n", - "entities_annotation_document_entity = lb_types.DocumentEntity(name=\"named_entity\", \n", - " textSelections = text_selections)\n", - "entities_annotation = lb_types.ObjectAnnotation(name=\"named_entity\",\n", - " value=entities_annotation_document_entity)\n", - "\n", -
" \n", + " )\n", + " ]\n", + "ner_annotation_with_subclass = lb_types.DocumentEntity(name=\"ner_with_checklist_subclass\", textSelections= text_selections_ner)\n", + "ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(name=\"ner_with_checklist_subclass\", \n", + " value=ner_annotation_with_subclass, \n", + " classifications=classifications)\n", + "\n", + "#re-write the entity source and target annotations with text selections\n", + "entity_source_doc = lb_types.DocumentEntity(name=\"named_entity\", text_selections= text_selections_source)\n", + "entity_source = lb_types.ObjectAnnotation(name=\"named_entity\", value=entity_source_doc)\n", + "\n", + "entity_target_doc = lb_types.DocumentEntity(name=\"named_entity\", text_selections=text_selections_target)\n", + "entity_target = lb_types.ObjectAnnotation(name=\"named_entity\", value=entity_target_doc)\n", + "\n", + "# re-write the entity relationship with the re-created entities \n", + "entity_relationship = lb_types.RelationshipAnnotation(\n", + " name=\"relationship\",\n", + " value=lb_types.Relationship(\n", + " source=entity_source,\n", + " target=entity_target,\n", + " type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n", + " ))\n", + " " + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "# Final NDJSON and python annotations \n", "print(f\"entities_annotations_ndjson={entities_annotations_ndjson}\")\n", "print(f\"entities_annotation={entities_annotation}\")\n", - "print(f\"nested_entities_annotation={ner_with_checklist_subclass_annotation_ndjson}\")\n", - " " + "print(f\"nested_entities_annotation_ndjson={ner_with_checklist_subclass_annotation_ndjson}\")\n", + "print(f\"nested_entities_annotation={ner_with_checklist_subclass_annotation}\")\n", + "print(f\"entity_source_ndjson={entity_source_ndjson}\")\n", + "print(f\"entity_target_ndjson={entity_target_ndjson}\")\n", + "print(f\"entity_source={entity_source}\")\n", +
"print(f\"entity_target={entity_target}\")" ], "cell_type": "code", "outputs": [], @@ -622,8 +979,6 @@ { "metadata": {}, "source": [ - "\n", - "\n", "labels = []\n", "\n", "labels.append(\n", @@ -632,9 +987,20 @@ " global_key=global_key),\n", " annotations = [\n", " entities_annotation,\n", - " checklist_annotation, \n", + " checklist_annotation,\n", + " nested_checklist_annotation, \n", " text_annotation,\n", - " radio_annotation\n", + " radio_annotation,\n", + " nested_radio_annotation,\n", + " bbox_annotation,\n", + " bbox_with_radio_subclass_annotation,\n", + " ner_with_checklist_subclass_annotation,\n", + " entity_source, \n", + " entity_target, \n", + " entity_relationship,# Only supported for MAL imports \n", + " bbox_source,\n", + " bbox_target,\n", + " bbox_relationship # Only supported for MAL imports \n", " ]\n", " )\n", ")" @@ -658,17 +1024,23 @@ "label_ndjson = []\n", "for annot in [\n", " entities_annotations_ndjson,\n", - " bbox_annotation_ndjson,\n", - " text_annotation_ndjson,\n", " checklist_annotation_ndjson,\n", " nested_checklist_annotation_ndjson,\n", + " text_annotation_ndjson,\n", + " radio_annotation_ndjson,\n", + " nested_radio_annotation_ndjson,\n", + " bbox_annotation_ndjson,\n", " bbox_with_radio_subclass_annotation_ndjson,\n", " ner_with_checklist_subclass_annotation_ndjson,\n", - " nested_radio_annotation_ndjson,\n", - " radio_annotation_ndjson\n", + " entity_source_ndjson, \n", + " entity_target_ndjson, \n", + " ner_relationship_annotation_ndjson, # Only supported for MAL imports \n", + " bbox_source_ndjson,\n", + " bbox_target_ndjson,\n", + " bbox_relationship_annotation_ndjson # Only supported for MAL imports \n", " ]:\n", " annot.update({\n", - " 'dataRow': {'globalKey': global_key},\n", + " \"dataRow\": {\"globalKey\": global_key},\n", " })\n", " label_ndjson.append(annot)\n", "\n" @@ -680,7 +1052,8 @@ { "metadata": {}, "source": [ - "### Step 6: Import the annotation payload" + "### Step 6: Import the annotation payload\n", + 
"For the purpose of this tutorial only import one of the annotations payloads at the time (NDJSON or Python annotation types)." ], "cell_type": "markdown" }, @@ -698,7 +1071,7 @@ " client = client,\n", " project_id = project.uid,\n", " name=\"pdf_annotation_upload\" + str(uuid.uuid4()),\n", - " predictions=label_ndjson)\n", + " predictions=labels)\n", "\n", "upload_job.wait_until_done()\n", "# Errors will appear for annotation uploads that failed.\n", @@ -719,15 +1092,17 @@ { "metadata": {}, "source": [ + "# Uncomment this code when excluding relationships from label import\n", + "## Relationships are not currently supported for label import\n", "\n", - "upload_job = lb.LabelImport.create_from_objects(\n", - " client = client, \n", - " project_id = project.uid, \n", - " name=\"label_import_job\"+str(uuid.uuid4()), \n", - " labels=label_ndjson)\n", + "# upload_job = lb.LabelImport.create_from_objects(\n", + "# client = client, \n", + "# project_id = project.uid, \n", + "# name=\"label_import_job\"+str(uuid.uuid4()), \n", + "# labels=labels)\n", "\n", - "print(\"Errors:\", upload_job.errors)\n", - "print(\"Status of uploads: \", upload_job.statuses)" + "# print(\"Errors:\", upload_job.errors)\n", + "# print(\"Status of uploads: \", upload_job.statuses)" ], "cell_type": "code", "outputs": [], diff --git a/examples/annotation_import/text.ipynb b/examples/annotation_import/text.ipynb index ef0abc9bc..6ca15a992 100644 --- a/examples/annotation_import/text.ipynb +++ b/examples/annotation_import/text.ipynb @@ -39,6 +39,7 @@ "* Classification radio \n", "* Classification checklist \n", "* Classification free-form text \n", + "* Relationships (Only supported for MAL and through the SDK)\n", "\n", "**Not** supported:\n", "* Segmentation mask\n", @@ -75,7 +76,7 @@ { "metadata": {}, "source": [ - "!pip install -q 'labelbox[data]'" + "!pip install -q \"labelbox[data]\"" ], "cell_type": "code", "outputs": [], @@ -165,8 +166,8 @@ "\n", "# NDJSON\n", "radio_annotation_ndjson = 
{\n", - " 'name': 'radio_question',\n", - " 'answer': {'name': 'first_radio_answer'}\n", + " \"name\": \"radio_question\",\n", + " \"answer\": {\"name\": \"first_radio_answer\"}\n", "} " ], "cell_type": "code", @@ -176,20 +177,55 @@ { "metadata": {}, "source": [ - "########## Classification - Radio and Checklist (with subclassifcations) is only suppported with NDJSON tools ##########\n", + "########## Classification - Radio and Checklist (with subclassifications) ##########\n", "\n", + "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(\n", + " answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(\n", + " answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\"\n", + " )\n", + " )\n", + " )\n", + " ]\n", + " )\n", + " )\n", + ")\n", "# NDJSON\n", "nested_radio_annotation_ndjson= {\n", - " 'name': 'nested_radio_question',\n", - " 'answer': {\n", - " 'name': 'first_radio_answer',\n", - " 'classifications': [{\n", - " 'name':'sub_radio_question',\n", - " 'answer': { 'name' : 'first_sub_radio_answer'}\n", + " \"name\": \"nested_radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_radio_answer\",\n", + " \"classifications\": [{\n", + " \"name\":\"sub_radio_question\",\n", + " \"answer\": { \"name\" : \"first_sub_radio_answer\"}\n", " }]\n", " }\n", "}\n", "\n", + "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " value=lb_types.Checklist(\n", + " answer=[lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(\n", + " answer=[lb_types.ClassificationAnswer(\n", + "
name=\"first_sub_checklist_answer\"\n", + " )]\n", + " ))\n", + " ]\n", + " )]\n", + " )\n", + ")\n", "nested_checklist_annotation_ndjson = {\n", " \"name\": \"nested_checklist_question\",\n", " \"answer\": [{\n", @@ -225,11 +261,11 @@ "\n", "# NDJSON\n", "checklist_annotation_ndjson = {\n", - " 'name': 'checklist_question',\n", - " 'answer': [\n", - " {'name': 'first_checklist_answer'},\n", - " {'name': 'second_checklist_answer'},\n", - " {'name': 'third_checklist_answer'},\n", + " \"name\": \"checklist_question\",\n", + " \"answer\": [\n", + " {\"name\": \"first_checklist_answer\"},\n", + " {\"name\": \"second_checklist_answer\"},\n", + " {\"name\": \"third_checklist_answer\"},\n", " ]\n", "}" ], @@ -250,8 +286,72 @@ "\n", "# NDJSON\n", "text_annotation_ndjson = {\n", - " 'name': 'free_text',\n", - " 'answer': 'sample text',\n", + " \"name\": \"free_text\",\n", + " \"answer\": \"sample text\",\n", + "}" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "##### Relationship ##### \n", + "# only supported for MAL imports \n", + "ner_source = lb_types.ObjectAnnotation(\n", + " name=\"named_entity\",\n", + " value=lb_types.TextEntity(\n", + " start=133, \n", + " end=140\n", + " )\n", + ")\n", + "\n", + "ner_target = lb_types.ObjectAnnotation(\n", + " name=\"named_entity\",\n", + " value=lb_types.TextEntity(\n", + " start=143,\n", + " end=159\n", + " )\n", + ")\n", + "\n", + "ner_relationship = lb_types.RelationshipAnnotation(\n", + " name=\"relationship\",\n", + " value=lb_types.Relationship(\n", + " source=ner_source,\n", + " target=ner_target,\n", + " type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n", + " ))\n", + "\n", + "\n", + "uuid_source = str(uuid.uuid4())\n", + "uuid_target = str(uuid.uuid4())\n", + "\n", + "entity_source_ndjson = {\n", + " \"name\": \"named_entity\",\n", + " \"uuid\": uuid_source,\n", + " \"location\": {\n", + " \"start\" : 133,\n", + " \"end\": 140 \n", + " }\n", + "}\n", + 
"\n", + "entity_target_ndjson = {\n", + " \"name\": \"named_entity\",\n", + " \"uuid\": uuid_target,\n", + " \"location\": {\n", + " \"start\": 143,\n", + " \"end\": 159\n", + " }\n", + "}\n", + "\n", + "ner_relationship_annotation_ndjson = {\n", + " \"name\": \"relationship\", \n", + " \"relationship\": {\n", + " \"source\": uuid_source,\n", + " \"target\": uuid_target,\n", + " \"type\": \"unidirectional\"\n", + " }\n", "}" ], "cell_type": "code", @@ -276,7 +376,7 @@ "metadata": {}, "source": [ "# You can now include ohter fields like attachments, media type and metadata in the data row creation step: https://docs.labelbox.com/reference/text-file \n", - "global_key = 'lorem-ipsum.txt'\n", + "global_key = \"lorem-ipsum.txt\"\n", "text_asset = {\n", " \"row_data\": \"https://storage.googleapis.com/labelbox-sample-datasets/nlp/lorem-ipsum.txt\",\n", " \"global_key\": global_key,\n", @@ -365,12 +465,18 @@ " )\n", " ],\n", " tools=[ # List of Tool objects\n", - " lb.Tool(tool=lb.Tool.Type.NER, \n", - " name=\"named_entity\")\n", + " lb.Tool(\n", + " tool=lb.Tool.Type.NER, \n", + " name=\"named_entity\"\n", + " ),\n", + " lb.Tool( \n", + " tool=lb.Tool.Type.RELATIONSHIP,\n", + " name=\"relationship\"\n", + " )\n", " ]\n", ")\n", "\n", - "ontology = client.create_ontology(\"Ontology Text Annotations\", ontology_builder.asdict(), media_type=lb.MediaType.Text)\n" + "ontology = client.create_ontology(\"Ontology Text Annotations\", ontology_builder.asdict())\n" ], "cell_type": "code", "outputs": [], @@ -448,8 +554,8 @@ "metadata": {}, "source": [ "# Create a Label\n", - "label = []\n", - "label.append(\n", + "labels = []\n", + "labels.append(\n", " lb_types.Label(\n", " data=lb_types.TextData(\n", " global_key=global_key),\n", @@ -457,7 +563,12 @@ " named_entitity_annotation, \n", " radio_annotation, \n", " checklist_annotation, \n", - " text_annotation\n", + " text_annotation,\n", + " ner_source,\n", + " ner_target,\n", + " ner_relationship,\n", + " 
nested_checklist_annotation,\n", + " nested_radio_annotation\n", " ]\n", " )\n", ")" @@ -482,10 +593,13 @@ " checklist_annotation_ndjson,\n", " text_annotation_ndjson,\n", " nested_radio_annotation_ndjson,\n", - " nested_checklist_annotation_ndjson\n", + " nested_checklist_annotation_ndjson,\n", + " entity_source_ndjson,\n", + " entity_target_ndjson,\n", + " ner_relationship_annotation_ndjson,\n", " ] :\n", " annotations.update({\n", - " 'dataRow': { 'globalKey': global_key }\n", + " \"dataRow\": { \"globalKey\": global_key }\n", " }) \n", " label_ndjson.append(annotations)" ], @@ -518,7 +632,7 @@ " client = client, \n", " project_id = project.uid, \n", " name=\"mal_import_job\"+str(uuid.uuid4()), \n", - " predictions=label_ndjson)\n", + " predictions=labels)\n", "\n", "upload_job_mal.wait_until_done();\n", "print(\"Errors:\", upload_job_mal.errors)\n", @@ -538,16 +652,19 @@ { "metadata": {}, "source": [ - "# Upload label for this data row in project\n", - "upload_job_label_import = lb.LabelImport.create_from_objects(\n", - " client = client, \n", - " project_id = project.uid, \n", - " name=\"label_import_job\"+str(uuid.uuid4()), \n", - " labels=label_ndjson)\n", + "# Uncomment if relationships are not being imported. \n", + "# Relationships will be supported for label import in the near future. 
\n", + "\n", + "# # Upload label for this data row in project \n", + "# upload_job_label_import = lb.LabelImport.create_from_objects(\n", + "# client = client, \n", + "# project_id = project.uid, \n", + "# name=\"label_import_job\"+str(uuid.uuid4()), \n", + "# labels=labels)\n", "\n", - "upload_job_label_import.wait_until_done();\n", - "print(\"Errors:\", upload_job_label_import.errors)\n", - "print(\"Status of uploads: \", upload_job_label_import.statuses)" + "# upload_job_label_import.wait_until_done();\n", + "# print(\"Errors:\", upload_job_label_import.errors)\n", + "# print(\"Status of uploads: \", upload_job_label_import.statuses)" ], "cell_type": "code", "outputs": [], diff --git a/examples/annotation_import/video.ipynb b/examples/annotation_import/video.ipynb index 68e841880..650916bd5 100644 --- a/examples/annotation_import/video.ipynb +++ b/examples/annotation_import/video.ipynb @@ -40,9 +40,9 @@ " * Polyline \n", " * Radio classifications \n", " * Checklist classifications \n", + " * Segmentation masks\n", "* **NOT** supported:\n", " * Polygons \n", - " * Segmentation masks\n", " * Free form text classifications\n", "\n", "Please note that this list of unsupported annotations only refers to limitations for importing annotations. For example, when using the Labelbox editor, segmentation masks can be created and edited on video assets." 
@@ -59,7 +59,7 @@ { "metadata": {}, "source": [ - "!pip install -q 'labelbox[data]'" + "!pip install -q \"labelbox[data]\"" ], "cell_type": "code", "outputs": [], @@ -106,7 +106,8 @@ "metadata": {}, "source": [ "######## Bounding box ###########\n", - "# Python Annotation \n", + "\n", + "# Python Annotation\n", "bbox_annotation = [\n", " lb_types.VideoObjectAnnotation(\n", " name = \"bbox_video\", \n", @@ -114,8 +115,8 @@ " frame=13,\n", " segment_index=0,\n", " value = lb_types.Rectangle(\n", - " start=lb_types.Point(x=146.0, y=98.0), # Top left\n", - " end=lb_types.Point(x=382.0, y=341.0), # Bottom right\n", + " start=lb_types.Point(x=146.0, y=98.0), # x = left, y = top \n", + " end=lb_types.Point(x=382.0, y=341.0),# x= left + width , y = top + height\n", " )\n", " ),\n", " lb_types.VideoObjectAnnotation(\n", @@ -124,8 +125,8 @@ " frame=15,\n", " segment_index=0,\n", " value = lb_types.Rectangle(\n", - " start=lb_types.Point(x=146.0, y=98.0), # Top left\n", - " end=lb_types.Point(x=382.0, y=341.0), # Bottom right\n", + " start=lb_types.Point(x=146.0, y=98.0), # x = left, y = top \n", + " end=lb_types.Point(x=382.0, y=341.0), # x= left + width , y = top + height\n", " )\n", " ),\n", " lb_types.VideoObjectAnnotation(\n", @@ -134,8 +135,8 @@ " frame=19,\n", " segment_index=0,\n", " value = lb_types.Rectangle(\n", - " start=lb_types.Point(x=146.0, y=98.0), # Top left\n", - " end=lb_types.Point(x=382.0, y=341.0), # Bottom right\n", + " start=lb_types.Point(x=146.0, y=98.0), # x = left, y = top \n", + " end=lb_types.Point(x=382.0, y=341.0), # x= left + width , y = top + height\n", " )\n", " )\n", "]\n", @@ -187,6 +188,7 @@ "source": [ "######## Point ########\n", "\n", + "# Python Annotation\n", "point_annotation = [\n", " lb_types.VideoObjectAnnotation(\n", " name = \"point_video\",\n", @@ -196,7 +198,7 @@ " )\n", "]\n", "\n", - "#NDJSON\n", + "# NDJSON\n", "point_annotation_ndjson = {\n", " \"name\": \"point_video\", \n", " \"segments\": [{\n", @@ -218,6 +220,8 @@ 
"metadata": {}, "source": [ "######## Polyline ########\n", + "\n", + "# Python Annotation\n", "polyline_annotation = [\n", " lb_types.VideoObjectAnnotation(\n", " name = \"line_video_frame\",\n", @@ -267,7 +271,7 @@ " \n", "]\n", "\n", - "\n", + "# NDJSON\n", "polyline_frame_annotation_ndjson = {\n", " \"name\": \"line_video_frame\", \n", " \"segments\": [\n", @@ -348,9 +352,9 @@ { "metadata": {}, "source": [ - "######## classifications ########\n", - "\n", + "######## Frame base classifications ########\n", "\n", + "# Python Annotation\n", "radio_annotation = [\n", " lb_types.VideoClassificationAnnotation(\n", " name=\"radio_class\", \n", @@ -420,10 +424,9 @@ "\n", "\n", "## NDJSON\n", - "\n", "frame_radio_classification_ndjson = {\n", " \"name\": \"radio_class\", \n", - " \"answer\": { \"name\": \"first_radio_answer\", \"frames\": [{\"start\": 9, \"end\": 15}] }\n", + " \"answer\": { \"name\": \"first_radio_answer\", \"frames\": [{\"start\": 9, \"end\": 15}]}\n", "}\n", "\n", "## frame specific\n", @@ -433,10 +436,52 @@ " { \"name\": \"first_checklist_answer\" , \"frames\": [{\"start\": 29, \"end\": 35 }]},\n", " { \"name\": \"second_checklist_answer\", \"frames\": [{\"start\": 39, \"end\": 45 }]} \n", " ] \n", + "}" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "##### Global Classifications ####### \n", + "\n", + "# Python Annotation\n", + "## For global classifications use ClassificationAnnotation\n", + "global_radio_annotation = [lb_types.ClassificationAnnotation(\n", + " name=\"radio_class_global\",\n", + " value=lb_types.Radio(answer = lb_types.ClassificationAnswer(name = \"first_radio_answer\"))\n", + ")]\n", + "\n", + "global_checklist_annotation=[lb_types.ClassificationAnnotation(\n", + " name=\"checklist_class_global\", \n", + " value=lb_types.Checklist(\n", + " answer = [\n", + " lb_types.ClassificationAnswer(\n", + " name = \"first_checklist_answer\"\n", + " ), \n", + " 
lb_types.ClassificationAnswer(\n", + " name = \"second_checklist_answer\"\n", + " )\n", + " ]\n", + " )\n", + " )]\n", + "\n", + "# NDJSON\n", + "global_radio_classification_ndjson = {\n", + " \"name\": \"radio_class_global\", \n", + " \"answer\": { \"name\": \"first_radio_answer\"}\n", "}\n", "\n", "\n", - "\n" + "global_checklist_classification_ndjson = {\n", + " \"name\": \"checklist_class_global\", \n", + " \"answer\": [\n", + " { \"name\": \"first_checklist_answer\" },\n", + " { \"name\": \"second_checklist_answer\"} \n", + " ] \n", + "}" ], "cell_type": "code", "outputs": [], @@ -447,14 +492,55 @@ "source": [ "########## Nested Global Classification ########### \n", "\n", - "nested_radio_classification = {\n", - " 'name': 'radio_question_nested',\n", - " 'answer': {'name': 'first_radio_question'},\n", - " 'classifications' : [\n", - " {'name': 'sub_question_radio', 'answer': {'name': 'sub_answer'}}\n", + "# Python Annotation\n", + "nested_radio_annotation =[lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(\n", + " answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(\n", + " answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\"\n", + " )\n", + " )\n", + " )\n", + " ]\n", + " )\n", + " )\n", + ")]\n", + "\n", + "# NDJSON\n", + "nested_radio_annotation_ndjson = {\n", + " \"name\": \"nested_radio_question\",\n", + " \"answer\": {\"name\": \"first_radio_answer\"},\n", + " \"classifications\" : [\n", + " {\"name\": \"sub_radio_question\", \"answer\": {\"name\": \"first_sub_radio_answer\"}}\n", " ]\n", "}\n", "\n", + "# Python Annotation\n", + "nested_checklist_annotation = [lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " value=lb_types.Checklist(\n", + " 
answer=[lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(\n", + " answer=[lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\"\n", + " )]\n", + " ))\n", + " ]\n", + " )]\n", + " )\n", + ")]\n", + "\n", + "# NDJSON\n", "nested_checklist_annotation_ndjson = {\n", " \"name\": \"nested_checklist_question\",\n", " \"answer\": [{\n", @@ -476,11 +562,55 @@ "metadata": {}, "source": [ "########## Classifications under frame base tools ##########\n", - "\n", - "# Frame base nested classifications do not support using the feature's name to extract ontology features. \n", - "# For this single case we are going to use the classification's featureSchemaId and the answers' featureSchemaId \n", - "# We will update the annotation object with the featureSchemaIds on step 5 after we create the ontology in step 2\n", - "\n", + "# Python Annotation \n", + "frame_bbox_with_checklist_subclass = [\n", + " lb_types.VideoObjectAnnotation(\n", + " name = \"bbox_class\", \n", + " keyframe=True,\n", + " frame=10,\n", + " segment_index=0,\n", + " value = lb_types.Rectangle(\n", + " start=lb_types.Point(x=146.0, y=98.0), # x = left, y = top \n", + " end=lb_types.Point(x=382.0, y=341.0),# x= left + width , y = top + height\n", + " )\n", + " ),\n", + " lb_types.VideoObjectAnnotation(\n", + " name = \"bbox_class\", \n", + " keyframe=True,\n", + " frame=11,\n", + " segment_index=0,\n", + " value = lb_types.Rectangle(\n", + " start=lb_types.Point(x=146.0, y=98.0), # x = left, y = top \n", + " end=lb_types.Point(x=382.0, y=341.0), # x= left + width , y = top + height\n", + " ),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name='bbox_radio',\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name='bbox_radio_answer_1',\n", + " ))\n", + " )\n", + " ]\n", + " 
),\n", + " lb_types.VideoObjectAnnotation(\n", + " name = \"bbox_class\", \n", + " keyframe=True,\n", + " frame=13,\n", + " segment_index=0,\n", + " value = lb_types.Rectangle(\n", + " start=lb_types.Point(x=146.0, y=98.0), # x = left, y = top \n", + " end=lb_types.Point(x=382.0, y=341.0), # x= left + width , y = top + height\n", + " ),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name='bbox_radio',\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name='bbox_radio_answer_2',\n", + " ))\n", + " )\n", + " ]\n", + " )\n", + "]\n", "\n", "frame_bbox_with_checklist_subclass_ndjson = {\n", " \"name\": \"bbox_class\",\n", @@ -494,9 +624,12 @@ " \"height\": 382.0,\n", " \"width\": 341.0\n", " },\n", - " \"classifications\" : [\n", - " {'schemaId' : '', 'answer' : {'schemaId': '' }}\n", - " ] \n", + " \"classifications\": [\n", + " {\n", + " \"name\": \"bbox_radio\",\n", + " \"answer\": {\"name\": \"bbox_radio_answer_1\"}\n", + " }\n", + " ]\n", " },\n", " { \n", " \"frame\": 11,\n", @@ -506,9 +639,6 @@ " \"height\": 382.0,\n", " \"width\": 341.0\n", " },\n", - " \"classifications\" : [\n", - " {'schemaId' : '', 'answer' : {'schemaId': '' }}\n", - " ] \n", " },\n", " { \n", " \"frame\": 13,\n", @@ -518,9 +648,6 @@ " \"height\": 382.0,\n", " \"width\": 341.0\n", " },\n", - " \"classifications\" : [\n", - " {'schemaId' : '', 'answer' : {'schemaId': '' }}\n", - " ] \n", " }\n", " ]\n", " }\n", @@ -531,6 +658,46 @@ "outputs": [], "execution_count": null }, + { + "metadata": {}, + "source": [ + "##### Raster Segmentation ########\n", + "\n", + "instance_uri = \"https://storage.googleapis.com/labelbox-datasets/video-sample-data/mask_example.png\"\n", + "\n", + "\n", + "\n", + "video_mask_annotation=[\n", + " lb_types.VideoMaskAnnotation(\n", + " frames=[\n", + " lb_types.MaskFrame(index=10, instance_uri=instance_uri)\n", + " ],\n", + " instances=[\n", + " lb_types.MaskInstance(color_rgb=(255,255,255), 
name=\"video_mask\")\n", + " ] \n", + " )\n", + "]\n", + "\n", + "## This works\n", + "video_mask_annotation_ndjson = {\n", + " \"masks\": {\n", + " \"frames\": [{\n", + " \"index\": 10,\n", + " \"instanceURI\": instance_uri\n", + " }],\n", + " \"instances\": [\n", + " {\n", + " \"colorRGB\": (255, 255, 255),\n", + " \"name\": \"video_mask\",\n", + " }\n", + " ]\n", + " }\n", + "}" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, { "metadata": {}, "source": [ @@ -553,7 +720,7 @@ " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/video-sample-data/sample-video-2.mp4\", \n", " \"global_key\": global_key,\n", " \"media_type\": \"VIDEO\"\n", - "}\n", + " }\n", "\n", "dataset = client.create_dataset(name=\"video_demo_dataset\")\n", "task = dataset.create_data_rows([asset])\n", @@ -586,6 +753,7 @@ " lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bbox_video\"),\n", " lb.Tool(tool=lb.Tool.Type.POINT, name=\"point_video\"),\n", " lb.Tool(tool=lb.Tool.Type.LINE, name=\"line_video_frame\"),\n", + " lb.Tool(tool=lb.Tool.Type.RASTER_SEGMENTATION, name=\"video_mask\"),\n", " lb.Tool(\n", " tool=lb.Tool.Type.BBOX, name=\"bbox_class\",\n", " classifications=[\n", @@ -623,14 +791,14 @@ " ),\n", " lb.Classification(\n", " class_type=lb.Classification.Type.RADIO,\n", - " name=\"radio_question_nested\",\n", + " name=\"nested_radio_question\",\n", " options=[\n", - " lb.Option(\"first_radio_question\",\n", + " lb.Option(\"first_radio_answer\",\n", " options=[\n", " lb.Classification(\n", " class_type=lb.Classification.Type.RADIO,\n", - " name=\"sub_question_radio\",\n", - " options=[lb.Option(\"sub_answer\")]\n", + " name=\"sub_radio_question\",\n", + " options=[lb.Option(\"first_sub_radio_answer\")]\n", " )\n", " ]\n", " )\n", @@ -651,10 +819,28 @@ " )\n", " ]\n", " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO, \n", + " name=\"radio_class_global\",\n", + " options=[ \n", + " 
lb.Option(value=\"first_radio_answer\"),\n", + " lb.Option(value=\"second_radio_answer\")\n", + " ]\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"checklist_class_global\",\n", + " options=[\n", + " lb.Option(value=\"first_checklist_answer\"),\n", + " lb.Option(value=\"second_checklist_answer\")\n", + " ]\n", + " )\n", " ] \n", ")\n", "\n", - "ontology = client.create_ontology(\"Ontology Video Annotations\", ontology_builder.asdict(), media_type=lb.MediaType.Video)" + "ontology = client.create_ontology(\"Ontology Video Annotations\", \n", + " ontology_builder.asdict(), \n", + " media_type=lb.MediaType.Video)" ], "cell_type": "code", "outputs": [], @@ -700,7 +886,7 @@ "# Create a batch to send to your MAL project\n", "batch = project.create_batch(\n", " \"first-batch-video-demo2\", # Each batch in a project must have a unique name\n", - " global_keys=[global_key], # A paginated collection of data row objects\n", + " global_keys=[global_key], # A paginated collection of data row objects, a list of data rows or global keys\n", " priority=5 # priority between 1(Highest) - 5(lowest)\n", ")\n", "\n", @@ -730,23 +916,29 @@ { "metadata": {}, "source": [ - "label = []\n", + "labels = []\n", "annotations_list = [\n", " checklist_annotation, \n", " radio_annotation,\n", " bbox_annotation, \n", + " frame_bbox_with_checklist_subclass,\n", " point_annotation, \n", - " polyline_annotation\n", + " polyline_annotation,\n", + " global_checklist_annotation,\n", + " global_radio_annotation,\n", + " video_mask_annotation,\n", + " nested_checklist_annotation,\n", + " nested_radio_annotation,\n", " ]\n", "\n", - "flatten_list_annotations = [ann for ann_sublist in annotations_list for ann in ann_sublist]\n", + "flatten_list_annotations = [ann for ann_sublist in annotations_list for ann in ann_sublist] \n", "\n", - "label.append(\n", + "labels.append(\n", " lb_types.Label(\n", " data=lb_types.VideoData(global_key=global_key),\n", " 
annotations = flatten_list_annotations\n", " )\n", - ")\n" + ")" ], "cell_type": "code", "outputs": [], @@ -763,51 +955,31 @@ { "metadata": {}, "source": [ - "## For nested frame base classifications we need to pass a featureSchemaId instead of the name. \n", - "\n", - "features = project.ontology().normalized\n", - "\n", - "for i in features['tools']:\n", - " print(i)\n", - " if i['name'] == 'bbox_class':\n", - " ## Classification feature schema id\n", - " class_feature_schema_id = i['classifications'][0]['featureSchemaId']\n", - " ## Answer feature schema id (select one of the answers)\n", - " class_options_feature_schema_id = i['classifications'][0]['options'][0]['featureSchemaId']\n", - "\n", - " ## Update the original annotation with the schema ids\n", - " for frame in frame_bbox_with_checklist_subclass_ndjson['segments']:\n", - " for k in frame['keyframes']:\n", - " k['classifications'][0].update(\n", - " {'schemaId': class_feature_schema_id , \n", - " 'answer': {'schemaId': class_options_feature_schema_id}\n", - " }\n", - " )\n", - " " + "First, let's update the bbox with nested classifications with the corresponding featureSchemaId" ], - "cell_type": "code", - "outputs": [], - "execution_count": null + "cell_type": "markdown" }, { "metadata": {}, "source": [ "label_ndjson = []\n", "\n", - "\n", "for annotation in [\n", " point_annotation_ndjson,\n", " bbox_annotation_ndjson,\n", " polyline_frame_annotation_ndjson, \n", " frame_checklist_classification_ndjson, \n", " frame_radio_classification_ndjson,\n", - " nested_radio_classification,\n", + " nested_radio_annotation_ndjson,\n", " nested_checklist_annotation_ndjson,\n", - " frame_bbox_with_checklist_subclass_ndjson \n", + " frame_bbox_with_checklist_subclass_ndjson,\n", + " global_radio_classification_ndjson,\n", + " global_checklist_classification_ndjson,\n", + " video_mask_annotation_ndjson\n", "]: \n", " annotation.update({\n", - " 'dataRow': {\n", - " 'globalKey': global_key\n", + " \"dataRow\": {\n", 
+ " \"globalKey\": global_key\n", " }\n", " })\n", " label_ndjson.append(annotation)\n" @@ -839,9 +1011,9 @@ " client = client, \n", " project_id = project.uid, \n", " name=\"mal_import_job-\" + str(uuid.uuid4()), \n", - " predictions=label_ndjson)\n", + " predictions=labels)\n", "\n", - "upload_job_mal.wait_until_done();\n", + "upload_job_mal.wait_until_done()\n", "print(\"Errors:\", upload_job_mal.errors)\n", "print(\"Status of uploads: \", upload_job_mal.statuses)\n", "print(\" \")" @@ -864,7 +1036,7 @@ " client = client,\n", " project_id = project.uid, \n", " name = \"label_import_job-\" + str(uuid.uuid4()),\n", - " labels=label_ndjson\n", + " labels=labels\n", ")\n", "\n", "upload_job_label_import.wait_until_done()\n", diff --git a/examples/prediction_upload/video_predictions.ipynb b/examples/prediction_upload/video_predictions.ipynb index 7ee9a8909..441ea9ad2 100644 --- a/examples/prediction_upload/video_predictions.ipynb +++ b/examples/prediction_upload/video_predictions.ipynb @@ -88,48 +88,13 @@ { "metadata": {}, "source": [ - "API_KEY = \"\"\n", + "API_KEY=\"\"\n", "client = lb.Client(API_KEY)" ], "cell_type": "code", "outputs": [], "execution_count": null }, - { - "metadata": {}, - "source": [ - "## Helper method - only required to update bbox_with_radio_subclass tool with nested classification\n", - "\n", - "def set_feature_schema_id(features, nested_ndjson):\n", - " \"\"\"\n", - " features: accespts a project's ontology features \n", - " nested_ndjson: nested ndjson annotation object\n", - " name: tool name \n", - "\n", - " \"\"\"\n", - "\n", - " for i in features['tools']:\n", - " print(i)\n", - " if i['name'] == \"bbox_with_radio_subclass\":\n", - " ## Classification feature schema id\n", - " class_feature_schema_id = i['classifications'][0]['featureSchemaId']\n", - " ## Answer feature schema id (select one of the answers)\n", - " class_options_feature_schema_id = i['classifications'][0]['options'][0]['featureSchemaId']\n", - "\n", - " ## Update the 
original annotation with the schema ids\n", - " for frame in nested_ndjson['segments']:\n", - " for k in frame['keyframes']:\n", - " k['classifications'][0].update(\n", - " {'schemaId': class_feature_schema_id , \n", - " 'answer': {'schemaId': class_options_feature_schema_id}\n", - " }\n", - " )\n", - " return nested_ndjson" - ], - "cell_type": "code", - "outputs": [], - "execution_count": null - }, { "metadata": {}, "source": [ @@ -300,9 +265,9 @@ " \"width\": 341.0\n", " },\n", " \"classifications\": [{\n", - " \"schemaId\": \"\",\n", + " \"name\": \"sub_radio_question\",\n", " \"answer\": \n", - " {\"schemaId\": \"\", \"confidence\": 0.5} \n", + " {\"name\": \"first_sub_radio_answer\", \"confidence\": 0.5} \n", " }] \n", " },\n", " {\n", @@ -314,9 +279,9 @@ " \"width\": 341.0\n", " },\n", " \"classifications\": [{\n", - " \"schemaId\": \"\",\n", + " \"name\": \"sub_radio_question\",\n", " \"answer\": \n", - " {\"schemaId\": \"\", \"confidence\": 0.5}\n", + " {\"name\": \"first_sub_radio_answer\", \"confidence\": 0.5}\n", " }] \n", " },\n", " {\n", @@ -328,9 +293,9 @@ " \"width\": 341.0\n", " },\n", " \"classifications\": [{\n", - " \"schemaId\": \"\",\n", + " \"name\": \"sub_radio_question\",\n", " \"answer\": \n", - " {\"schemaId\": \"\", \"confidence\": 0.5}\n", + " {\"name\": \"first_sub_radio_answer\", \"confidence\": 0.5}\n", " }]\n", " }\n", " ]\n", @@ -618,16 +583,6 @@ ], "cell_type": "markdown" }, - { - "metadata": {}, - "source": [ - "## Lets set the schema ids for the nested prediction\n", - "set_feature_schema_id(features_schema, bbox_with_radio_subclass_prediction_ndjson)" - ], - "cell_type": "code", - "outputs": [], - "execution_count": null - }, { "metadata": {}, "source": [ @@ -716,7 +671,7 @@ "source": [ "project.create_batch(\n", " \"batch_video_prediction_demo\", # Each batch in a project must have a unique name\n", - " global_keys=[global_key], # A list of data rows or data row ids\n", + " global_keys=[global_key], # A list of data rows, data 
row ids or global keys\n", " priority=5 # priority between 1(Highest) - 5(lowest)\n", ")" ], @@ -851,9 +806,9 @@ " \"width\": 341.0\n", " },\n", " \"classifications\": [{\n", - " \"schemaId\": \"\",\n", + " \"name\": \"sub_radio_question\",\n", " \"answer\": \n", - " {\"schemaId\":\"\"}\n", + " {\"name\":\"first_sub_radio_answer\"}\n", " }] \n", " },\n", " {\n", @@ -865,9 +820,9 @@ " \"width\": 341.0\n", " },\n", " \"classifications\": [{\n", - " \"schemaId\": \"\",\n", + " \"name\": \"sub_radio_question\",\n", " \"answer\": \n", - " {\"schemaId\":\"\"}\n", + " {\"name\":\"first_sub_radio_answer\"}\n", " }] \n", " },\n", " {\n", @@ -879,9 +834,9 @@ " \"width\": 341.0\n", " },\n", " \"classifications\": [{\n", - " \"schemaId\": \"\",\n", + " \"name\": \"sub_radio_question\",\n", " \"answer\": \n", - " {\"schemaId\":\"\"}\n", + " {\"name\":\"first_sub_radio_answer\"}\n", " }]\n", " }\n", " ]\n", @@ -993,13 +948,8 @@ }, { "metadata": {}, - "source": [ - "## Lets set the schema ids for the nested annotation\n", - "set_feature_schema_id(features_schema, bbox_with_radio_subclass_annotation_ndjson)" - ], - "cell_type": "code", - "outputs": [], - "execution_count": null + "source": [], + "cell_type": "markdown" }, { "metadata": {}, diff --git a/examples/project_configuration/queue_management.ipynb b/examples/project_configuration/queue_management.ipynb index 9c31d64e7..28125b207 100644 --- a/examples/project_configuration/queue_management.ipynb +++ b/examples/project_configuration/queue_management.ipynb @@ -39,8 +39,7 @@ "source": [ "* The queue is used to task labelers with specific assets\n", "* We can do any of the following:\n", - " * Set the number of times we want an image labeled\n", - " * Referred to as `Consensus` in the front end\n", + " * Set quality settings\n", " * Set the order of items in the queue\n", " * Set the percent of assets to review" ], @@ -49,7 +48,7 @@ { "metadata": {}, "source": [ - "!pip install labelbox\n", + "!pip install labelbox -q\n", "!pip 
install numpy" ], "cell_type": "code", @@ -81,7 +80,7 @@ "metadata": {}, "source": [ "# Add your api key\n", - "API_KEY = None\n", + "API_KEY = \"\"\n", "client = lb.Client(api_key=API_KEY)" ], "cell_type": "code", @@ -102,20 +101,19 @@ "# Project defaults to batch mode with benchmark quality settings if queue mode argument is not provided\n", "# Note that queue mode will be deprecated once dataset mode is deprecated \n", "\n", - "batch_project = client.create_project(name=\"batch-test-project\",\n", + "project = client.create_project(name=\"batch-test-project\",\n", " description=\"a description\",\n", " media_type=lb.MediaType.Image,\n", " # Setup quality settings (Benchmarks)\n", " auto_audit_number_of_labels=1,\n", " queue_mode=lb.QueueMode.Batch)\n", "\n", - "dataset_project = client.create_project(name=\"dataset-test-project\",\n", - " description=\"a description\",\n", - " media_type=lb.MediaType.Image,\n", - " ## Setup quality settings (Consensus)\n", - " auto_audit_percentage=0.1,\n", - " auto_audit_number_of_labels=3,\n", - " queue_mode=lb.QueueMode.Dataset)\n", + "# For Consensus quality settings use : \n", + "consensus_quality = {\n", + " \"auto_audit_percentage\": 0.1,\n", + " \"auto_audit_number_of_labels\": 3\n", + "}\n", + "\n", "\n", "dataset = client.create_dataset(name=\"queue_dataset\")" ], @@ -135,34 +133,23 @@ "source": [ "## Example image\n", "uploads = []\n", - "\n", + "global_keys = [\"ID-1\", \"ID-2\", \"ID-3\", \"ID-4\"]\n", "# Generate data rows\n", - "for i in range(1, 9):\n", + "for i in range(1, 5):\n", " row = {\n", " \"row_data\": f\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\",\n", - " \"global_key\": f\"ID-{uuid.uuid1()}\"\n", + " \"global_key\": global_keys[i-1]\n", " }\n", " uploads.append(row)\n", "\n", "\n", "data_rows = dataset.create_data_rows(uploads)\n", - "\n", "data_rows.wait_till_done()\n", - "print(\"ERRORS dataset: \" , data_rows.errors)\n", - "\n", 
- "print(\"RESULT URL: \", data_rows.result_url)" + "print(\"Errors\" , data_rows.errors)\n", + "print(\"Dataset status: \", data_rows.status)" ], "cell_type": "code", - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ERRORS dataset2: None\n", - "RESULT URL2: https://storage.labelbox.com/cl3ahv73w1891087qbwzs3edd%2Fdata-row-imports-results%2Fcl9rhym4l3p4x07xy1gsz301t_cl9ria9z33t2q0702ftd39bdj.json?Expires=1666988187661&KeyName=labelbox-assets-key-3&Signature=e_DUH8TN5UuoCgtoNULdttXXM_k\n" - ] - } - ], + "outputs": [], "execution_count": null }, { @@ -175,92 +162,47 @@ { "metadata": {}, "source": [ - "####### Connect your dataset \n", - "# Dataset projects do not support batches\n", - "# LPO Will be deprecated for datasets, please use batches to use data row priority\n", - "dataset_project.datasets.connect(dataset)\n", - "\n", "######## Create batches \n", "\n", - "# Batch projects do not support datasets\n", - "\n", - "# Get all the data row ids from your dataset to create the batch\n", - "batch_datarows = [dr.uid for dr in list(dataset.export_data_rows())]\n", - "\n", - "\n", "# Create the batch \n", "\n", - "batch = batch_project.create_batch(\n", + "batch = project.create_batch(\n", " \"batch-demo\", # Each batch in a project must have a unique name\n", - " batch_datarows[0:2], # A list of data rows or data row ids\n", - " 5 # priority between 1(Highest) - 5(lowest) 5 is the max priority that can be set\n", + " global_keys = global_keys[0:2], # A list of data rows, data row ids or global keys\n", + " priority=5 # priority between 1(Highest) - 5(lowest) 5 is the max priority that can be set\n", ")\n", "\n", - "batch2 = batch_project.create_batch(\n", + "batch2 = project.create_batch(\n", " \"batch-demo-2\", # Each batch in a project must have a unique name\n", " #Provide a slice of the data since you can't import assets with global keys that already exist in the project.\n", - " batch_datarows[2:4],\n", - " 1 # priority between 
1(Highest) - 5(lowest) 5 is the max priority that can be set\n", + " global_keys=global_keys[2:4], # A list of data rows, data row ids or global keys\n", + " priority=1 # priority between 1(Highest) - 5(lowest) 5 is the max priority that can be set\n", ")\n", "\n", - "batch3 = batch_project.create_batch(\n", - " \"batch-demo-3\", # Each batch in a project must have a unique name\n", - " # Provide a slice of the data since you can't import assets with global keys that already exist in the project.\n", - " batch_datarows[4: len(batch_datarows)],\n", - " 2 # priority between 1(Highest) - 5(lowest) 5 is the max priority that can be set\n", - ")\n", "\n", "print(\"Batch: \", batch)\n", - "print(\"Batch2: \", batch2)\n", - "print(\"Batch3: \", batch3)" + "print(\"Batch2: \", batch2)" ], "cell_type": "code", - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Batch: \n", - "Batch2: \n", - "Batch3: \n" - ] - } - ], + "outputs": [], "execution_count": null }, { "metadata": {}, "source": [ - "* Go to your project and you should see the numbers appear in order." 
- ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "print(\"View the results here:\",\n", - " f\"https://app.labelbox.com/projects/{batch_project.uid}\")\n", "print(\"View the results here:\",\n", - " f\"https://app.labelbox.com/projects/{dataset_project.uid}\")\n", + " f\"https://app.labelbox.com/projects/{project.uid}\")\n", "# Click `start labeling` to see the images in order" ], "cell_type": "code", - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "View the results here: https://app.labelbox.com/projects/cl9rhx7a6339l07vd7rc366n6\n", - "View the results here: https://app.labelbox.com/projects/cl9rhx7xx3pso07xra8caeck2\n" - ] - } - ], + "outputs": [], "execution_count": null }, { "metadata": {}, "source": [ - "## Remove Queue Order\n", + "## Queue Order\n", + "- Add priority for each data row\n", "- Remove all the batch priority in your project" ], "cell_type": "markdown" @@ -268,21 +210,45 @@ { "metadata": {}, "source": [ - "batch_project.unset_labeling_parameter_overrides(dataset.export_data_rows())" + "## See current LPOs\n", + "print(project.labeling_parameter_overrides().get_many(4))\n", + "\n", + "\n", + "## Select data rows from batches \n", + "data_rows = []\n", + "for b in list(project.batches()):\n", + " for dr in b.export_data_rows(): \n", + " data_rows.append(dr)\n", + " \n", + "## Add LPOs\n", + "lpos1 = []\n", + "p=1;\n", + "for dr in data_rows: \n", + " lpos1.append((dr, p, 1))\n", + " p+=1\n", + "\n", + "\n", + "project.set_labeling_parameter_overrides(lpos1)\n", + "# Get the project's LPOs" ], "cell_type": "code", - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Verify LPOs\n", + "project_lpos = list(project.labeling_parameter_overrides())\n", + "## Remove LPOs\n", + "# 
project.unset_labeling_parameter_overrides(dataset.export_data_rows())\n", + "\n", + "for lpo in project_lpos:\n", + " print(lpo)\n", + " print(\"Data row:\", lpo.data_row().uid)" ], + "cell_type": "code", + "outputs": [], "execution_count": null }, { @@ -295,8 +261,7 @@ { "metadata": {}, "source": [ - "# batch_project.delete()\n", - "# dataset_project.delete()\n", + "# project.delete()\n", "# dataset.delete()" ], "cell_type": "code", diff --git a/labelbox/__init__.py b/labelbox/__init__.py index ee24b3222..9d50f9564 100644 --- a/labelbox/__init__.py +++ b/labelbox/__init__.py @@ -1,5 +1,5 @@ name = "labelbox" -__version__ = "3.42.0" +__version__ = "3.43.0" from labelbox.client import Client from labelbox.schema.project import Project @@ -8,6 +8,7 @@ from labelbox.schema.annotation_import import MALPredictionImport, MEAPredictionImport, LabelImport, MEAToMALPredictionImport from labelbox.schema.dataset import Dataset from labelbox.schema.data_row import DataRow +from labelbox.schema.enums import AnnotationImportState from labelbox.schema.label import Label from labelbox.schema.batch import Batch from labelbox.schema.review import Review diff --git a/labelbox/data/annotation_types/__init__.py b/labelbox/data/annotation_types/__init__.py index f1547030a..e085699f8 100644 --- a/labelbox/data/annotation_types/__init__.py +++ b/labelbox/data/annotation_types/__init__.py @@ -4,9 +4,15 @@ from .geometry import Polygon from .geometry import Rectangle from .geometry import Geometry +from .geometry import DocumentRectangle +from .geometry import RectangleUnit from .annotation import ClassificationAnnotation from .annotation import ObjectAnnotation + +from .relationship import RelationshipAnnotation +from .relationship import Relationship + from .video import VideoClassificationAnnotation from .video import VideoObjectAnnotation from .video import DICOMObjectAnnotation diff --git a/labelbox/data/annotation_types/annotation.py b/labelbox/data/annotation_types/annotation.py 
index 2139f3c2b..d2786d669 100644 --- a/labelbox/data/annotation_types/annotation.py +++ b/labelbox/data/annotation_types/annotation.py @@ -1,40 +1,14 @@ -import abc -from typing import Any, Dict, List, Optional, Union +from typing import List, Union + +from labelbox.data.annotation_types.base_annotation import BaseAnnotation +from labelbox.data.annotation_types.geometry.geometry import Geometry from labelbox.data.mixins import ConfidenceMixin -from .classification import Checklist, Dropdown, Radio, Text -from .feature import FeatureSchema -from .geometry import Geometry, Rectangle, Point +from labelbox.data.annotation_types.classification.classification import ClassificationAnnotation from .ner import DocumentEntity, TextEntity, ConversationEntity -class BaseAnnotation(FeatureSchema, abc.ABC): - """ Base annotation class. Shouldn't be directly instantiated - """ - extra: Dict[str, Any] = {} - - -class ClassificationAnnotation(BaseAnnotation, ConfidenceMixin): - """Classification annotations (non localized) - - >>> ClassificationAnnotation( - >>> value=Text(answer="my caption message"), - >>> feature_schema_id="my-feature-schema-id" - >>> ) - - Args: - name (Optional[str]) - feature_schema_id (Optional[Cuid]) - value (Union[Text, Checklist, Radio, Dropdown]) - message_id (Optional[str]) Message id for conversational text - extra (Dict[str, Any]) - """ - - value: Union[Text, Checklist, Radio, Dropdown] - message_id: Optional[str] = None - - class ObjectAnnotation(BaseAnnotation, ConfidenceMixin): """Generic localized annotation (non classifications) diff --git a/labelbox/data/annotation_types/base_annotation.py b/labelbox/data/annotation_types/base_annotation.py new file mode 100644 index 000000000..7afa72334 --- /dev/null +++ b/labelbox/data/annotation_types/base_annotation.py @@ -0,0 +1,18 @@ +import abc +from uuid import UUID, uuid4 +from typing import Any, Dict +from pydantic import PrivateAttr + +from .feature import FeatureSchema + + +class 
BaseAnnotation(FeatureSchema, abc.ABC): + """ Base annotation class. Shouldn't be directly instantiated + """ + _uuid: UUID = PrivateAttr() + extra: Dict[str, Any] = {} + + def __init__(self, **data): + super().__init__(**data) + extra_uuid = data.get("extra", {}).get("uuid") + self._uuid = data.get("_uuid") or extra_uuid or uuid4() diff --git a/labelbox/data/annotation_types/classification/classification.py b/labelbox/data/annotation_types/classification/classification.py index adca57908..9abfa1342 100644 --- a/labelbox/data/annotation_types/classification/classification.py +++ b/labelbox/data/annotation_types/classification/classification.py @@ -1,5 +1,6 @@ from typing import Any, Dict, List, Union, Optional import warnings +from labelbox.data.annotation_types.base_annotation import BaseAnnotation from labelbox.data.mixins import ConfidenceMixin @@ -35,11 +36,14 @@ class ClassificationAnswer(FeatureSchema, ConfidenceMixin): """ extra: Dict[str, Any] = {} keyframe: Optional[bool] = None + classifications: List['ClassificationAnnotation'] = [] def dict(self, *args, **kwargs) -> Dict[str, str]: res = super().dict(*args, **kwargs) if res['keyframe'] is None: res.pop('keyframe') + if res['classifications'] == []: + res.pop('classifications') return res @@ -87,3 +91,26 @@ def __init__(self, **data: Any): super().__init__(**data) warnings.warn("Dropdown classification is deprecated and will be " "removed in a future release") + + +class ClassificationAnnotation(BaseAnnotation, ConfidenceMixin): + """Classification annotations (non localized) + + >>> ClassificationAnnotation( + >>> value=Text(answer="my caption message"), + >>> feature_schema_id="my-feature-schema-id" + >>> ) + + Args: + name (Optional[str]) + classifications (Optional[List[ClassificationAnnotation]]): Optional sub classification of the annotation + feature_schema_id (Optional[Cuid]) + value (Union[Text, Checklist, Radio, Dropdown]) + extra (Dict[str, Any]) + """ + + value: Union[Text, Checklist, Radio, 
Dropdown] + message_id: Optional[str] = None + + +ClassificationAnswer.update_forward_refs() diff --git a/labelbox/data/annotation_types/geometry/__init__.py b/labelbox/data/annotation_types/geometry/__init__.py index a4f3846ec..cbfdd2c16 100644 --- a/labelbox/data/annotation_types/geometry/__init__.py +++ b/labelbox/data/annotation_types/geometry/__init__.py @@ -3,4 +3,6 @@ from .mask import Mask from .polygon import Polygon from .rectangle import Rectangle +from .rectangle import DocumentRectangle +from .rectangle import RectangleUnit from .geometry import Geometry diff --git a/labelbox/data/annotation_types/geometry/rectangle.py b/labelbox/data/annotation_types/geometry/rectangle.py index 74e973d17..3c43d44ba 100644 --- a/labelbox/data/annotation_types/geometry/rectangle.py +++ b/labelbox/data/annotation_types/geometry/rectangle.py @@ -1,4 +1,5 @@ from typing import Optional, Union, Tuple +from enum import Enum import cv2 import geojson @@ -78,3 +79,29 @@ def draw(self, def from_xyhw(cls, x: float, y: float, h: float, w: float) -> "Rectangle": """Create Rectangle from x,y, height width format""" return cls(start=Point(x=x, y=y), end=Point(x=x + w, y=y + h)) + + +class RectangleUnit(Enum): + INCHES = 'INCHES' + PIXELS = 'PIXELS' + POINTS = 'POINTS' + + +class DocumentRectangle(Rectangle): + """Represents a 2d rectangle on a Document + + >>> Rectangle( + >>> start=Point(x=0, y=0), + >>> end=Point(x=1, y=1), + >>> page=4, + >>> unit=RectangleUnits.POINTS + >>> ) + + Args: + start (Point): Top left coordinate of the rectangle + end (Point): Bottom right coordinate of the rectangle + page (int): Page number of the document + unit (RectangleUnits): Units of the rectangle + """ + page: int + unit: RectangleUnit diff --git a/labelbox/data/annotation_types/label.py b/labelbox/data/annotation_types/label.py index afc369bd0..457180854 100644 --- a/labelbox/data/annotation_types/label.py +++ b/labelbox/data/annotation_types/label.py @@ -7,7 +7,8 @@ import labelbox from 
labelbox.data.annotation_types.data.tiled_image import TiledImageData from labelbox.schema import ontology -from .annotation import (ClassificationAnnotation, ObjectAnnotation) +from .annotation import ClassificationAnnotation, ObjectAnnotation +from .relationship import RelationshipAnnotation from .classification import ClassificationAnswer from .data import AudioData, ConversationData, DicomData, DocumentData, HTMLData, ImageData, MaskData, TextData, VideoData from .geometry import Mask @@ -44,7 +45,8 @@ class Label(BaseModel): data: DataType annotations: List[Union[ClassificationAnnotation, ObjectAnnotation, VideoMaskAnnotation, ScalarMetric, - ConfusionMatrixMetric]] = [] + ConfusionMatrixMetric, + RelationshipAnnotation]] = [] extra: Dict[str, Any] = {} def object_annotations(self) -> List[ObjectAnnotation]: diff --git a/labelbox/data/annotation_types/relationship.py b/labelbox/data/annotation_types/relationship.py new file mode 100644 index 000000000..27a833830 --- /dev/null +++ b/labelbox/data/annotation_types/relationship.py @@ -0,0 +1,18 @@ +from pydantic import BaseModel +from enum import Enum +from labelbox.data.annotation_types.annotation import BaseAnnotation, ObjectAnnotation + + +class Relationship(BaseModel): + + class Type(Enum): + UNIDIRECTIONAL = "unidirectional" + BIDIRECTIONAL = "bidirectional" + + source: ObjectAnnotation + target: ObjectAnnotation + type: Type = Type.UNIDIRECTIONAL + + +class RelationshipAnnotation(BaseAnnotation): + value: Relationship diff --git a/labelbox/data/annotation_types/video.py b/labelbox/data/annotation_types/video.py index dc4576fc7..715263b3f 100644 --- a/labelbox/data/annotation_types/video.py +++ b/labelbox/data/annotation_types/video.py @@ -1,11 +1,12 @@ from enum import Enum +from typing import List, Optional, Tuple from pydantic import BaseModel, validator -from labelbox.data.annotation_types.annotation import BaseAnnotation, ClassificationAnnotation, ObjectAnnotation +from 
labelbox.data.annotation_types.annotation import ClassificationAnnotation, ObjectAnnotation -from typing import List, Optional, Tuple +from labelbox.data.annotation_types.annotation import ClassificationAnnotation, ObjectAnnotation +from labelbox.data.annotation_types.feature import FeatureSchema from labelbox.data.mixins import ConfidenceNotSupportedMixin - from labelbox.utils import _CamelCaseMixin, is_valid_uri @@ -96,15 +97,14 @@ def validate_uri(cls, v): return v -class MaskInstance(_CamelCaseMixin, BaseModel): +class MaskInstance(_CamelCaseMixin, FeatureSchema): color_rgb: Tuple[int, int, int] name: str -class VideoMaskAnnotation(BaseAnnotation): - """DICOM video annotation - >>> DICOMVideoAnnotation( - >>> name="dicom_mask", +class VideoMaskAnnotation(BaseModel): + """Video mask annotation + >>> VideoMaskAnnotation( >>> frames=[ >>> MaskFrame(index=1, instance_uri='https://storage.labelbox.com/cjhfn5y6s0pk507024nz1ocys%2F1d60856c-59b7-3060-2754-83f7e93e0d01-1?Expires=1666901963361&KeyName=labelbox-assets-key-3&Signature=t-2s2DB4YjFuWEFak0wxYqfBfZA'), >>> MaskFrame(index=5, instance_uri='https://storage.labelbox.com/cjhfn5y6s0pk507024nz1ocys1%2F1d60856c-59b7-3060-2754-83f7e93e0d01-1?Expires=1666901963361&KeyName=labelbox-assets-key-3&Signature=t-2s2DB4YjFuWEFak0wxYqfBfZA'), diff --git a/labelbox/data/metrics/group.py b/labelbox/data/metrics/group.py index 6c14648ac..5579ac9ce 100644 --- a/labelbox/data/metrics/group.py +++ b/labelbox/data/metrics/group.py @@ -4,8 +4,8 @@ from collections import defaultdict from typing import Dict, List, Tuple, Union -from labelbox.data.annotation_types.annotation import ClassificationAnnotation, Checklist, Radio, Text -from labelbox.data.annotation_types.classification.classification import ClassificationAnswer +from labelbox.data.annotation_types.annotation import ClassificationAnnotation +from labelbox.data.annotation_types.classification.classification import Checklist, ClassificationAnswer, Radio, Text try: from typing 
import Literal except ImportError: diff --git a/labelbox/data/serialization/ndjson/base.py b/labelbox/data/serialization/ndjson/base.py index 1d81ff40d..21a5a6d20 100644 --- a/labelbox/data/serialization/ndjson/base.py +++ b/labelbox/data/serialization/ndjson/base.py @@ -1,8 +1,8 @@ from typing import Optional from uuid import uuid4 -from pydantic import BaseModel, root_validator, validator, Field +from pydantic import root_validator, validator -from labelbox.utils import _CamelCaseMixin, camel_case, is_exactly_one_set +from labelbox.utils import _CamelCaseMixin, is_exactly_one_set from ...annotation_types.types import Cuid diff --git a/labelbox/data/serialization/ndjson/classification.py b/labelbox/data/serialization/ndjson/classification.py index 574c0ef7a..7bf2e4ff9 100644 --- a/labelbox/data/serialization/ndjson/classification.py +++ b/labelbox/data/serialization/ndjson/classification.py @@ -2,6 +2,7 @@ from pydantic import BaseModel, Field, root_validator from labelbox.data.mixins import ConfidenceMixin +from labelbox.data.serialization.ndjson.base import DataRow, NDAnnotation from labelbox.utils import camel_case from ...annotation_types.annotation import ClassificationAnnotation @@ -9,12 +10,12 @@ from ...annotation_types.classification.classification import ClassificationAnswer, Dropdown, Text, Checklist, Radio from ...annotation_types.types import Cuid from ...annotation_types.data import TextData, VideoData, ImageData -from .base import DataRow, NDAnnotation -class NDFeature(ConfidenceMixin): +class NDAnswer(ConfidenceMixin): name: Optional[str] = None schema_id: Optional[Cuid] = None + classifications: Optional[List['NDSubclassificationType']] = [] @root_validator() def must_set_one(cls, values): @@ -30,6 +31,12 @@ def dict(self, *args, **kwargs): res.pop('name') if 'schemaId' in res and res['schemaId'] is None: res.pop('schemaId') + if self.classifications is None or len(self.classifications) == 0: + res.pop('classifications') + else: + 
res['classifications'] = [ + c.dict(*args, **kwargs) for c in self.classifications + ] return res class Config: @@ -54,7 +61,7 @@ def dict(self, *args, **kwargs): return res -class NDTextSubclass(NDFeature): +class NDTextSubclass(NDAnswer): answer: str def to_common(self) -> Text: @@ -66,14 +73,19 @@ def from_common(cls, text: Text, name: str, return cls(answer=text.answer, name=name, schema_id=feature_schema_id) -class NDChecklistSubclass(NDFeature): - answer: List[NDFeature] = Field(..., alias='answers') +class NDChecklistSubclass(NDAnswer): + answer: List[NDAnswer] = Field(..., alias='answers') def to_common(self) -> Checklist: + return Checklist(answer=[ ClassificationAnswer(name=answer.name, feature_schema_id=answer.schema_id, - confidence=answer.confidence) + confidence=answer.confidence, + classifications=[ + NDSubclassification.to_common(annot) + for annot in answer.classifications + ]) for answer in self.answer ]) @@ -81,9 +93,13 @@ def to_common(self) -> Checklist: def from_common(cls, checklist: Checklist, name: str, feature_schema_id: Cuid) -> "NDChecklistSubclass": return cls(answer=[ - NDFeature(name=answer.name, - schema_id=answer.feature_schema_id, - confidence=answer.confidence) + NDAnswer(name=answer.name, + schema_id=answer.feature_schema_id, + confidence=answer.confidence, + classifications=[ + NDSubclassification.from_common(annot) + for annot in answer.classifications + ]) for answer in checklist.answer ], name=name, @@ -96,21 +112,30 @@ def dict(self, *args, **kwargs): return res -class NDRadioSubclass(NDFeature): - answer: NDFeature +class NDRadioSubclass(NDAnswer): + answer: NDAnswer def to_common(self) -> Radio: - return Radio( - answer=ClassificationAnswer(name=self.answer.name, - feature_schema_id=self.answer.schema_id, - confidence=self.answer.confidence)) + return Radio(answer=ClassificationAnswer( + name=self.answer.name, + feature_schema_id=self.answer.schema_id, + confidence=self.answer.confidence, + classifications=[ + 
NDSubclassification.to_common(annot) + for annot in self.answer.classifications + ], + )) @classmethod def from_common(cls, radio: Radio, name: str, feature_schema_id: Cuid) -> "NDRadioSubclass": - return cls(answer=NDFeature(name=radio.answer.name, - schema_id=radio.answer.feature_schema_id, - confidence=radio.answer.confidence), + return cls(answer=NDAnswer(name=radio.answer.name, + schema_id=radio.answer.feature_schema_id, + confidence=radio.answer.confidence, + classifications=[ + NDSubclassification.from_common(annot) + for annot in radio.answer.classifications + ]), name=name, schema_id=feature_schema_id) @@ -122,6 +147,7 @@ class NDText(NDAnnotation, NDTextSubclass): @classmethod def from_common(cls, + uuid: str, text: Text, name: str, feature_schema_id: Cuid, @@ -134,7 +160,7 @@ def from_common(cls, data_row=DataRow(id=data.uid, global_key=data.global_key), name=name, schema_id=feature_schema_id, - uuid=extra.get('uuid'), + uuid=uuid, message_id=message_id, confidence=confidence, ) @@ -144,6 +170,7 @@ class NDChecklist(NDAnnotation, NDChecklistSubclass, VideoSupported): @classmethod def from_common(cls, + uuid: str, checklist: Checklist, name: str, feature_schema_id: Cuid, @@ -151,16 +178,21 @@ def from_common(cls, data: Union[VideoData, TextData, ImageData], message_id: str, confidence: Optional[float] = None) -> "NDChecklist": + return cls(answer=[ - NDFeature(name=answer.name, - schema_id=answer.feature_schema_id, - confidence=answer.confidence) + NDAnswer(name=answer.name, + schema_id=answer.feature_schema_id, + confidence=answer.confidence, + classifications=[ + NDSubclassification.from_common(annot) + for annot in answer.classifications + ]) for answer in checklist.answer ], data_row=DataRow(id=data.uid, global_key=data.global_key), name=name, schema_id=feature_schema_id, - uuid=extra.get('uuid'), + uuid=uuid, frames=extra.get('frames'), message_id=message_id, confidence=confidence) @@ -169,21 +201,28 @@ def from_common(cls, class 
NDRadio(NDAnnotation, NDRadioSubclass, VideoSupported): @classmethod - def from_common(cls, - radio: Radio, - name: str, - feature_schema_id: Cuid, - extra: Dict[str, Any], - data: Union[VideoData, TextData, ImageData], - message_id: str, - confidence: Optional[float] = None) -> "NDRadio": - return cls(answer=NDFeature(name=radio.answer.name, - schema_id=radio.answer.feature_schema_id, - confidence=radio.answer.confidence), + def from_common( + cls, + uuid: str, + radio: Radio, + name: str, + feature_schema_id: Cuid, + extra: Dict[str, Any], + data: Union[VideoData, TextData, ImageData], + message_id: str, + confidence: Optional[float] = None, + ) -> "NDRadio": + return cls(answer=NDAnswer(name=radio.answer.name, + schema_id=radio.answer.feature_schema_id, + confidence=radio.answer.confidence, + classifications=[ + NDSubclassification.from_common(annot) + for annot in radio.answer.classifications + ]), data_row=DataRow(id=data.uid, global_key=data.global_key), name=name, schema_id=feature_schema_id, - uuid=extra.get('uuid'), + uuid=uuid, frames=extra.get('frames'), message_id=message_id, confidence=confidence) @@ -235,7 +274,9 @@ def to_common( feature_schema_id=annotation.schema_id, extra={'uuid': annotation.uuid}, message_id=annotation.message_id, - confidence=annotation.confidence) + confidence=annotation.confidence, + ) + if getattr(annotation, 'frames', None) is None: return [common] results = [] @@ -256,7 +297,8 @@ def from_common( raise TypeError( f"Unable to convert object to MAL format. 
`{type(annotation.value)}`" ) - return classify_obj.from_common(annotation.value, annotation.name, + return classify_obj.from_common(str(annotation._uuid), annotation.value, + annotation.name, annotation.feature_schema_id, annotation.extra, data, annotation.message_id, @@ -281,6 +323,14 @@ def lookup_classification( NDSubclassificationType = Union[NDChecklistSubclass, NDRadioSubclass, NDTextSubclass] +NDAnswer.update_forward_refs() +NDChecklistSubclass.update_forward_refs() +NDChecklist.update_forward_refs() +NDRadioSubclass.update_forward_refs() +NDRadio.update_forward_refs() +NDText.update_forward_refs() +NDTextSubclass.update_forward_refs() + # Make sure to keep NDChecklist prior to NDRadio in the list, # otherwise list of answers gets parsed by NDRadio whereas NDChecklist must be used NDClassificationType = Union[NDChecklist, NDRadio, NDText] diff --git a/labelbox/data/serialization/ndjson/converter.py b/labelbox/data/serialization/ndjson/converter.py index efede36a0..35787733a 100644 --- a/labelbox/data/serialization/ndjson/converter.py +++ b/labelbox/data/serialization/ndjson/converter.py @@ -40,7 +40,6 @@ def serialize( Returns: A generator for accessing the ndjson representation of the data """ - for example in NDLabel.from_common(labels): res = example.dict(by_alias=True) for k, v in list(res.items()): diff --git a/labelbox/data/serialization/ndjson/label.py b/labelbox/data/serialization/ndjson/label.py index 931654b8b..4b8211d14 100644 --- a/labelbox/data/serialization/ndjson/label.py +++ b/labelbox/data/serialization/ndjson/label.py @@ -1,12 +1,13 @@ from itertools import groupby from operator import itemgetter -from typing import Dict, Generator, List, Optional, Tuple, Union +from typing import Dict, Generator, List, Tuple, Union from collections import defaultdict import warnings from pydantic import BaseModel from ...annotation_types.annotation import ClassificationAnnotation, ObjectAnnotation +from ...annotation_types.relationship import 
RelationshipAnnotation from ...annotation_types.video import DICOMObjectAnnotation, VideoClassificationAnnotation from ...annotation_types.video import VideoObjectAnnotation, VideoMaskAnnotation from ...annotation_types.collection import LabelCollection, LabelGenerator @@ -19,23 +20,60 @@ from .metric import NDScalarMetric, NDMetricAnnotation, NDConfusionMatrixMetric from .classification import NDChecklistSubclass, NDClassification, NDClassificationType, NDRadioSubclass from .objects import NDObject, NDObjectType, NDSegments, NDDicomSegments, NDVideoMasks, NDDicomMasks +from .relationship import NDRelationship from .base import DataRow +AnnotationType = Union[NDObjectType, NDClassificationType, + NDConfusionMatrixMetric, NDScalarMetric, NDDicomSegments, + NDSegments, NDDicomMasks, NDVideoMasks, NDRelationship] + class NDLabel(BaseModel): - annotations: List[Union[NDObjectType, NDClassificationType, - NDConfusionMatrixMetric, NDScalarMetric, - NDDicomSegments, NDSegments, NDDicomMasks, - NDVideoMasks]] + annotations: List[AnnotationType] + + class _Relationship(BaseModel): + """This object holds information about the relationship""" + ndjson: NDRelationship + source: str + target: str + + class _AnnotationGroup(BaseModel): + """Stores all the annotations and relationships per datarow""" + data_row: DataRow = None + ndjson_annotations: Dict[str, AnnotationType] = {} + relationships: List["NDLabel._Relationship"] = [] def to_common(self) -> LabelGenerator: - grouped_annotations = defaultdict(list) - for annotation in self.annotations: - grouped_annotations[annotation.data_row.id or - annotation.data_row.global_key].append( - annotation) + annotation_groups = defaultdict(NDLabel._AnnotationGroup) + + for ndjson_annotation in self.annotations: + key = ndjson_annotation.data_row.id or ndjson_annotation.data_row.global_key + group = annotation_groups[key] + + if isinstance(ndjson_annotation, NDRelationship): + group.relationships.append( + NDLabel._Relationship( + 
ndjson=ndjson_annotation, + source=ndjson_annotation.relationship.source, + target=ndjson_annotation.relationship.target)) + else: + # if this is the first object in this group, we + # take note of the DataRow this group belongs to + # and store it in the _AnnotationGroupTuple + if not group.ndjson_annotations: + group.data_row = ndjson_annotation.data_row + + # if this assertion fails and it's a valid case, + # we need to change the value type of + # `_AnnotationGroupTuple.ndjson_objects` to accept a list of objects + # and adapt the code to support duplicate UUIDs + assert ndjson_annotation.uuid not in group.ndjson_annotations, f"UUID '{ndjson_annotation.uuid}' is not unique" + + group.ndjson_annotations[ + ndjson_annotation.uuid] = ndjson_annotation + return LabelGenerator( - data=self._generate_annotations(grouped_annotations)) + data=self._generate_annotations(annotation_groups)) @classmethod def from_common(cls, @@ -45,40 +83,66 @@ def from_common(cls, yield from cls._create_video_annotations(label) def _generate_annotations( - self, - grouped_annotations: Dict[str, - List[Union[NDObjectType, NDClassificationType, - NDConfusionMatrixMetric, - NDScalarMetric, NDSegments]]] + self, annotation_groups: Dict[str, _AnnotationGroup] ) -> Generator[Label, None, None]: - for _, annotations in grouped_annotations.items(): - annots = [] - data_row = annotations[0].data_row - for annotation in annotations: - if isinstance(annotation, NDDicomSegments): - annots.extend( - NDDicomSegments.to_common(annotation, annotation.name, - annotation.schema_id)) - elif isinstance(annotation, NDSegments): - annots.extend( - NDSegments.to_common(annotation, annotation.name, - annotation.schema_id)) - elif isinstance(annotation, NDDicomMasks): - annots.append(NDDicomMasks.to_common(annotation)) - elif isinstance(annotation, NDVideoMasks): - annots.append(NDVideoMasks.to_common(annotation)) - elif isinstance(annotation, NDObjectType.__args__): - 
annots.append(NDObject.to_common(annotation)) - elif isinstance(annotation, NDClassificationType.__args__): - annots.extend(NDClassification.to_common(annotation)) - elif isinstance(annotation, + for _, group in annotation_groups.items(): + relationship_annotations: Dict[str, ObjectAnnotation] = {} + annotations = [] + # first, we iterate through all the NDJSON objects and store the + # deserialized objects in the _AnnotationGroupTuple + # object *if* the object can be used in a relationship + for uuid, ndjson_annotation in group.ndjson_annotations.items(): + if isinstance(ndjson_annotation, NDDicomSegments): + annotations.extend( + NDDicomSegments.to_common(ndjson_annotation, + ndjson_annotation.name, + ndjson_annotation.schema_id)) + elif isinstance(ndjson_annotation, NDSegments): + annotations.extend( + NDSegments.to_common(ndjson_annotation, + ndjson_annotation.name, + ndjson_annotation.schema_id)) + elif isinstance(ndjson_annotation, NDDicomMasks): + annotations.append( + NDDicomMasks.to_common(ndjson_annotation)) + elif isinstance(ndjson_annotation, NDVideoMasks): + annotations.append( + NDVideoMasks.to_common(ndjson_annotation)) + elif isinstance(ndjson_annotation, NDObjectType.__args__): + annotation = NDObject.to_common(ndjson_annotation) + annotations.append(annotation) + relationship_annotations[uuid] = annotation + elif isinstance(ndjson_annotation, + NDClassificationType.__args__): + annotations.extend( + NDClassification.to_common(ndjson_annotation)) + elif isinstance(ndjson_annotation, (NDScalarMetric, NDConfusionMatrixMetric)): - annots.append(NDMetricAnnotation.to_common(annotation)) + annotations.append( + NDMetricAnnotation.to_common(ndjson_annotation)) else: raise TypeError( - f"Unsupported annotation. {type(annotation)}") - yield Label(annotations=annots, - data=self._infer_media_type(data_row, annots)) + f"Unsupported annotation. 
{type(ndjson_annotation)}") + + # after all the annotations have been discovered, we can now create + # the relationship objects and use references to the objects + # involved + for relationship in group.relationships: + try: + source, target = relationship_annotations[ + relationship.source], relationship_annotations[ + relationship.target] + except KeyError: + raise ValueError( + f"Relationship object refers to nonexistent object with UUID '{relationship.source}' and/or '{relationship.target}'" + ) + annotations.append( + NDRelationship.to_common(relationship.ndjson, source, + target)) + + yield Label(annotations=annotations, + data=self._infer_media_type(group.data_row, + annotations)) def _infer_media_type( self, data_row: DataRow, @@ -174,10 +238,6 @@ def _create_video_annotations( yield NDClassification.from_common(annotation, label.data) elif isinstance(annotation_group[0], VideoObjectAnnotation): - warnings.warn( - """Nested classifications are not currently supported - for video object annotations - and will not import alongside the object annotations.""") segments = [] for start_frame, end_frame in segment_frame_ranges: segment = [] @@ -207,6 +267,8 @@ def _create_non_video_annotations(cls, label: Label): yield NDObject.from_common(annotation, label.data) elif isinstance(annotation, (ScalarMetric, ConfusionMatrixMetric)): yield NDMetricAnnotation.from_common(annotation, label.data) + elif isinstance(annotation, RelationshipAnnotation): + yield NDRelationship.from_common(annotation, label.data) else: raise TypeError( f"Unable to convert object to MAL format. 
`{type(getattr(annotation, 'value',annotation))}`" diff --git a/labelbox/data/serialization/ndjson/objects.py b/labelbox/data/serialization/ndjson/objects.py index 43a732d96..e88850783 100644 --- a/labelbox/data/serialization/ndjson/objects.py +++ b/labelbox/data/serialization/ndjson/objects.py @@ -1,4 +1,3 @@ -from ast import Bytes from io import BytesIO from typing import Any, Dict, List, Tuple, Union, Optional import base64 @@ -17,11 +16,11 @@ from ...annotation_types.data import ImageData, TextData, MaskData from ...annotation_types.ner import DocumentEntity, DocumentTextSelection, TextEntity from ...annotation_types.types import Cuid -from ...annotation_types.geometry import Rectangle, Polygon, Line, Point, Mask +from ...annotation_types.geometry import DocumentRectangle, Rectangle, Polygon, Line, Point, Mask from ...annotation_types.annotation import ClassificationAnnotation, ObjectAnnotation from ...annotation_types.video import VideoMaskAnnotation, DICOMMaskAnnotation, MaskFrame, MaskInstance -from .classification import NDSubclassification, NDSubclassificationType -from .base import DataRow, NDAnnotation +from .classification import NDClassification, NDSubclassification, NDSubclassificationType +from .base import DataRow, NDAnnotation, NDJsonBase class NDBaseObject(NDAnnotation): @@ -57,6 +56,7 @@ def to_common(self) -> Point: @classmethod def from_common(cls, + uuid: str, point: Point, classifications: List[ClassificationAnnotation], name: str, @@ -71,13 +71,14 @@ def from_common(cls, data_row=DataRow(id=data.uid, global_key=data.global_key), name=name, schema_id=feature_schema_id, - uuid=extra.get('uuid'), + uuid=uuid, classifications=classifications, confidence=confidence) class NDFramePoint(VideoSupported): point: _Point + classifications: List[NDSubclassificationType] = [] def to_common(self, name: str, feature_schema_id: Cuid, segment_index: int) -> VideoObjectAnnotation: @@ -87,11 +88,22 @@ def to_common(self, name: str, feature_schema_id: Cuid, 
@classmethod
def from_common(
    cls,
    frame: int,
    line: Line,
    classifications: List[NDSubclassificationType],
):
    """Build a per-frame line keyframe from a common ``Line`` geometry.

    Args:
        frame: Value stored as the keyframe's ``frame``.
        line: Geometry whose ``points`` each expose ``x`` and ``y``.
        classifications: Passed through unchanged as ``classifications``.

    Returns:
        A new ``cls`` instance whose ``line`` holds one ``{'x', 'y'}``
        mapping per point, in the same order as ``line.points``.
    """
    vertices = []
    for vertex in line.points:
        vertices.append({'x': vertex.x, 'y': vertex.y})
    return cls(frame=frame, line=vertices, classifications=classifications)
class NDDocumentRectangle(NDRectangle):
    """NDJSON form of a rectangle drawn on a document page.

    Specialises ``NDRectangle`` by declaring ``page`` and ``unit`` without
    defaults and by converting to/from ``DocumentRectangle`` instead of
    ``Rectangle``.
    """
    # Forwarded verbatim to ``DocumentRectangle.page``.
    page: int
    # Forwarded verbatim to ``DocumentRectangle.unit``.
    unit: str

    def to_common(self) -> DocumentRectangle:
        """Convert the stored bbox into a ``DocumentRectangle``.

        ``start`` is the (left, top) corner; ``end`` is that corner shifted
        by the bbox width and height.
        """
        return DocumentRectangle(start=Point(x=self.bbox.left, y=self.bbox.top),
                                 end=Point(x=self.bbox.left + self.bbox.width,
                                           y=self.bbox.top + self.bbox.height),
                                 page=self.page,
                                 unit=self.unit)

    @classmethod
    def from_common(cls,
                    uuid: str,
                    rectangle: DocumentRectangle,
                    classifications: List[ClassificationAnnotation],
                    name: str,
                    feature_schema_id: Cuid,
                    extra: Dict[str, Any],
                    data: Union[ImageData, TextData],
                    confidence: Optional[float] = None
                   ) -> "NDDocumentRectangle":
        """Build the NDJSON rectangle from a common ``DocumentRectangle``.

        Args:
            uuid: Identifier stored on the serialized annotation.
            rectangle: Source geometry; ``page`` and ``unit`` are read from
                it rather than from ``extra``.
            classifications: Nested classifications, passed through.
            name: Feature name.
            feature_schema_id: Stored as ``schema_id``.
            extra: Accepted for signature parity with the sibling
                ``from_common`` methods; not read here.
            data: Supplies ``uid`` / ``global_key`` for the ``DataRow``.
            confidence: Optional confidence, passed through.

        Returns:
            A new instance of ``cls`` (previously annotated as the parent
            ``NDRectangle``, which understated the actual type).
        """
        # min()/abs() make the bbox independent of which corner is "start".
        return cls(bbox=Bbox(top=min(rectangle.start.y, rectangle.end.y),
                             left=min(rectangle.start.x, rectangle.end.x),
                             height=abs(rectangle.end.y - rectangle.start.y),
                             width=abs(rectangle.end.x - rectangle.start.x)),
                   data_row=DataRow(id=data.uid, global_key=data.global_key),
                   name=name,
                   schema_id=feature_schema_id,
                   uuid=uuid,
                   classifications=classifications,
                   page=rectangle.page,
                   # NOTE(review): assumes ``rectangle.unit`` is enum-like and
                   # exposes ``.value`` - confirm against DocumentRectangle.
                   unit=rectangle.unit.value,
                   confidence=confidence)
@classmethod
def from_common(
    cls,
    frame: int,
    rectangle: Rectangle,
    classifications: List[NDSubclassificationType],
):
    """Build a per-frame bounding-box keyframe from a common ``Rectangle``.

    Args:
        frame: Value stored as the keyframe's ``frame``.
        rectangle: Geometry exposing ``start`` and ``end`` points.
        classifications: Passed through unchanged as ``classifications``.

    Returns:
        A new ``cls`` instance. ``top``/``left`` are the smaller of the two
        corner coordinates and ``height``/``width`` are absolute
        differences, so the order of the corners does not matter.
    """
    first, second = rectangle.start, rectangle.end
    box = Bbox(top=min(first.y, second.y),
               left=min(first.x, second.x),
               height=abs(second.y - first.y),
               width=abs(second.x - first.x))
    return cls(frame=frame, bbox=box, classifications=classifications)
uuid=self.uuid)) + segment.to_common(name=name, + feature_schema_id=feature_schema_id, + segment_index=idx, + uuid=self.uuid)) return result @classmethod @@ -340,12 +415,11 @@ def to_common(self, name: str, feature_schema_id: Cuid): result = [] for idx, segment in enumerate(self.segments): result.extend( - NDDicomSegment.to_common(segment, - name=name, - feature_schema_id=feature_schema_id, - segment_index=idx, - uuid=self.uuid, - group_key=self.group_key)) + segment.to_common(name=name, + feature_schema_id=feature_schema_id, + segment_index=idx, + uuid=self.uuid, + group_key=self.group_key)) return result @classmethod @@ -391,6 +465,7 @@ def to_common(self) -> Mask: @classmethod def from_common(cls, + uuid: str, mask: Mask, classifications: List[ClassificationAnnotation], name: str, @@ -412,7 +487,7 @@ def from_common(cls, data_row=DataRow(id=data.uid, global_key=data.global_key), name=name, schema_id=feature_schema_id, - uuid=extra.get('uuid'), + uuid=uuid, classifications=classifications, confidence=confidence) @@ -422,15 +497,13 @@ class NDVideoMasksFramesInstances(BaseModel): instances: List[MaskInstance] -class NDVideoMasks(ConfidenceMixin, NDAnnotation): +class NDVideoMasks(NDJsonBase, ConfidenceMixin): masks: NDVideoMasksFramesInstances def to_common(self) -> VideoMaskAnnotation: return VideoMaskAnnotation( frames=self.masks.frames, instances=self.masks.instances, - name=self.name, - feature_schema_id=self.schema_id, ) @classmethod @@ -439,8 +512,6 @@ def from_common(cls, annotation, data): data_row=DataRow(id=data.uid, global_key=data.global_key), masks=NDVideoMasksFramesInstances(frames=annotation.frames, instances=annotation.instances), - name=annotation.name, - schema_id=annotation.feature_schema_id, ) @@ -450,8 +521,6 @@ def to_common(self) -> DICOMMaskAnnotation: return DICOMMaskAnnotation( frames=self.masks.frames, instances=self.masks.instances, - name=self.name, - feature_schema_id=self.schema_id, group_key=self.group_key, ) @@ -461,8 +530,6 @@ def 
from_common(cls, annotation, data): data_row=DataRow(id=data.uid, global_key=data.global_key), masks=NDVideoMasksFramesInstances(frames=annotation.frames, instances=annotation.instances), - name=annotation.name, - schema_id=annotation.feature_schema_id, group_key=annotation.group_key.value, ) @@ -480,6 +547,7 @@ def to_common(self) -> TextEntity: @classmethod def from_common(cls, + uuid: str, text_entity: TextEntity, classifications: List[ClassificationAnnotation], name: str, @@ -494,7 +562,7 @@ def from_common(cls, data_row=DataRow(id=data.uid, global_key=data.global_key), name=name, schema_id=feature_schema_id, - uuid=extra.get('uuid'), + uuid=uuid, classifications=classifications, confidence=confidence) @@ -509,6 +577,7 @@ def to_common(self) -> DocumentEntity: @classmethod def from_common(cls, + uuid: str, document_entity: DocumentEntity, classifications: List[ClassificationAnnotation], name: str, @@ -521,7 +590,7 @@ def from_common(cls, dataRow=DataRow(id=data.uid, global_key=data.global_key), name=name, schema_id=feature_schema_id, - uuid=extra.get('uuid'), + uuid=uuid, classifications=classifications, confidence=confidence) @@ -537,6 +606,7 @@ def to_common(self) -> ConversationEntity: @classmethod def from_common( cls, + uuid: str, conversation_entity: ConversationEntity, classifications: List[ClassificationAnnotation], name: str, @@ -550,7 +620,7 @@ def from_common( dataRow=DataRow(id=data.uid, global_key=data.global_key), name=name, schema_id=feature_schema_id, - uuid=extra.get('uuid'), + uuid=uuid, classifications=classifications, confidence=confidence) @@ -582,7 +652,8 @@ def from_common( cls, annotation: Union[ObjectAnnotation, List[List[VideoObjectAnnotation]], VideoMaskAnnotation], data: Union[ImageData, TextData] - ) -> Union[NDLine, NDPoint, NDPolygon, NDRectangle, NDMask, NDTextEntity]: + ) -> Union[NDLine, NDPoint, NDPolygon, NDDocumentRectangle, NDRectangle, + NDMask, NDTextEntity]: obj = cls.lookup_object(annotation) # if it is video segments 
class NDRelationship(NDAnnotation):
    """NDJSON form of a relationship between two other annotations.

    The payload refers to its endpoints by UUID string, so converting back
    to the common format needs the already-built endpoint annotations to be
    supplied by the caller.
    """
    relationship: _Relationship

    @staticmethod
    def to_common(annotation: "NDRelationship", source: SUPPORTED_ANNOTATIONS,
                  target: SUPPORTED_ANNOTATIONS) -> RelationshipAnnotation:
        """Convert an NDJSON relationship into a ``RelationshipAnnotation``.

        Args:
            annotation: The parsed NDJSON relationship.
            source: Annotation to use as the relationship's source.
            target: Annotation to use as the relationship's target.

        Returns:
            A ``RelationshipAnnotation`` carrying the name, schema id and
            uuid (under ``extra['uuid']``) of ``annotation``.
        """
        return RelationshipAnnotation(name=annotation.name,
                                      value=Relationship(
                                          source=source,
                                          target=target,
                                          type=Relationship.Type(
                                              annotation.relationship.type)),
                                      extra={'uuid': annotation.uuid},
                                      feature_schema_id=annotation.schema_id)

    @classmethod
    def from_common(cls, annotation: RelationshipAnnotation,
                    data: Union[ImageData, TextData]) -> "NDRelationship":
        """Serialize a ``RelationshipAnnotation`` into NDJSON form.

        Args:
            annotation: Relationship whose ``value`` holds the ``source``
                and ``target`` annotations and the relationship ``type``.
            data: Supplies ``uid`` / ``global_key`` for the ``DataRow``.

        Returns:
            A new ``NDRelationship`` whose endpoints are the stringified
            ``_uuid`` of the source and target annotations.
        """
        relationship = annotation.value
        return cls(uuid=str(annotation._uuid),
                   name=annotation.name,
                   # Keep the schema id so a to_common -> from_common round
                   # trip does not drop it; mirrors the sibling ND*
                   # ``from_common`` methods.
                   schema_id=annotation.feature_schema_id,
                   dataRow=DataRow(id=data.uid, global_key=data.global_key),
                   relationship=_Relationship(
                       source=str(relationship.source._uuid),
                       target=str(relationship.target._uuid),
                       type=relationship.type.value))
"model_run_ids": None, + "project_ids": None, }) _filters = filters or DatasetExportFilters({ @@ -622,7 +622,11 @@ def _get_timezone() -> str: "includePerformanceDetails": _params.get('performance_details', False), "includeLabelDetails": - _params.get('label_details', False) + _params.get('label_details', False), + "projectIds": + _params.get('project_ids', None), + "modelRunIds": + _params.get('model_run_ids', None), }, } } diff --git a/labelbox/schema/export_params.py b/labelbox/schema/export_params.py index de4e51eb6..d0156ac10 100644 --- a/labelbox/schema/export_params.py +++ b/labelbox/schema/export_params.py @@ -26,8 +26,8 @@ class CatalogExportParams(DataRowParams): project_details: Optional[bool] label_details: Optional[bool] performance_details: Optional[bool] - model_runs_ids: Optional[List[str]] - projects_ids: Optional[List[str]] + model_run_ids: Optional[List[str]] + project_ids: Optional[List[str]] pass diff --git a/labelbox/schema/slice.py b/labelbox/schema/slice.py index 67f947def..993174177 100644 --- a/labelbox/schema/slice.py +++ b/labelbox/schema/slice.py @@ -85,8 +85,8 @@ def export_v2(self, "performance_details": False, "label_details": False, "media_type_override": None, - "model_runs_ids": None, - "projects_ids": None, + "model_run_ids": None, + "project_ids": None, }) mutation_name = "exportDataRowsInSlice" @@ -118,9 +118,9 @@ def export_v2(self, "includeLabelDetails": _params.get('label_details', False), "projectIds": - _params.get('projects_ids', None), + _params.get('project_ids', None), "modelRunIds": - _params.get('model_runs_ids', None), + _params.get('model_run_ids', None), }, } } diff --git a/tests/data/annotation_types/classification/test_classification.py b/tests/data/annotation_types/classification/test_classification.py index a923fb6d0..dd0079e60 100644 --- a/tests/data/annotation_types/classification/test_classification.py +++ b/tests/data/annotation_types/classification/test_classification.py @@ -100,7 +100,7 @@ def 
test_radio(): 'name': answer.name, 'feature_schema_id': None, 'extra': {}, - 'confidence': 0.81 + 'confidence': 0.81, } } classification = ClassificationAnnotation( diff --git a/tests/data/assets/ndjson/classification_import.json b/tests/data/assets/ndjson/classification_import.json index 178d61fa6..2c25c7bba 100644 --- a/tests/data/assets/ndjson/classification_import.json +++ b/tests/data/assets/ndjson/classification_import.json @@ -29,6 +29,6 @@ "dataRow": { "id": "ckrb1sf1i1g7i0ybcdc6oc8ct" }, - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584" + "uuid": "78ff6a23-bebe-475c-8f67-4c456909648f" } -] \ No newline at end of file +] diff --git a/tests/data/assets/ndjson/classification_import_global_key.json b/tests/data/assets/ndjson/classification_import_global_key.json index 0c8e5482b..9231592af 100644 --- a/tests/data/assets/ndjson/classification_import_global_key.json +++ b/tests/data/assets/ndjson/classification_import_global_key.json @@ -29,6 +29,6 @@ "dataRow": { "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" }, - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584" + "uuid": "ee70fd88-9f88-48dd-b760-7469ff479b71" } -] \ No newline at end of file +] diff --git a/tests/data/assets/ndjson/classification_import_name_only.json b/tests/data/assets/ndjson/classification_import_name_only.json index 504778dd6..c616e86b7 100644 --- a/tests/data/assets/ndjson/classification_import_name_only.json +++ b/tests/data/assets/ndjson/classification_import_name_only.json @@ -29,6 +29,6 @@ "dataRow": { "id": "ckrb1sf1i1g7i0ybcdc6oc8ct" }, - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584" + "uuid": "150d60de-30af-44e4-be20-55201c533312" } -] \ No newline at end of file +] diff --git a/tests/data/assets/ndjson/custom_confusion_matrix_import.json b/tests/data/assets/ndjson/custom_confusion_matrix_import.json index 77e4ff1c5..4f2e5057f 100644 --- a/tests/data/assets/ndjson/custom_confusion_matrix_import.json +++ b/tests/data/assets/ndjson/custom_confusion_matrix_import.json @@ -1,3 +1,55 
@@ -[{"uuid" : "a22bbf6e-b2da-4abe-9a11-df84759f7672","dataRow" : {"id": "ckrmdnqj4000007msh9p2a27r"}, "metricValue" : [1,1,2,3], "metricName" : "50%_iou", "featureName" : "sample_class", "subclassName" : "sample_subclass", "aggregation" : "CONFUSION_MATRIX"}, - {"uuid" : "a22bbf6e-b2da-4abe-9a11-df84759f7672","dataRow" : {"id": "ckrmdnqj4000007msh9p2a27r"}, "metricValue" : [0,1,2,5], "metricName" : "50%_iou", "featureName" : "sample_class", "aggregation" : "CONFUSION_MATRIX"}, - {"uuid" : "a22bbf6e-b2da-4abe-9a11-df84759f7672","dataRow" : {"id": "ckrmdnqj4000007msh9p2a27r"}, "metricValue" : {"0.1" : [0,1,2,3], "0.2" : [5,3,4,3]}, "metricName" : "50%_iou", "aggregation" : "CONFUSION_MATRIX"}] +[ + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672", + "dataRow": { + "id": "ckrmdnqj4000007msh9p2a27r" + }, + "metricValue": [ + 1, + 1, + 2, + 3 + ], + "metricName": "50%_iou", + "featureName": "sample_class", + "subclassName": "sample_subclass", + "aggregation": "CONFUSION_MATRIX" + }, + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673", + "dataRow": { + "id": "ckrmdnqj4000007msh9p2a27r" + }, + "metricValue": [ + 0, + 1, + 2, + 5 + ], + "metricName": "50%_iou", + "featureName": "sample_class", + "aggregation": "CONFUSION_MATRIX" + }, + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674", + "dataRow": { + "id": "ckrmdnqj4000007msh9p2a27r" + }, + "metricValue": { + "0.1": [ + 0, + 1, + 2, + 3 + ], + "0.2": [ + 5, + 3, + 4, + 3 + ] + }, + "metricName": "50%_iou", + "aggregation": "CONFUSION_MATRIX" + } +] diff --git a/tests/data/assets/ndjson/custom_scalar_import.json b/tests/data/assets/ndjson/custom_scalar_import.json index f54ee9711..7c421c889 100644 --- a/tests/data/assets/ndjson/custom_scalar_import.json +++ b/tests/data/assets/ndjson/custom_scalar_import.json @@ -1,3 +1,35 @@ -[{"uuid" : "a22bbf6e-b2da-4abe-9a11-df84759f7672","dataRow" : {"id": "ckrmdnqj4000007msh9p2a27r"}, "metricValue" : 0.1, "metricName" : "custom_iou", "featureName" : "sample_class", 
"subclassName" : "sample_subclass", "aggregation" : "SUM"}, - {"uuid" : "a22bbf6e-b2da-4abe-9a11-df84759f7672","dataRow" : {"id": "ckrmdnqj4000007msh9p2a27r"}, "metricValue" : 0.1, "metricName" : "custom_iou", "featureName" : "sample_class", "aggregation" : "SUM"}, - {"uuid" : "a22bbf6e-b2da-4abe-9a11-df84759f7672","dataRow" : {"id": "ckrmdnqj4000007msh9p2a27r"}, "metricValue" : { "0.1" : 0.1, "0.2" : 0.5}, "metricName" : "custom_iou", "aggregation" : "SUM"}] +[ + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672", + "dataRow": { + "id": "ckrmdnqj4000007msh9p2a27r" + }, + "metricValue": 0.1, + "metricName": "custom_iou", + "featureName": "sample_class", + "subclassName": "sample_subclass", + "aggregation": "SUM" + }, + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673", + "dataRow": { + "id": "ckrmdnqj4000007msh9p2a27r" + }, + "metricValue": 0.1, + "metricName": "custom_iou", + "featureName": "sample_class", + "aggregation": "SUM" + }, + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674", + "dataRow": { + "id": "ckrmdnqj4000007msh9p2a27r" + }, + "metricValue": { + "0.1": 0.1, + "0.2": 0.5 + }, + "metricName": "custom_iou", + "aggregation": "SUM" + } +] diff --git a/tests/data/assets/ndjson/nested_import.json b/tests/data/assets/ndjson/nested_import.json index f428eae48..6afae52c3 100644 --- a/tests/data/assets/ndjson/nested_import.json +++ b/tests/data/assets/ndjson/nested_import.json @@ -64,7 +64,7 @@ "id": "ckrb1sf1i1g7i0ybcdc6oc8ct" }, "schemaId": "ckrb1sfjx099a0y914hl319ie", - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584" + "uuid": "5d03213e-4408-456c-9eca-cf0723202961" }, { "bbox": { @@ -83,6 +83,6 @@ "id": "ckrb1sf1i1g7i0ybcdc6oc8ct" }, "schemaId": "ckrb1sfjx099a0y914hl319ie", - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584" + "uuid": "d50812f6-34eb-4f12-b3cb-bbde51a31d83" } -] \ No newline at end of file +] diff --git a/tests/data/assets/ndjson/nested_import_name_only.json b/tests/data/assets/ndjson/nested_import_name_only.json index 8b7185217..7e81fe626 
100644 --- a/tests/data/assets/ndjson/nested_import_name_only.json +++ b/tests/data/assets/ndjson/nested_import_name_only.json @@ -64,7 +64,7 @@ "id": "ckrb1sf1i1g7i0ybcdc6oc8ct" }, "name": "box c", - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584" + "uuid": "8a2b2c43-f0a1-4763-ba96-e322d986ced6" }, { "bbox": { @@ -83,6 +83,6 @@ "id": "ckrb1sf1i1g7i0ybcdc6oc8ct" }, "name": "box c", - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584" + "uuid": "456dd2c6-9fa0-42f9-9809-acc27b9886a7" } -] \ No newline at end of file +] diff --git a/tests/data/assets/ndjson/relationship_import.json b/tests/data/assets/ndjson/relationship_import.json new file mode 100644 index 000000000..bb4d905ff --- /dev/null +++ b/tests/data/assets/ndjson/relationship_import.json @@ -0,0 +1,42 @@ +[ + { + "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", + "dataRow": { + "id": "clf98gj90000qp38ka34yhptl" + }, + "name": "cat", + "classifications": [], + "bbox": { + "top": 200.0, + "left": 100.0, + "height": 100.0, + "width": 100.0 + } + }, + { + "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", + "dataRow": { + "id": "clf98gj90000qp38ka34yhptl" + }, + "name": "dog", + "classifications": [], + "bbox": { + "top": 500.0, + "left": 400.0, + "height": 200.0, + "width": 200.0 + } + }, + { + "uuid": "0e6354eb-9adb-47e5-8e52-217ed016d948", + "dataRow": { + "id": "clf98gj90000qp38ka34yhptl" + }, + "name": "is chasing", + "relationship": { + "source": "9b1e1249-36b4-4665-b60a-9060e0d18660", + "target": "d8813907-b15d-4374-bbe6-b9877fb42ccd", + "type": "unidirectional" + } + } +] diff --git a/tests/data/assets/ndjson/video_import.json b/tests/data/assets/ndjson/video_import.json index 5db642489..c7f214527 100644 --- a/tests/data/assets/ndjson/video_import.json +++ b/tests/data/assets/ndjson/video_import.json @@ -1,106 +1,166 @@ -[ - { - "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, - "schemaId": "ckrb1sfjx099a0y914hl319ie", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": 
"f6879f59-d2b5-49c2-aceb-d9e8dc478673", - "frames": [{"start": 30, "end": 35}, {"start": 50, "end": 51}] +[{ + "answer": { + "schemaId": "ckrb1sfl8099g0y91cxbd5ftb" }, - { - "answer": [{"schemaId": "ckrb1sfl8099e0y919v260awv"}], - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", - "frames": [{"start": 0, "end": 5}] + "schemaId": "ckrb1sfjx099a0y914hl319ie", + "dataRow": { + "id": "ckrb1sf1i1g7i0ybcdc6oc8ct" }, - { - "answer": "a value", - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584" + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + "frames": [{ + "start": 30, + "end": 35 + }, { + "start": 50, + "end": 51 + }] +}, { + "answer": [{ + "schemaId": "ckrb1sfl8099e0y919v260awv" + }], + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": { + "id": "ckrb1sf1i1g7i0ybcdc6oc8ct" }, - { - "classifications": [], - "schemaId": "cl5islwg200gfci6g0oitaypu", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94", - "segments": [ - { - "keyframes": [ - { - "frame": 1, - "line": [{"x": 10.0, "y": 10.0}, {"x": 100.0, "y": 100.0}, {"x": 50.0, "y": 30.0}] - }, - { - "frame": 5, - "line": [{"x": 15.0, "y": 10.0}, {"x": 50.0, "y": 100.0}, {"x": 50.0, "y": 30.0}] - } - ] - }, - { - "keyframes": [ - { - "frame": 8, - "line": [{"x": 100.0, "y": 10.0}, {"x": 50.0, "y": 100.0}, {"x": 50.0, "y": 30.0}] - } - ] - } - ] + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", + "frames": [{ + "start": 0, + "end": 5 + }] +}, { + "answer": "a value", + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": { + "id": "ckrb1sf1i1g7i0ybcdc6oc8ct" + }, + "uuid": "90e2ecf7-c19c-47e6-8cdb-8867e1b9d88c" +}, { + "classifications": [], + "schemaId": + "cl5islwg200gfci6g0oitaypu", + "dataRow": { + "id": "ckrb1sf1i1g7i0ybcdc6oc8ct" }, - { - "classifications": [], - "schemaId": 
"cl5it7ktp00i5ci6gf80b1ysd", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "f963be22-227b-4efe-9be4-2738ed822216", - "segments": [ - { - "keyframes": [ - { - "frame": 1, - "point": {"x": 10.0, "y": 10.0} - } - ] + "uuid": + "6f7c835a-0139-4896-b73f-66a6baa89e94", + "segments": [{ + "keyframes": [{ + "frame": 1, + "line": [{ + "x": 10.0, + "y": 10.0 + }, { + "x": 100.0, + "y": 100.0 + }, { + "x": 50.0, + "y": 30.0 + }], + "classifications": [] + }, { + "frame": 5, + "line": [{ + "x": 15.0, + "y": 10.0 + }, { + "x": 50.0, + "y": 100.0 + }, { + "x": 50.0, + "y": 30.0 + }], + "classifications": [] + }] + }, { + "keyframes": [{ + "frame": 8, + "line": [{ + "x": 100.0, + "y": 10.0 + }, { + "x": 50.0, + "y": 100.0 + }, { + "x": 50.0, + "y": 30.0 + }], + "classifications": [] + }] + }] +}, { + "classifications": [], + "schemaId": + "cl5it7ktp00i5ci6gf80b1ysd", + "dataRow": { + "id": "ckrb1sf1i1g7i0ybcdc6oc8ct" + }, + "uuid": + "f963be22-227b-4efe-9be4-2738ed822216", + "segments": [{ + "keyframes": [{ + "frame": 1, + "point": { + "x": 10.0, + "y": 10.0 + }, + "classifications": [] + }] + }, { + "keyframes": [{ + "frame": 5, + "point": { + "x": 50.0, + "y": 50.0 }, - { - "keyframes": [ - { - "frame": 5, - "point": {"x": 50.0, "y": 50.0} - }, - { - "frame": 10, - "point": {"x": 10.0, "y": 50.0} - } - ] - } - ] + "classifications": [] + }, { + "frame": 10, + "point": { + "x": 10.0, + "y": 50.0 + }, + "classifications": [] + }] + }] +}, { + "classifications": [], + "schemaId": + "cl5iw0roz00lwci6g5jni62vs", + "dataRow": { + "id": "ckrb1sf1i1g7i0ybcdc6oc8ct" }, - { - "classifications": [], - "schemaId": "cl5iw0roz00lwci6g5jni62vs", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", - "segments": [ - { - "keyframes": [ - { - "frame": 1, - "bbox": {"top": 10.0, "left": 5.0, "height": 100.0, "width": 150.0} - }, - { - "frame": 5, - "bbox": {"top": 30.0, "left": 5.0, "height": 50.0, "width": 150.0} - } - ] + "uuid": + 
"13b2ee0e-2355-4336-8b83-d74d09e3b1e7", + "segments": [{ + "keyframes": [{ + "frame": 1, + "bbox": { + "top": 10.0, + "left": 5.0, + "height": 100.0, + "width": 150.0 + }, + "classifications": [] + }, { + "frame": 5, + "bbox": { + "top": 30.0, + "left": 5.0, + "height": 50.0, + "width": 150.0 + }, + "classifications": [] + }] + }, { + "keyframes": [{ + "frame": 10, + "bbox": { + "top": 300.0, + "left": 200.0, + "height": 400.0, + "width": 150.0 }, - { - "keyframes": [ - { - "frame": 10, - "bbox": {"top": 300.0, "left": 200.0, "height": 400.0, "width": 150.0} - } - ] - } - ] - } -] \ No newline at end of file + "classifications": [] + }] + }] +}] diff --git a/tests/data/assets/ndjson/video_import_global_key.json b/tests/data/assets/ndjson/video_import_global_key.json index 568501dc5..11e0753d9 100644 --- a/tests/data/assets/ndjson/video_import_global_key.json +++ b/tests/data/assets/ndjson/video_import_global_key.json @@ -1,106 +1,166 @@ -[ - { - "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, - "schemaId": "ckrb1sfjx099a0y914hl319ie", - "dataRow": {"globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"}, - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - "frames": [{"start": 30, "end": 35}, {"start": 50, "end": 51}] +[{ + "answer": { + "schemaId": "ckrb1sfl8099g0y91cxbd5ftb" }, - { - "answer": [{"schemaId": "ckrb1sfl8099e0y919v260awv"}], - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": {"globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"}, - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", - "frames": [{"start": 0, "end": 5}] + "schemaId": "ckrb1sfjx099a0y914hl319ie", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" }, - { - "answer": "a value", - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": {"globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"}, - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584" + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + "frames": [{ + "start": 30, + "end": 35 + }, { + "start": 50, + "end": 51 + }] +}, { 
+ "answer": [{ + "schemaId": "ckrb1sfl8099e0y919v260awv" + }], + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" }, - { - "classifications": [], - "schemaId": "cl5islwg200gfci6g0oitaypu", - "dataRow": {"globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"}, - "uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94", - "segments": [ - { - "keyframes": [ - { - "frame": 1, - "line": [{"x": 10.0, "y": 10.0}, {"x": 100.0, "y": 100.0}, {"x": 50.0, "y": 30.0}] - }, - { - "frame": 5, - "line": [{"x": 15.0, "y": 10.0}, {"x": 50.0, "y": 100.0}, {"x": 50.0, "y": 30.0}] - } - ] - }, - { - "keyframes": [ - { - "frame": 8, - "line": [{"x": 100.0, "y": 10.0}, {"x": 50.0, "y": 100.0}, {"x": 50.0, "y": 30.0}] - } - ] - } - ] + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", + "frames": [{ + "start": 0, + "end": 5 + }] +}, { + "answer": "a value", + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": "3b302706-37ec-4f72-ab2e-757d8bd302b9" +}, { + "classifications": [], + "schemaId": + "cl5islwg200gfci6g0oitaypu", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" }, - { - "classifications": [], - "schemaId": "cl5it7ktp00i5ci6gf80b1ysd", - "dataRow": {"globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"}, - "uuid": "f963be22-227b-4efe-9be4-2738ed822216", - "segments": [ - { - "keyframes": [ - { - "frame": 1, - "point": {"x": 10.0, "y": 10.0} - } - ] + "uuid": + "6f7c835a-0139-4896-b73f-66a6baa89e94", + "segments": [{ + "keyframes": [{ + "frame": 1, + "line": [{ + "x": 10.0, + "y": 10.0 + }, { + "x": 100.0, + "y": 100.0 + }, { + "x": 50.0, + "y": 30.0 + }], + "classifications": [] + }, { + "frame": 5, + "line": [{ + "x": 15.0, + "y": 10.0 + }, { + "x": 50.0, + "y": 100.0 + }, { + "x": 50.0, + "y": 30.0 + }], + "classifications": [] + }] + }, { + "keyframes": [{ + "frame": 8, + "line": [{ + "x": 100.0, + "y": 10.0 + }, { + "x": 50.0, + "y": 100.0 + 
}, { + "x": 50.0, + "y": 30.0 + }], + "classifications": [] + }] + }] +}, { + "classifications": [], + "schemaId": + "cl5it7ktp00i5ci6gf80b1ysd", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": + "f963be22-227b-4efe-9be4-2738ed822216", + "segments": [{ + "keyframes": [{ + "frame": 1, + "point": { + "x": 10.0, + "y": 10.0 + }, + "classifications": [] + }] + }, { + "keyframes": [{ + "frame": 5, + "point": { + "x": 50.0, + "y": 50.0 }, - { - "keyframes": [ - { - "frame": 5, - "point": {"x": 50.0, "y": 50.0} - }, - { - "frame": 10, - "point": {"x": 10.0, "y": 50.0} - } - ] - } - ] + "classifications": [] + }, { + "frame": 10, + "point": { + "x": 10.0, + "y": 50.0 + }, + "classifications": [] + }] + }] +}, { + "classifications": [], + "schemaId": + "cl5iw0roz00lwci6g5jni62vs", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" }, - { - "classifications": [], - "schemaId": "cl5iw0roz00lwci6g5jni62vs", - "dataRow": {"globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"}, - "uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", - "segments": [ - { - "keyframes": [ - { - "frame": 1, - "bbox": {"top": 10.0, "left": 5.0, "height": 100.0, "width": 150.0} - }, - { - "frame": 5, - "bbox": {"top": 30.0, "left": 5.0, "height": 50.0, "width": 150.0} - } - ] + "uuid": + "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", + "segments": [{ + "keyframes": [{ + "frame": 1, + "bbox": { + "top": 10.0, + "left": 5.0, + "height": 100.0, + "width": 150.0 + }, + "classifications": [] + }, { + "frame": 5, + "bbox": { + "top": 30.0, + "left": 5.0, + "height": 50.0, + "width": 150.0 + }, + "classifications": [] + }] + }, { + "keyframes": [{ + "frame": 10, + "bbox": { + "top": 300.0, + "left": 200.0, + "height": 400.0, + "width": 150.0 }, - { - "keyframes": [ - { - "frame": 10, - "bbox": {"top": 300.0, "left": 200.0, "height": 400.0, "width": 150.0} - } - ] - } - ] - } -] \ No newline at end of file + "classifications": [] + }] + }] +}] diff --git 
a/tests/data/assets/ndjson/video_import_name_only.json b/tests/data/assets/ndjson/video_import_name_only.json index 4f69d1d43..8c287aac2 100644 --- a/tests/data/assets/ndjson/video_import_name_only.json +++ b/tests/data/assets/ndjson/video_import_name_only.json @@ -1,106 +1,166 @@ -[ - { - "answer": {"name": "answer 1"}, - "name": "question 1", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - "frames": [{"start": 30, "end": 35}, {"start": 50, "end": 51}] +[{ + "answer": { + "name": "answer 1" }, - { - "answer": [{"name": "answer 2"}], - "name": "question 2", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", - "frames": [{"start": 0, "end": 5}] + "name": "question 1", + "dataRow": { + "id": "ckrb1sf1i1g7i0ybcdc6oc8ct" }, - { - "answer": "a value", - "name": "question 3", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584" + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + "frames": [{ + "start": 30, + "end": 35 + }, { + "start": 50, + "end": 51 + }] +}, { + "answer": [{ + "name": "answer 2" + }], + "name": "question 2", + "dataRow": { + "id": "ckrb1sf1i1g7i0ybcdc6oc8ct" }, - { - "classifications": [], - "name": "segment 1", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94", - "segments": [ - { - "keyframes": [ - { - "frame": 1, - "line": [{"x": 10.0, "y": 10.0}, {"x": 100.0, "y": 100.0}, {"x": 50.0, "y": 30.0}] - }, - { - "frame": 5, - "line": [{"x": 15.0, "y": 10.0}, {"x": 50.0, "y": 100.0}, {"x": 50.0, "y": 30.0}] - } - ] - }, - { - "keyframes": [ - { - "frame": 8, - "line": [{"x": 100.0, "y": 10.0}, {"x": 50.0, "y": 100.0}, {"x": 50.0, "y": 30.0}] - } - ] - } - ] + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", + "frames": [{ + "start": 0, + "end": 5 + }] +}, { + "answer": "a value", + "name": "question 3", + "dataRow": { + "id": "ckrb1sf1i1g7i0ybcdc6oc8ct" + }, + 
"uuid": "e5f32456-bd67-4520-8d3b-cbeb2204bad3" +}, { + "classifications": [], + "name": + "segment 1", + "dataRow": { + "id": "ckrb1sf1i1g7i0ybcdc6oc8ct" }, - { - "classifications": [], - "name": "segment 2", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "f963be22-227b-4efe-9be4-2738ed822216", - "segments": [ - { - "keyframes": [ - { - "frame": 1, - "point": {"x": 10.0, "y": 10.0} - } - ] + "uuid": + "6f7c835a-0139-4896-b73f-66a6baa89e94", + "segments": [{ + "keyframes": [{ + "frame": 1, + "line": [{ + "x": 10.0, + "y": 10.0 + }, { + "x": 100.0, + "y": 100.0 + }, { + "x": 50.0, + "y": 30.0 + }], + "classifications": [] + }, { + "frame": 5, + "line": [{ + "x": 15.0, + "y": 10.0 + }, { + "x": 50.0, + "y": 100.0 + }, { + "x": 50.0, + "y": 30.0 + }], + "classifications": [] + }] + }, { + "keyframes": [{ + "frame": 8, + "line": [{ + "x": 100.0, + "y": 10.0 + }, { + "x": 50.0, + "y": 100.0 + }, { + "x": 50.0, + "y": 30.0 + }], + "classifications": [] + }] + }] +}, { + "classifications": [], + "name": + "segment 2", + "dataRow": { + "id": "ckrb1sf1i1g7i0ybcdc6oc8ct" + }, + "uuid": + "f963be22-227b-4efe-9be4-2738ed822216", + "segments": [{ + "keyframes": [{ + "frame": 1, + "point": { + "x": 10.0, + "y": 10.0 + }, + "classifications": [] + }] + }, { + "keyframes": [{ + "frame": 5, + "point": { + "x": 50.0, + "y": 50.0 }, - { - "keyframes": [ - { - "frame": 5, - "point": {"x": 50.0, "y": 50.0} - }, - { - "frame": 10, - "point": {"x": 10.0, "y": 50.0} - } - ] - } - ] + "classifications": [] + }, { + "frame": 10, + "point": { + "x": 10.0, + "y": 50.0 + }, + "classifications": [] + }] + }] +}, { + "classifications": [], + "name": + "segment 3", + "dataRow": { + "id": "ckrb1sf1i1g7i0ybcdc6oc8ct" }, - { - "classifications": [], - "name": "segment 3", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - "uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", - "segments": [ - { - "keyframes": [ - { - "frame": 1, - "bbox": {"top": 10.0, "left": 5.0, "height": 100.0, "width": 
150.0} - }, - { - "frame": 5, - "bbox": {"top": 30.0, "left": 5.0, "height": 50.0, "width": 150.0} - } - ] + "uuid": + "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", + "segments": [{ + "keyframes": [{ + "frame": 1, + "bbox": { + "top": 10.0, + "left": 5.0, + "height": 100.0, + "width": 150.0 + }, + "classifications": [] + }, { + "frame": 5, + "bbox": { + "top": 30.0, + "left": 5.0, + "height": 50.0, + "width": 150.0 + }, + "classifications": [] + }] + }, { + "keyframes": [{ + "frame": 10, + "bbox": { + "top": 300.0, + "left": 200.0, + "height": 400.0, + "width": 150.0 }, - { - "keyframes": [ - { - "frame": 10, - "bbox": {"top": 300.0, "left": 200.0, "height": 400.0, "width": 150.0} - } - ] - } - ] - } -] \ No newline at end of file + "classifications": [] + }] + }] +}] diff --git a/tests/data/serialization/ndjson/test_checklist.py b/tests/data/serialization/ndjson/test_checklist.py index dbf29a1b3..55d56a448 100644 --- a/tests/data/serialization/ndjson/test_checklist.py +++ b/tests/data/serialization/ndjson/test_checklist.py @@ -1,4 +1,3 @@ -import json from labelbox.data.annotation_types.annotation import ClassificationAnnotation from labelbox.data.annotation_types.classification.classification import Checklist, ClassificationAnswer, Radio from labelbox.data.annotation_types.data.text import TextData @@ -7,37 +6,281 @@ from labelbox.data.serialization.ndjson.converter import NDJsonConverter -def test_serialization(): - label = Label(uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( - uid="bkj7z2q0b0000jx6x0q2q7q0d", - text="This is a test", - ), - annotations=[ - ClassificationAnnotation( - name="checkbox_question_geo", - confidence=0.5, - value=Checklist(answer=[ - ClassificationAnswer(name="first_answer"), - ClassificationAnswer(name="second_answer") - ])) - ]) +def test_serialization_min(): + label = Label( + uid="ckj7z2q0b0000jx6x0q2q7q0d", + data=TextData( + uid="bkj7z2q0b0000jx6x0q2q7q0d", + text="This is a test", + ), + annotations=[ + ClassificationAnnotation( + 
name="checkbox_question_geo", + value=Checklist( + answer=[ClassificationAnswer(name="first_answer")]), + ) + ]) + expected = { + 'name': 'checkbox_question_geo', + 'dataRow': { + 'id': 'bkj7z2q0b0000jx6x0q2q7q0d' + }, + 'answer': [{ + 'name': 'first_answer' + }] + } serialized = NDJsonConverter.serialize([label]) + res = next(serialized) + + res.pop("uuid") + assert res == expected + + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + res.annotations[0].extra.pop("uuid") + assert res.annotations == label.annotations + + +def test_serialization_with_classification(): + label = Label( + uid="ckj7z2q0b0000jx6x0q2q7q0d", + data=TextData( + uid="bkj7z2q0b0000jx6x0q2q7q0d", + text="This is a test", + ), + annotations=[ + ClassificationAnnotation( + name="checkbox_question_geo", + confidence=0.5, + value=Checklist(answer=[ + ClassificationAnswer( + name="first_answer", + confidence=0.1, + classifications=[ + ClassificationAnnotation( + name="sub_radio_question", + value=Radio(answer=ClassificationAnswer( + name="first_sub_radio_answer", + confidence=0.31))), + ClassificationAnnotation( + name="sub_chck_question", + value=Checklist(answer=[ + ClassificationAnswer( + name="second_subchk_answer", + confidence=0.41), + ClassificationAnswer( + name="third_subchk_answer", + confidence=0.42), + ],)) + ]), + ])) + ]) + expected = { + 'confidence': + 0.5, + 'name': + 'checkbox_question_geo', + 'dataRow': { + 'id': 'bkj7z2q0b0000jx6x0q2q7q0d' + }, + 'answer': [{ + 'confidence': + 0.1, + 'name': + 'first_answer', + 'classifications': [{ + 'name': 'sub_radio_question', + 'answer': { + 'confidence': 0.31, + 'name': 'first_sub_radio_answer', + } + }, { + 'name': + 'sub_chck_question', + 'answer': [{ + 'confidence': 0.41, + 'name': 'second_subchk_answer', + }, { + 'confidence': 0.42, + 'name': 'third_subchk_answer', + }] + }] + }] + } + + serialized = NDJsonConverter.serialize([label]) res = next(serialized) - assert res['confidence'] == 0.5 - assert 
res['name'] == "checkbox_question_geo" - assert res['answer'][0]['name'] == "first_answer" - assert res['answer'][1]['name'] == "second_answer" - assert res['dataRow']['id'] == "bkj7z2q0b0000jx6x0q2q7q0d" + + res.pop("uuid") + assert res == expected deserialized = NDJsonConverter.deserialize([res]) res = next(deserialized) - annotation = res.annotations[0] - assert annotation.confidence == 0.5 + res.annotations[0].extra.pop("uuid") + assert res.annotations == label.annotations + - annotation_value = annotation.value - assert type(annotation_value) is Checklist - assert annotation_value.answer[0].name == "first_answer" - assert annotation_value.answer[1].name == "second_answer" +def test_serialization_with_classification_double_nested(): + label = Label( + uid="ckj7z2q0b0000jx6x0q2q7q0d", + data=TextData( + uid="bkj7z2q0b0000jx6x0q2q7q0d", + text="This is a test", + ), + annotations=[ + ClassificationAnnotation( + name="checkbox_question_geo", + confidence=0.5, + value=Checklist(answer=[ + ClassificationAnswer( + name="first_answer", + confidence=0.1, + classifications=[ + ClassificationAnnotation( + name="sub_radio_question", + value=Radio(answer=ClassificationAnswer( + name="first_sub_radio_answer", + confidence=0.31, + classifications=[ + ClassificationAnnotation( + name="sub_chck_question", + value=Checklist(answer=[ + ClassificationAnswer( + name="second_subchk_answer", + confidence=0.41), + ClassificationAnswer( + name="third_subchk_answer", + confidence=0.42), + ],)) + ]))), + ]), + ])) + ]) + + expected = { + 'confidence': + 0.5, + 'name': + 'checkbox_question_geo', + 'dataRow': { + 'id': 'bkj7z2q0b0000jx6x0q2q7q0d' + }, + 'answer': [{ + 'confidence': + 0.1, + 'name': + 'first_answer', + 'classifications': [{ + 'name': 'sub_radio_question', + 'answer': { + 'confidence': + 0.31, + 'name': + 'first_sub_radio_answer', + 'classifications': [{ + 'name': + 'sub_chck_question', + 'answer': [{ + 'confidence': 0.41, + 'name': 'second_subchk_answer', + }, { + 
'confidence': 0.42, + 'name': 'third_subchk_answer', + }] + }] + } + }] + }] + } + serialized = NDJsonConverter.serialize([label]) + res = next(serialized) + + res.pop("uuid") + assert res == expected + + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + res.annotations[0].extra.pop("uuid") + assert res.annotations == label.annotations + + +def test_serialization_with_classification_double_nested_2(): + label = Label( + uid="ckj7z2q0b0000jx6x0q2q7q0d", + data=TextData( + uid="bkj7z2q0b0000jx6x0q2q7q0d", + text="This is a test", + ), + annotations=[ + ClassificationAnnotation( + name="sub_radio_question", + value=Radio(answer=ClassificationAnswer( + name="first_sub_radio_answer", + confidence=0.31, + classifications=[ + ClassificationAnnotation( + name="sub_chck_question", + value=Checklist(answer=[ + ClassificationAnswer( + name="second_subchk_answer", + confidence=0.41, + classifications=[ + ClassificationAnnotation( + name="checkbox_question_geo", + value=Checklist(answer=[ + ClassificationAnswer( + name="first_answer", + confidence=0.1, + classifications=[]), + ])) + ]), + ClassificationAnswer(name="third_subchk_answer", + confidence=0.42), + ])) + ]))), + ]) + + expected = { + 'name': 'sub_radio_question', + 'answer': { + 'confidence': + 0.31, + 'name': + 'first_sub_radio_answer', + 'classifications': [{ + 'name': + 'sub_chck_question', + 'answer': [{ + 'confidence': + 0.41, + 'name': + 'second_subchk_answer', + 'classifications': [{ + 'name': 'checkbox_question_geo', + 'answer': [{ + 'confidence': 0.1, + 'name': 'first_answer', + }] + }] + }, { + 'confidence': 0.42, + 'name': 'third_subchk_answer', + }] + }] + }, + 'dataRow': { + 'id': 'bkj7z2q0b0000jx6x0q2q7q0d' + } + } + + serialized = NDJsonConverter.serialize([label]) + res = next(serialized) + res.pop("uuid") + assert res == expected + + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + res.annotations[0].extra.pop("uuid") + assert res.annotations 
== label.annotations diff --git a/tests/data/serialization/ndjson/test_dicom.py b/tests/data/serialization/ndjson/test_dicom.py index 285a02c64..eac5038c0 100644 --- a/tests/data/serialization/ndjson/test_dicom.py +++ b/tests/data/serialization/ndjson/test_dicom.py @@ -8,7 +8,7 @@ """ dicom_polyline_annotations = [ - lb_types.DICOMObjectAnnotation(uuid="dicom-object-uid", + lb_types.DICOMObjectAnnotation(uuid="78a8a027-9089-420c-8348-6099eb77e4aa", name="dicom_polyline", frame=2, value=lb_types.Line(points=[ @@ -35,8 +35,7 @@ 'axial', 'segments': [{ 'keyframes': [{ - 'frame': - 2, + 'frame': 2, 'line': [ { 'x': 680.0, @@ -50,7 +49,8 @@ 'x': 190.0, 'y': 220.0 }, - ] + ], + 'classifications': [], }] }], } @@ -79,15 +79,13 @@ lb_types.MaskInstance(color_rgb=(255, 0, 0), name="mask3") ] -video_mask_annotation = lb_types.VideoMaskAnnotation(name="video_mask", - frames=frames, +video_mask_annotation = lb_types.VideoMaskAnnotation(frames=frames, instances=instances) video_mask_annotation_ndjson = { 'dataRow': { 'id': 'test-uid' }, - 'name': 'video_mask', 'masks': { 'frames': [{ 'index': 1, @@ -144,7 +142,6 @@ dicom_mask_annotation_ndjson = copy(video_mask_annotation_ndjson) dicom_mask_annotation_ndjson['groupKey'] = 'axial' -dicom_mask_annotation_ndjson['name'] = 'dicom_mask' dicom_mask_annotation_ndjson_with_global_key = copy( dicom_mask_annotation_ndjson) dicom_mask_annotation_ndjson_with_global_key['dataRow'] = { @@ -184,7 +181,8 @@ def test_serialize_label(label, ndjson): @pytest.mark.parametrize('label, ndjson', labels_ndjsons) def test_deserialize_label(label, ndjson): deserialized_label = next(NDJsonConverter().deserialize([ndjson])) - deserialized_label.annotations[0].extra = {} + if hasattr(deserialized_label.annotations[0], 'extra'): + deserialized_label.annotations[0].extra = {} assert deserialized_label.annotations == label.annotations @@ -192,5 +190,6 @@ def test_deserialize_label(label, ndjson): def test_serialize_deserialize_label(label): serialized = 
list(NDJsonConverter.serialize([label])) deserialized = list(NDJsonConverter.deserialize(serialized)) - deserialized[0].annotations[0].extra = {} + if hasattr(deserialized[0].annotations[0], 'extra'): + deserialized[0].annotations[0].extra = {} assert deserialized[0].annotations == label.annotations diff --git a/tests/data/serialization/ndjson/test_document.py b/tests/data/serialization/ndjson/test_document.py index 3120d366c..a6aa03908 100644 --- a/tests/data/serialization/ndjson/test_document.py +++ b/tests/data/serialization/ndjson/test_document.py @@ -1,7 +1,34 @@ import json - +import labelbox.types as lb_types from labelbox.data.serialization.ndjson.converter import NDJsonConverter +bbox_annotation = lb_types.ObjectAnnotation( + name="bounding_box", # must match your ontology feature's name + value=lb_types.DocumentRectangle( + start=lb_types.Point(x=42.799, y=86.498), # Top left + end=lb_types.Point(x=141.911, y=303.195), # Bottom right + page=1, + unit=lb_types.RectangleUnit.POINTS)) +bbox_labels = [ + lb_types.Label(data=lb_types.DocumentData(global_key='test-global-key'), + annotations=[bbox_annotation]) +] +bbox_ndjson = [{ + 'bbox': { + 'height': 216.697, + 'left': 42.799, + 'top': 86.498, + 'width': 99.112, + }, + 'classifications': [], + 'dataRow': { + 'globalKey': 'test-global-key' + }, + 'name': 'bounding_box', + 'page': 1, + 'unit': 'POINTS' +}] + def round_dict(data): if isinstance(data, dict): @@ -38,3 +65,15 @@ def test_pdf_with_name_only(): res = list(NDJsonConverter.serialize(res)) assert [round_dict(x) for x in res] == [round_dict(x) for x in data] f.close() + + +def test_pdf_bbox_serialize(): + serialized = list(NDJsonConverter.serialize(bbox_labels)) + serialized[0].pop('uuid') + assert serialized == bbox_ndjson + + +def test_pdf_bbox_deserialize(): + deserialized = list(NDJsonConverter.deserialize(bbox_ndjson)) + deserialized[0].annotations[0].extra = {} + assert deserialized[0].annotations == bbox_labels[0].annotations diff --git 
a/tests/data/serialization/ndjson/test_export_video_objects.py b/tests/data/serialization/ndjson/test_export_video_objects.py index 00f62fab6..c85b48234 100644 --- a/tests/data/serialization/ndjson/test_export_video_objects.py +++ b/tests/data/serialization/ndjson/test_export_video_objects.py @@ -537,7 +537,8 @@ def video_serialized_bbox_label(): 'left': 70.0, 'height': 249.0, 'width': 384.0 - } + }, + 'classifications': [] }, { 'frame': 5, 'bbox': { @@ -545,7 +546,8 @@ def video_serialized_bbox_label(): 'left': 70.0, 'height': 316.0, 'width': 277.0 - } + }, + 'classifications': [] }] }, { 'keyframes': [{ @@ -555,7 +557,8 @@ def video_serialized_bbox_label(): 'left': 70.0, 'height': 216.0, 'width': 213.0 - } + }, + 'classifications': [] }, { 'frame': 15, 'bbox': { @@ -563,7 +566,8 @@ def video_serialized_bbox_label(): 'left': 70.0, 'height': 274.0, 'width': 288.0 - } + }, + 'classifications': [] }, { 'frame': 21, 'bbox': { @@ -571,7 +575,8 @@ def video_serialized_bbox_label(): 'left': 70.0, 'height': 317.0, 'width': 464.0 - } + }, + 'classifications': [] }, { 'frame': 29, 'bbox': { @@ -579,7 +584,8 @@ def video_serialized_bbox_label(): 'left': 70.0, 'height': 329.0, 'width': 536.0 - } + }, + 'classifications': [] }] }] } diff --git a/tests/data/serialization/ndjson/test_radio.py b/tests/data/serialization/ndjson/test_radio.py index 0e56f117c..583eb1fa0 100644 --- a/tests/data/serialization/ndjson/test_radio.py +++ b/tests/data/serialization/ndjson/test_radio.py @@ -1,13 +1,49 @@ import json from labelbox.data.annotation_types.annotation import ClassificationAnnotation -from labelbox.data.annotation_types.classification.classification import ClassificationAnswer, Radio +from labelbox.data.annotation_types.classification.classification import ClassificationAnswer +from labelbox.data.annotation_types.classification.classification import Radio from labelbox.data.annotation_types.data.text import TextData from labelbox.data.annotation_types.label import Label from 
labelbox.data.serialization.ndjson.converter import NDJsonConverter -def test_serialization(): +def test_serialization_with_radio_min(): + label = Label( + uid="ckj7z2q0b0000jx6x0q2q7q0d", + data=TextData( + uid="bkj7z2q0b0000jx6x0q2q7q0d", + text="This is a test", + ), + annotations=[ + ClassificationAnnotation( + name="radio_question_geo", + value=Radio( + answer=ClassificationAnswer(name="first_radio_answer",))) + ]) + + expected = { + 'name': 'radio_question_geo', + 'answer': { + 'name': 'first_radio_answer' + }, + 'dataRow': { + 'id': 'bkj7z2q0b0000jx6x0q2q7q0d' + } + } + serialized = NDJsonConverter.serialize([label]) + res = next(serialized) + + res.pop("uuid") + assert res == expected + + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + res.annotations[0].extra.pop("uuid") + assert res.annotations == label.annotations + + +def test_serialization_with_radio_classification(): label = Label(uid="ckj7z2q0b0000jx6x0q2q7q0d", data=TextData( uid="bkj7z2q0b0000jx6x0q2q7q0d", @@ -18,22 +54,42 @@ def test_serialization(): name="radio_question_geo", confidence=0.5, value=Radio(answer=ClassificationAnswer( - confidence=0.6, name="first_radio_answer"))) + confidence=0.6, + name="first_radio_answer", + classifications=[ + ClassificationAnnotation( + name="sub_radio_question", + value=Radio(answer=ClassificationAnswer( + name="first_sub_radio_answer"))) + ]))) ]) + expected = { + 'confidence': 0.5, + 'name': 'radio_question_geo', + 'answer': { + 'confidence': + 0.6, + 'name': + 'first_radio_answer', + 'classifications': [{ + 'name': 'sub_radio_question', + 'answer': { + 'name': 'first_sub_radio_answer', + } + }] + }, + 'dataRow': { + 'id': 'bkj7z2q0b0000jx6x0q2q7q0d' + } + } + serialized = NDJsonConverter.serialize([label]) res = next(serialized) - assert res['confidence'] == 0.5 - assert res['name'] == "radio_question_geo" - assert res['answer']['name'] == "first_radio_answer" - assert res['answer']['confidence'] == 0.6 - assert 
res['dataRow']['id'] == "bkj7z2q0b0000jx6x0q2q7q0d" + res.pop("uuid") + assert res == expected deserialized = NDJsonConverter.deserialize([res]) res = next(deserialized) - annotation = res.annotations[0] - assert annotation.confidence == 0.5 - - annotation_value = annotation.value - assert type(annotation_value) is Radio - assert annotation_value.answer.name == "first_radio_answer" + res.annotations[0].extra.pop("uuid") + assert res.annotations == label.annotations diff --git a/tests/data/serialization/ndjson/test_relationship.py b/tests/data/serialization/ndjson/test_relationship.py new file mode 100644 index 000000000..5285d0195 --- /dev/null +++ b/tests/data/serialization/ndjson/test_relationship.py @@ -0,0 +1,41 @@ +import json +import pytest +from uuid import uuid4 + +from labelbox.data.serialization.ndjson.converter import NDJsonConverter + + +def test_relationship(): + with open('tests/data/assets/ndjson/relationship_import.json', 'r') as file: + data = json.load(file) + + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + + assert res == data + + +def test_relationship_nonexistent_object(): + with open('tests/data/assets/ndjson/relationship_import.json', 'r') as file: + data = json.load(file) + + relationship_annotation = data[2] + source_uuid = relationship_annotation["relationship"]["source"] + target_uuid = str(uuid4()) + relationship_annotation["relationship"]["target"] = target_uuid + error_msg = f"Relationship object refers to nonexistent object with UUID '{source_uuid}' and/or '{target_uuid}'" + + with pytest.raises(ValueError, match=error_msg): + list(NDJsonConverter.deserialize(data)) + + +def test_relationship_duplicate_uuids(): + with open('tests/data/assets/ndjson/relationship_import.json', 'r') as file: + data = json.load(file) + + source, target = data[0], data[1] + target["uuid"] = source["uuid"] + error_msg = f"UUID '{source['uuid']}' is not unique" + + with pytest.raises(AssertionError, 
match=error_msg): + list(NDJsonConverter.deserialize(data)) diff --git a/tests/data/serialization/ndjson/test_text.py b/tests/data/serialization/ndjson/test_text.py index 9d1b51e2c..74ae1c9af 100644 --- a/tests/data/serialization/ndjson/test_text.py +++ b/tests/data/serialization/ndjson/test_text.py @@ -1,4 +1,3 @@ -import json from labelbox.data.annotation_types.annotation import ClassificationAnnotation from labelbox.data.annotation_types.classification.classification import ClassificationAnswer, Radio, Text from labelbox.data.annotation_types.data.text import TextData @@ -32,6 +31,22 @@ def test_serialization(): annotation = res.annotations[0] assert annotation.confidence == 0.5 + annotation_value = annotation.value + assert type(annotation_value) is Text + assert annotation_value.answer == "first_radio_answer" + + serialized = NDJsonConverter.serialize([label]) + res = next(serialized) + assert res['confidence'] == 0.5 + assert res['name'] == "radio_question_geo" + assert res['answer'] == "first_radio_answer" + assert res['dataRow']['id'] == "bkj7z2q0b0000jx6x0q2q7q0d" + + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + annotation = res.annotations[0] + assert annotation.confidence == 0.5 + annotation_value = annotation.value assert type(annotation_value) is Text assert annotation_value.answer == "first_radio_answer" \ No newline at end of file diff --git a/tests/data/serialization/ndjson/test_video.py b/tests/data/serialization/ndjson/test_video.py index b7f7d887b..ae1adcbed 100644 --- a/tests/data/serialization/ndjson/test_video.py +++ b/tests/data/serialization/ndjson/test_video.py @@ -1,6 +1,18 @@ import json +from labelbox.client import Client +from labelbox.data.annotation_types.classification.classification import Checklist, ClassificationAnnotation, ClassificationAnswer, Radio +from labelbox.data.annotation_types.data.video import VideoData +from labelbox.data.annotation_types.geometry.line import Line +from 
labelbox.data.annotation_types.geometry.point import Point +from labelbox.data.annotation_types.geometry.rectangle import Rectangle +from labelbox.data.annotation_types.geometry.point import Point + +from labelbox.data.annotation_types.label import Label +from labelbox.data.annotation_types.video import VideoObjectAnnotation +import ndjson from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.schema.annotation_import import MALPredictionImport def test_video(): @@ -20,3 +32,476 @@ def test_video_name_only(): res = list(NDJsonConverter.deserialize(data)) res = list(NDJsonConverter.serialize(res)) assert res == [data[2], data[0], data[1], data[3], data[4], data[5]] + + +def test_video_classification_global_subclassifications(): + label = Label( + data=VideoData(global_key="sample-video-4.mp4",), + annotations=[ + ClassificationAnnotation( + name='radio_question_nested', + value=Radio(answer=ClassificationAnswer( + name='first_radio_question')), + ), + ClassificationAnnotation( + name='nested_checklist_question', + value=Checklist( + name='checklist', + answer=[ + ClassificationAnswer( + name='first_checklist_answer', + classifications=[ + ClassificationAnnotation( + name='sub_checklist_question', + value=Radio(answer=ClassificationAnswer( + name='first_sub_checklist_answer'))) + ]) + ])) + ]) + + expected_first_annotation = { + 'name': 'radio_question_nested', + 'answer': { + 'name': 'first_radio_question' + }, + 'dataRow': { + 'globalKey': 'sample-video-4.mp4' + } + } + + expected_second_annotation = nested_checklist_annotation_ndjson = { + "name": "nested_checklist_question", + "answer": [{ + "name": + "first_checklist_answer", + "classifications": [{ + "name": "sub_checklist_question", + "answer": { + "name": "first_sub_checklist_answer" + } + }] + }], + 'dataRow': { + 'globalKey': 'sample-video-4.mp4' + } + } + + serialized = NDJsonConverter.serialize([label]) + res = [x for x in serialized] + for annotations in res: + 
annotations.pop("uuid") + assert res == [expected_first_annotation, expected_second_annotation] + + deserialized = NDJsonConverter.deserialize(res) + res = next(deserialized) + annotations = res.annotations + for annotation in annotations: + annotation.extra.pop("uuid") + assert annotations == label.annotations + + +def test_video_classification_nesting_bbox(): + bbox_annotation = [ + VideoObjectAnnotation( + name="bbox_video", + keyframe=True, + frame=13, + segment_index=0, + value=Rectangle( + start=Point(x=146.0, y=98.0), # Top left + end=Point(x=382.0, y=341.0), # Bottom right + ), + classifications=[ + ClassificationAnnotation( + name='radio_question_nested', + value=Radio(answer=ClassificationAnswer( + name='first_radio_question', + classifications=[ + ClassificationAnnotation(name='sub_question_radio', + value=Checklist(answer=[ + ClassificationAnswer( + name='sub_answer') + ])) + ])), + ) + ]), + VideoObjectAnnotation( + name="bbox_video", + keyframe=True, + frame=15, + segment_index=0, + value=Rectangle( + start=Point(x=146.0, y=98.0), # Top left + end=Point(x=382.0, y=341.0), # Bottom right, + ), + classifications=[ + ClassificationAnnotation( + name='nested_checklist_question', + value=Checklist(answer=[ + ClassificationAnswer( + name='first_checklist_answer', + classifications=[ + ClassificationAnnotation( + name='sub_checklist_question', + value=Radio(answer=ClassificationAnswer( + name='first_sub_checklist_answer'))) + ]) + ])) + ]), + VideoObjectAnnotation( + name="bbox_video", + keyframe=True, + frame=19, + segment_index=0, + value=Rectangle( + start=Point(x=146.0, y=98.0), # Top left + end=Point(x=382.0, y=341.0), # Bottom right + )) + ] + expected = [{ + 'dataRow': { + 'globalKey': 'sample-video-4.mp4' + }, + 'name': + 'bbox_video', + 'classifications': [], + 'segments': [{ + 'keyframes': [{ + 'frame': + 13, + 'bbox': { + 'top': 98.0, + 'left': 146.0, + 'height': 243.0, + 'width': 236.0 + }, + 'classifications': [{ + 'name': 
'radio_question_nested', + 'answer': { + 'name': + 'first_radio_question', + 'classifications': [{ + 'name': 'sub_question_radio', + 'answer': [{ + 'name': 'sub_answer' + }] + }] + } + }] + }, { + 'frame': + 15, + 'bbox': { + 'top': 98.0, + 'left': 146.0, + 'height': 243.0, + 'width': 236.0 + }, + 'classifications': [{ + 'name': + 'nested_checklist_question', + 'answer': [{ + 'name': + 'first_checklist_answer', + 'classifications': [{ + 'name': 'sub_checklist_question', + 'answer': { + 'name': 'first_sub_checklist_answer' + } + }] + }] + }] + }, { + 'frame': 19, + 'bbox': { + 'top': 98.0, + 'left': 146.0, + 'height': 243.0, + 'width': 236.0 + }, + 'classifications': [] + }] + }] + }] + + label = Label(data=VideoData(global_key="sample-video-4.mp4",), + annotations=bbox_annotation) + + serialized = NDJsonConverter.serialize([label]) + res = [x for x in serialized] + for annotations in res: + annotations.pop("uuid") + assert res == expected + + deserialized = NDJsonConverter.deserialize(res) + res = next(deserialized) + annotations = res.annotations + for annotation in annotations: + annotation.extra.pop("uuid") + assert annotations == label.annotations + + +def test_video_classification_point(): + bbox_annotation = [ + VideoObjectAnnotation( + name="bbox_video", + keyframe=True, + frame=13, + segment_index=0, + value=Point(x=46.0, y=8.0), + classifications=[ + ClassificationAnnotation( + name='radio_question_nested', + value=Radio(answer=ClassificationAnswer( + name='first_radio_question', + classifications=[ + ClassificationAnnotation(name='sub_question_radio', + value=Checklist(answer=[ + ClassificationAnswer( + name='sub_answer') + ])) + ])), + ) + ]), + VideoObjectAnnotation( + name="bbox_video", + keyframe=True, + frame=15, + segment_index=0, + value=Point(x=56.0, y=18.0), + classifications=[ + ClassificationAnnotation( + name='nested_checklist_question', + value=Checklist(answer=[ + ClassificationAnswer( + name='first_checklist_answer', + classifications=[ + 
ClassificationAnnotation( + name='sub_checklist_question', + value=Radio(answer=ClassificationAnswer( + name='first_sub_checklist_answer'))) + ]) + ])) + ]), + VideoObjectAnnotation( + name="bbox_video", + keyframe=True, + frame=19, + segment_index=0, + value=Point(x=66.0, y=28.0), + ) + ] + expected = [{ + 'dataRow': { + 'globalKey': 'sample-video-4.mp4' + }, + 'name': + 'bbox_video', + 'classifications': [], + 'segments': [{ + 'keyframes': [{ + 'frame': + 13, + 'point': { + 'x': 46.0, + 'y': 8.0, + }, + 'classifications': [{ + 'name': 'radio_question_nested', + 'answer': { + 'name': + 'first_radio_question', + 'classifications': [{ + 'name': 'sub_question_radio', + 'answer': [{ + 'name': 'sub_answer' + }] + }] + } + }] + }, { + 'frame': + 15, + 'point': { + 'x': 56.0, + 'y': 18.0, + }, + 'classifications': [{ + 'name': + 'nested_checklist_question', + 'answer': [{ + 'name': + 'first_checklist_answer', + 'classifications': [{ + 'name': 'sub_checklist_question', + 'answer': { + 'name': 'first_sub_checklist_answer' + } + }] + }] + }] + }, { + 'frame': 19, + 'point': { + 'x': 66.0, + 'y': 28.0, + }, + 'classifications': [] + }] + }] + }] + + label = Label(data=VideoData(global_key="sample-video-4.mp4",), + annotations=bbox_annotation) + + serialized = NDJsonConverter.serialize([label]) + res = [x for x in serialized] + for annotations in res: + annotations.pop("uuid") + assert res == expected + + deserialized = NDJsonConverter.deserialize(res) + res = next(deserialized) + annotations = res.annotations + for annotation in annotations: + annotation.extra.pop("uuid") + assert annotations == label.annotations + + +def test_video_classification_frameline(): + bbox_annotation = [ + VideoObjectAnnotation( + name="bbox_video", + keyframe=True, + frame=13, + segment_index=0, + value=Line( + points=[Point(x=8, y=10), Point(x=10, y=9)]), + classifications=[ + ClassificationAnnotation( + name='radio_question_nested', + value=Radio(answer=ClassificationAnswer( + 
name='first_radio_question', + classifications=[ + ClassificationAnnotation(name='sub_question_radio', + value=Checklist(answer=[ + ClassificationAnswer( + name='sub_answer') + ])) + ])), + ) + ]), + VideoObjectAnnotation( + name="bbox_video", + keyframe=True, + frame=15, + segment_index=0, + value=Line( + points=[Point(x=18, y=20), Point(x=20, y=19)]), + classifications=[ + ClassificationAnnotation( + name='nested_checklist_question', + value=Checklist(answer=[ + ClassificationAnswer( + name='first_checklist_answer', + classifications=[ + ClassificationAnnotation( + name='sub_checklist_question', + value=Radio(answer=ClassificationAnswer( + name='first_sub_checklist_answer'))) + ]) + ])) + ]), + VideoObjectAnnotation( + name="bbox_video", + keyframe=True, + frame=19, + segment_index=0, + value=Line( + points=[Point(x=28, y=30), Point(x=30, y=29)]), + ) + ] + expected = [{ + 'dataRow': { + 'globalKey': 'sample-video-4.mp4' + }, + 'name': + 'bbox_video', + 'classifications': [], + 'segments': [{ + 'keyframes': [{ + 'frame': + 13, + 'line': [{ + 'x': 8.0, + 'y': 10.0, + }, { + 'x': 10.0, + 'y': 9.0, + }], + 'classifications': [{ + 'name': 'radio_question_nested', + 'answer': { + 'name': + 'first_radio_question', + 'classifications': [{ + 'name': 'sub_question_radio', + 'answer': [{ + 'name': 'sub_answer' + }] + }] + } + }] + }, { + 'frame': + 15, + 'line': [{ + 'x': 18.0, + 'y': 20.0, + }, { + 'x': 20.0, + 'y': 19.0, + }], + 'classifications': [{ + 'name': + 'nested_checklist_question', + 'answer': [{ + 'name': + 'first_checklist_answer', + 'classifications': [{ + 'name': 'sub_checklist_question', + 'answer': { + 'name': 'first_sub_checklist_answer' + } + }] + }] + }] + }, { + 'frame': 19, + 'line': [{ + 'x': 28.0, + 'y': 30.0, + }, { + 'x': 30.0, + 'y': 29.0, + }], + 'classifications': [] + }] + }] + }] + + label = Label(data=VideoData(global_key="sample-video-4.mp4",), + annotations=bbox_annotation) + + serialized = NDJsonConverter.serialize([label]) + res = [x for 
x in serialized] + for annotations in res: + annotations.pop("uuid") + assert res == expected + + deserialized = NDJsonConverter.deserialize(res) + res = next(deserialized) + annotations = res.annotations + for annotation in annotations: + annotation.extra.pop("uuid") + assert annotations == label.annotations \ No newline at end of file diff --git a/tests/integration/annotation_import/conftest.py b/tests/integration/annotation_import/conftest.py index 857fb3b37..c59076457 100644 --- a/tests/integration/annotation_import/conftest.py +++ b/tests/integration/annotation_import/conftest.py @@ -184,16 +184,40 @@ def ontology(): 'color': '#a23030', 'classifications': [{ - 'required': False, - 'instructions': 'nested', - 'name': 'nested', - 'type': 'radio', + 'required': + False, + 'instructions': + 'nested', + 'name': + 'nested', + 'type': + 'radio', 'options': [{ - 'label': 'radio_option_1', - 'value': 'radio_value_1' + 'label': + 'radio_option_1', + 'value': + 'radio_value_1', + 'options': [{ + 'required': + False, + 'instructions': + 'nested_checkbox', + 'name': + 'nested_checkbox', + 'type': + 'checklist', + 'options': [{ + 'label': 'nested_checkbox_option_1', + 'value': 'nested_checkbox_value_1' + }, { + 'label': 'nested_checkbox_option_2', + 'value': 'nested_checkbox_value_2' + }] + }] }] }] } + polygon_tool = { 'required': False, 'name': 'polygon', @@ -229,6 +253,13 @@ def ontology(): 'color': '#A30059', 'classifications': [] } + raster_segmentation_tool = { + 'required': False, + 'name': 'segmentation_mask', + 'tool': 'raster-segmentation', + 'color': '#ff0000', + 'classifications': [] + } checklist = { 'required': False, @@ -285,7 +316,7 @@ def ontology(): tools = [ bbox_tool, polygon_tool, polyline_tool, point_tool, entity_tool, - segmentation_tool, named_entity + segmentation_tool, raster_segmentation_tool, named_entity ] classifications = [checklist, free_form_text, radio] return {"tools": tools, "classifications": classifications} @@ -366,6 +397,16 @@ def 
dataset_pdf_entity(client, rand_gen, document_data_row): dataset.delete() +@pytest.fixture +def video_data(client, rand_gen, video_data_row): + dataset = client.create_dataset(name=rand_gen(str)) + data_row_ids = [] + data_row = dataset.create_data_row(video_data_row) + data_row_ids.append(data_row.uid) + yield dataset, data_row_ids + dataset.delete() + + @pytest.fixture def dataset_conversation_entity(client, rand_gen, conversation_entity_data_row): dataset = client.create_dataset(name=rand_gen(str)) @@ -377,14 +418,14 @@ def dataset_conversation_entity(client, rand_gen, conversation_entity_data_row): @pytest.fixture -def configured_project_without_data_rows(client, configured_project, rand_gen): +def configured_project_without_data_rows(client, ontology, rand_gen): project = client.create_project(name=rand_gen(str), description=rand_gen(str), queue_mode=QueueMode.Batch) editor = list( client.get_labeling_frontends( where=LabelingFrontend.name == "editor"))[0] - project.setup_editor(configured_project.ontology()) + project.setup(editor, ontology) yield project project.delete() @@ -642,9 +683,8 @@ def model_run_with_training_metadata(rand_gen, model): @pytest.fixture -def model_run_with_model_run_data_rows(client, configured_project, - model_run_predictions, model_run, - wait_for_label_processing): +def model_run_with_data_rows(client, configured_project, model_run_predictions, + model_run, wait_for_label_processing): configured_project.enable_model_assisted_labeling() upload_task = LabelImport.create_from_objects( diff --git a/tests/integration/annotation_import/test_bulk_import_request.py b/tests/integration/annotation_import/test_bulk_import_request.py index cece4ca61..78714ad41 100644 --- a/tests/integration/annotation_import/test_bulk_import_request.py +++ b/tests/integration/annotation_import/test_bulk_import_request.py @@ -3,15 +3,20 @@ import pytest import random from labelbox.data.annotation_types.annotation import ObjectAnnotation +from 
labelbox.data.annotation_types.classification.classification import Checklist, ClassificationAnnotation, ClassificationAnswer, Radio +from labelbox.data.annotation_types.data.video import VideoData +from labelbox.data.annotation_types.geometry.point import Point +from labelbox.data.annotation_types.geometry.rectangle import Rectangle, RectangleUnit from labelbox.data.annotation_types.label import Label from labelbox.data.annotation_types.data.text import TextData from labelbox.data.annotation_types.ner import DocumentEntity, DocumentTextSelection +from labelbox.data.annotation_types.video import VideoObjectAnnotation from labelbox.data.serialization import NDJsonConverter from labelbox.exceptions import MALValidationError, UuidError from labelbox.schema.bulk_import_request import BulkImportRequest from labelbox.schema.enums import BulkImportRequestState -from labelbox.schema.annotation_import import MALPredictionImport +from labelbox.schema.annotation_import import LabelImport, MALPredictionImport """ - Here we only want to check that the uploads are calling the validation - Then with unit tests we can check the types of errors raised @@ -338,3 +343,66 @@ def test_pdf_document_entity(client, configured_project_without_data_rows, import_annotations.wait_until_done() assert import_annotations.errors == [] + + +def test_nested_video_object_annotations(client, + configured_project_without_data_rows, + video_data, rand_gen): + bbox_annotation = [ + VideoObjectAnnotation( + name="bbox", + keyframe=True, + frame=13, + segment_index=0, + value=Rectangle( + start=Point(x=146.0, y=98.0), # Top left + end=Point(x=382.0, y=341.0), # Bottom right + ), + classifications=[ + ClassificationAnnotation( + name='nested', + value=Radio(answer=ClassificationAnswer( + name='radio_option_1', + classifications=[ + ClassificationAnnotation( + name='nested_checkbox', + value=Checklist(answer=[ + ClassificationAnswer( + name='nested_checkbox_option_1'), + ClassificationAnswer( + 
name='nested_checkbox_option_2') + ])) + ])), + ) + ]), + VideoObjectAnnotation( + name="bbox", + keyframe=True, + frame=19, + segment_index=0, + value=Rectangle( + start=Point(x=146.0, y=98.0), # Top left + end=Point(x=382.0, y=341.0), # Bottom right + )) + ] + + labels = [] + _, data_row_uids = video_data + configured_project_without_data_rows.create_batch( + rand_gen(str), + data_row_uids, # sample of data row objects + 5 # priority between 1(Highest) - 5(lowest) + ) + + for data_row_uid in data_row_uids: + labels.append( + Label(data=VideoData(uid=data_row_uid), + annotations=bbox_annotation)) + import_annotations = MALPredictionImport.create_from_objects( + client=client, + project_id=configured_project_without_data_rows.uid, + name=f"import {str(uuid.uuid4())}", + predictions=labels) + import_annotations.wait_until_done() + + assert import_annotations.errors == [] diff --git a/tests/integration/annotation_import/test_conversation_import.py b/tests/integration/annotation_import/test_conversation_import.py index 941cee690..2e21eff9b 100644 --- a/tests/integration/annotation_import/test_conversation_import.py +++ b/tests/integration/annotation_import/test_conversation_import.py @@ -39,16 +39,4 @@ def test_conversation_entity(client, configured_project_without_data_rows, predictions=labels) import_annotations.wait_until_done() - assert import_annotations.errors == [] - - exported_labels = configured_project_without_data_rows.label_generator() - for label in exported_labels: - assert len( - label.annotations) == 1 # we have created only 1 annotation above - annotation = label.annotations[0] - - assert type(annotation) is ConversationEntity - assert annotation.name == "named-entity" - assert annotation.value.message_id == "4" - assert annotation.value.start == 0 - assert annotation.value.end == 8 + assert import_annotations.errors == [] \ No newline at end of file diff --git a/tests/integration/annotation_import/test_data_types.py 
b/tests/integration/annotation_import/test_data_types.py index 778f5fefe..ed48e7913 100644 --- a/tests/integration/annotation_import/test_data_types.py +++ b/tests/integration/annotation_import/test_data_types.py @@ -1,10 +1,113 @@ +import itertools import pytest +import uuid + import labelbox as lb import labelbox.types as lb_types from labelbox.data.annotation_types.data import AudioData, ConversationData, DicomData, DocumentData, HTMLData, ImageData, TextData from labelbox.data.serialization import NDJsonConverter from labelbox.schema.annotation_import import AnnotationImportState +radio_annotation = lb_types.ClassificationAnnotation( + name="radio", + value=lb_types.Radio(answer=lb_types.ClassificationAnswer( + name="second_radio_answer"))) +checklist_annotation = lb_types.ClassificationAnnotation( + name="checklist", + value=lb_types.Checklist(answer=[ + lb_types.ClassificationAnswer(name="option1"), + lb_types.ClassificationAnswer(name="option2") + ])) +text_annotation = lb_types.ClassificationAnnotation( + name="text", value=lb_types.Text(answer="sample text")) + +video_mask_annotation = lb_types.VideoMaskAnnotation(frames=[ + lb_types.MaskFrame( + index=10, + instance_uri= + "https://storage.googleapis.com/labelbox-datasets/video-sample-data/mask_example.png" + ) +], + instances=[ + lb_types.MaskInstance( + color_rgb=(255, + 255, + 255), + name= + "segmentation_mask" + ) + ]) + +test_params = [[ + 'html', lb_types.HTMLData, + [radio_annotation, checklist_annotation, text_annotation] +], + [ + 'audio', lb_types.AudioData, + [radio_annotation, checklist_annotation, text_annotation] + ], ['video', lb_types.VideoData, [video_mask_annotation]]] + + +def get_annotation_comparison_dicts_from_labels(labels): + labels_ndjson = list(NDJsonConverter.serialize(labels)) + for annotation in labels_ndjson: + annotation.pop('uuid') + annotation.pop('dataRow') + + if 'masks' in annotation: + for frame in annotation['masks']['frames']: + frame.pop('instanceURI') + for 
instance in annotation['masks']['instances']: + instance.pop('colorRGB') + return labels_ndjson + + +def get_annotation_comparison_dicts_from_export(export_result, data_row_id, + project_id): + exported_data_row = [ + dr for dr in export_result if dr['data_row']['id'] == data_row_id + ][0] + exported_label = exported_data_row['projects'][project_id]['labels'][0] + exported_annotations = exported_label['annotations'] + converted_annotations = [] + if exported_label['label_kind'] == 'Video': + frames = [] + instances = [] + for frame_id, frame in exported_annotations['frames'].items(): + frames.append({'index': int(frame_id)}) + for object in frame['objects'].values(): + instances.append({'name': object['name']}) + converted_annotations.append( + {'masks': { + 'frames': frames, + 'instances': instances, + }}) + else: + exported_annotations = list( + itertools.chain(*exported_annotations.values())) + for annotation in exported_annotations: + if annotation['name'] == 'radio': + converted_annotations.append({ + 'name': annotation['name'], + 'answer': { + 'name': annotation['radio_answer']['name'] + } + }) + elif annotation['name'] == 'checklist': + converted_annotations.append({ + 'name': + annotation['name'], + 'answer': [{ + 'name': answer['name'] + } for answer in annotation['checklist_answers']] + }) + elif annotation['name'] == 'text': + converted_annotations.append({ + 'name': annotation['name'], + 'answer': annotation['text_answer']['content'] + }) + return converted_annotations + # TODO: Add VideoData. Currently label import job finishes without errors but project.export_labels() returns empty list. 
@pytest.mark.parametrize('data_type_class', [ @@ -44,3 +147,72 @@ def test_import_data_types(client, configured_project, classifications = exported_labels[0]['Label']['classifications'] assert len(objects) + len(classifications) == len(labels) data_row.delete() + + +@pytest.mark.parametrize('data_type, data_class, annotations', test_params) +def test_import_label_annotations(client, configured_project, + data_row_json_by_data_type, data_type, + data_class, annotations): + + dataset = next(configured_project.datasets()) + data_row_json = data_row_json_by_data_type[data_type] + data_row = dataset.create_data_row(data_row_json) + + labels = [ + lb_types.Label(data=data_class(uid=data_row.uid), + annotations=annotations) + ] + + label_import = lb.LabelImport.create_from_objects(client, + configured_project.uid, + f'test-import-html', + labels) + label_import.wait_until_done() + + assert label_import.state == lb.AnnotationImportState.FINISHED + assert len(label_import.errors) == 0 + export_params = { + "attachments": False, + "metadata_fields": False, + "data_row_details": False, + "project_details": False, + "performance_details": False + } + export_task = configured_project.export_v2(params=export_params) + export_task.wait_till_done() + assert export_task.errors is None + expected_annotations = get_annotation_comparison_dicts_from_labels(labels) + actual_annotations = get_annotation_comparison_dicts_from_export( + export_task.result, data_row.uid, configured_project.uid) + assert actual_annotations == expected_annotations + data_row.delete() + + +@pytest.mark.parametrize('data_type, data_class, annotations', test_params) +def test_import_mal_annotations(client, configured_project_without_data_rows, + data_row_json_by_data_type, data_type, + data_class, annotations, rand_gen): + + dataset = client.create_dataset(name=rand_gen(str)) + data_row_json = data_row_json_by_data_type[data_type] + data_row = dataset.create_data_row(data_row_json) + + 
configured_project_without_data_rows.create_batch( + rand_gen(str), + [data_row.uid], + ) + + labels = [ + lb_types.Label(data=data_class(uid=data_row.uid), + annotations=annotations) + ] + + import_annotations = lb.MALPredictionImport.create_from_objects( + client=client, + project_id=configured_project_without_data_rows.uid, + name=f"import {str(uuid.uuid4())}", + predictions=labels) + import_annotations.wait_until_done() + + assert import_annotations.errors == [] + # MAL Labels cannot be exported and compared to input labels diff --git a/tests/integration/annotation_import/test_label_import.py b/tests/integration/annotation_import/test_label_import.py index 44a308776..198ce2e3e 100644 --- a/tests/integration/annotation_import/test_label_import.py +++ b/tests/integration/annotation_import/test_label_import.py @@ -9,14 +9,13 @@ """ -def test_create_from_url(client, project, annotation_import_test_helpers): +def test_create_from_url(client, configured_project, + annotation_import_test_helpers): name = str(uuid.uuid4()) url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" - label_import = LabelImport.create_from_url(client=client, - project_id=project.uid, - name=name, - url=url) - assert label_import.parent_id == project.uid + label_import = LabelImport.create_from_url( + client=client, project_id=configured_project.uid, name=name, url=url) + assert label_import.parent_id == configured_project.uid annotation_import_test_helpers.check_running_state(label_import, name, url) @@ -53,16 +52,13 @@ def test_create_from_objects(client, configured_project, object_predictions, # annotation_import_test_helpers.assert_file_content(label_import.input_file_url, object_predictions) -def test_get(client, project, annotation_import_test_helpers): +def test_get(client, configured_project, annotation_import_test_helpers): name = str(uuid.uuid4()) url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" + label_import 
= LabelImport.create_from_url( + client=client, project_id=configured_project.uid, name=name, url=url) - label_import = LabelImport.create_from_url(client=client, - project_id=project.uid, - name=name, - url=url) - - assert label_import.parent_id == project.uid + assert label_import.parent_id == configured_project.uid annotation_import_test_helpers.check_running_state(label_import, name, url) diff --git a/tests/integration/annotation_import/test_mea_prediction_import.py b/tests/integration/annotation_import/test_mea_prediction_import.py index afca122b6..fb7ff50ca 100644 --- a/tests/integration/annotation_import/test_mea_prediction_import.py +++ b/tests/integration/annotation_import/test_mea_prediction_import.py @@ -11,27 +11,26 @@ """ -def test_create_from_url(model_run_with_model_run_data_rows, +def test_create_from_url(model_run_with_data_rows, annotation_import_test_helpers): name = str(uuid.uuid4()) url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" - annotation_import = model_run_with_model_run_data_rows.add_predictions( + annotation_import = model_run_with_data_rows.add_predictions( name=name, predictions=url) - assert annotation_import.model_run_id == model_run_with_model_run_data_rows.uid + assert annotation_import.model_run_id == model_run_with_data_rows.uid annotation_import_test_helpers.check_running_state(annotation_import, name, url) annotation_import.wait_until_done() -def test_create_from_objects(model_run_with_model_run_data_rows, - object_predictions, +def test_create_from_objects(model_run_with_data_rows, object_predictions, annotation_import_test_helpers): name = str(uuid.uuid4()) - annotation_import = model_run_with_model_run_data_rows.add_predictions( + annotation_import = model_run_with_data_rows.add_predictions( name=name, predictions=object_predictions) - assert annotation_import.model_run_id == model_run_with_model_run_data_rows.uid + assert annotation_import.model_run_id == model_run_with_data_rows.uid 
annotation_import_test_helpers.check_running_state(annotation_import, name) annotation_import_test_helpers.assert_file_content( annotation_import.input_file_url, object_predictions) @@ -76,17 +75,16 @@ def test_model_run_project_labels(model_run_with_all_project_labels, assert actual_label['DataRow ID'] == expected_label['dataRow']['id'] -def test_create_from_label_objects(model_run_with_model_run_data_rows, - object_predictions, +def test_create_from_label_objects(model_run_with_data_rows, object_predictions, annotation_import_test_helpers): name = str(uuid.uuid4()) predictions = list(NDJsonConverter.deserialize(object_predictions)) - annotation_import = model_run_with_model_run_data_rows.add_predictions( + annotation_import = model_run_with_data_rows.add_predictions( name=name, predictions=predictions) - assert annotation_import.model_run_id == model_run_with_model_run_data_rows.uid + assert annotation_import.model_run_id == model_run_with_data_rows.uid annotation_import_test_helpers.check_running_state(annotation_import, name) normalized_predictions = NDJsonConverter.serialize(predictions) annotation_import_test_helpers.assert_file_content( @@ -94,7 +92,7 @@ def test_create_from_label_objects(model_run_with_model_run_data_rows, annotation_import.wait_until_done() -def test_create_from_local_file(tmp_path, model_run_with_model_run_data_rows, +def test_create_from_local_file(tmp_path, model_run_with_data_rows, object_predictions, annotation_import_test_helpers): name = str(uuid.uuid4()) @@ -103,37 +101,34 @@ def test_create_from_local_file(tmp_path, model_run_with_model_run_data_rows, with file_path.open("w") as f: ndjson.dump(object_predictions, f) - annotation_import = model_run_with_model_run_data_rows.add_predictions( + annotation_import = model_run_with_data_rows.add_predictions( name=name, predictions=str(file_path)) - assert annotation_import.model_run_id == model_run_with_model_run_data_rows.uid + assert annotation_import.model_run_id == 
model_run_with_data_rows.uid annotation_import_test_helpers.check_running_state(annotation_import, name) annotation_import_test_helpers.assert_file_content( annotation_import.input_file_url, object_predictions) annotation_import.wait_until_done() -def test_get(client, model_run_with_model_run_data_rows, - annotation_import_test_helpers): +def test_get(client, model_run_with_data_rows, annotation_import_test_helpers): name = str(uuid.uuid4()) url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" - model_run_with_model_run_data_rows.add_predictions(name=name, - predictions=url) + model_run_with_data_rows.add_predictions(name=name, predictions=url) annotation_import = MEAPredictionImport.from_name( - client, model_run_id=model_run_with_model_run_data_rows.uid, name=name) + client, model_run_id=model_run_with_data_rows.uid, name=name) - assert annotation_import.model_run_id == model_run_with_model_run_data_rows.uid + assert annotation_import.model_run_id == model_run_with_data_rows.uid annotation_import_test_helpers.check_running_state(annotation_import, name, url) annotation_import.wait_until_done() @pytest.mark.slow -def test_wait_till_done(model_run_predictions, - model_run_with_model_run_data_rows): +def test_wait_till_done(model_run_predictions, model_run_with_data_rows): name = str(uuid.uuid4()) - annotation_import = model_run_with_model_run_data_rows.add_predictions( + annotation_import = model_run_with_data_rows.add_predictions( name=name, predictions=model_run_predictions) assert len(annotation_import.inputs) == len(model_run_predictions) diff --git a/tests/integration/annotation_import/test_model_run.py b/tests/integration/annotation_import/test_model_run.py index db426c454..ceaf53c2f 100644 --- a/tests/integration/annotation_import/test_model_run.py +++ b/tests/integration/annotation_import/test_model_run.py @@ -1,6 +1,5 @@ import time import os -import uuid import pytest from collections import Counter @@ -8,6 +7,23 @@ 
from labelbox import DataSplit, ModelRun +def _model_run_export_v2_results(model_run, task_name, params, num_retries=5): + """Export model run results and retry if no results are returned.""" + while (num_retries > 0): + task = model_run.export_v2(task_name, params=params) + assert task.name == task_name + task.wait_till_done() + assert task.status == "COMPLETE" + assert task.errors is None + task_results = task.result + if len(task_results) == 0: + num_retries -= 1 + time.sleep(5) + else: + return task_results + return [] + + def test_model_run(client, configured_project_with_label, rand_gen): project, _, _, label = configured_project_with_label label_id = label.uid @@ -77,8 +93,8 @@ def test_model_run_get_config(model_run_with_training_metadata): assert res["batch_size"] == new_config["batch_size"] -def test_model_run_data_rows_delete(model_run_with_model_run_data_rows): - model_run = model_run_with_model_run_data_rows +def test_model_run_data_rows_delete(model_run_with_data_rows): + model_run = model_run_with_data_rows before = list(model_run.model_run_data_rows()) annotation_data_row = before[0] @@ -107,32 +123,31 @@ def test_model_run_upsert_data_rows_using_global_keys(model_run, data_rows): def test_model_run_upsert_data_rows_with_existing_labels( - model_run_with_model_run_data_rows): - model_run_data_rows = list( - model_run_with_model_run_data_rows.model_run_data_rows()) + model_run_with_data_rows): + model_run_data_rows = list(model_run_with_data_rows.model_run_data_rows()) n_data_rows = len(model_run_data_rows) - model_run_with_model_run_data_rows.upsert_data_rows([ + model_run_with_data_rows.upsert_data_rows([ model_run_data_row.data_row().uid for model_run_data_row in model_run_data_rows ]) assert n_data_rows == len( - list(model_run_with_model_run_data_rows.model_run_data_rows())) + list(model_run_with_data_rows.model_run_data_rows())) -def test_model_run_export_labels(model_run_with_model_run_data_rows): - labels = 
model_run_with_model_run_data_rows.export_labels(download=True) +def test_model_run_export_labels(model_run_with_data_rows): + labels = model_run_with_data_rows.export_labels(download=True) assert len(labels) == 3 @pytest.mark.skipif(condition=os.environ['LABELBOX_TEST_ENVIRON'] == "onprem", reason="does not work for onprem") -def test_model_run_status(model_run_with_model_run_data_rows): +def test_model_run_status(model_run_with_data_rows): def get_model_run_status(): - return model_run_with_model_run_data_rows.client.execute( + return model_run_with_data_rows.client.execute( """query trainingPipelinePyApi($modelRunId: ID!) { trainingPipeline(where: {id : $modelRunId}) {status, errorMessage, metadata}} - """, {'modelRunId': model_run_with_model_run_data_rows.uid}, + """, {'modelRunId': model_run_with_data_rows.uid}, experimental=True)['trainingPipeline'] model_run_status = get_model_run_status() @@ -143,8 +158,7 @@ def get_model_run_status(): status = "COMPLETE" metadata = {'key1': 'value1'} errorMessage = "an error" - model_run_with_model_run_data_rows.update_status(status, metadata, - errorMessage) + model_run_with_data_rows.update_status(status, metadata, errorMessage) model_run_status = get_model_run_status() assert model_run_status['status'] == status @@ -152,38 +166,28 @@ def get_model_run_status(): assert model_run_status['errorMessage'] == errorMessage extra_metadata = {'key2': 'value2'} - model_run_with_model_run_data_rows.update_status(status, extra_metadata) + model_run_with_data_rows.update_status(status, extra_metadata) model_run_status = get_model_run_status() assert model_run_status['status'] == status assert model_run_status['metadata'] == {**metadata, **extra_metadata} assert model_run_status['errorMessage'] == errorMessage status = ModelRun.Status.FAILED - model_run_with_model_run_data_rows.update_status(status, metadata, - errorMessage) + model_run_with_data_rows.update_status(status, metadata, errorMessage) model_run_status = 
get_model_run_status() assert model_run_status['status'] == status.value with pytest.raises(ValueError): - model_run_with_model_run_data_rows.update_status( - "INVALID", metadata, errorMessage) + model_run_with_data_rows.update_status("INVALID", metadata, + errorMessage) -def test_model_run_export_v2(model_run_with_model_run_data_rows, - configured_project): +def test_model_run_export_v2(model_run_with_data_rows, configured_project): task_name = "test_task" - media_attributes = True params = {"media_attributes": media_attributes} - task = model_run_with_model_run_data_rows.export_v2(task_name, - params=params) - assert task.name == task_name - task.wait_till_done() - assert task.status == "COMPLETE" - assert task.errors is None - - task_results = task.result - + task_results = _model_run_export_v2_results(model_run_with_data_rows, + task_name, params) label_ids = [label.uid for label in configured_project.labels()] label_ids_set = set(label_ids) @@ -196,9 +200,8 @@ def test_model_run_export_v2(model_run_with_model_run_data_rows, else: assert 'media_attributes' not in task_result or task_result[ 'media_attributes'] is None - model_run = task_result['models'][ - model_run_with_model_run_data_rows.model_id]['model_runs'][ - model_run_with_model_run_data_rows.uid] + model_run = task_result['models'][model_run_with_data_rows.model_id][ + 'model_runs'][model_run_with_data_rows.uid] task_label_ids_set = set( map(lambda label: label['id'], model_run['labels'])) task_prediction_ids_set = set( diff --git a/tests/integration/annotation_import/test_upsert_prediction_import.py b/tests/integration/annotation_import/test_upsert_prediction_import.py index 9f78a5bcd..eb1fa9d80 100644 --- a/tests/integration/annotation_import/test_upsert_prediction_import.py +++ b/tests/integration/annotation_import/test_upsert_prediction_import.py @@ -12,7 +12,7 @@ @pytest.mark.skip() def test_create_from_url(client, tmp_path, object_predictions, - model_run_with_model_run_data_rows, + 
model_run_with_data_rows, configured_project_without_data_rows, annotation_import_test_helpers): name = str(uuid.uuid4()) @@ -21,7 +21,7 @@ def test_create_from_url(client, tmp_path, object_predictions, model_run_data_rows = [ mrdr.data_row().uid - for mrdr in model_run_with_model_run_data_rows.model_run_data_rows() + for mrdr in model_run_with_data_rows.model_run_data_rows() ] predictions = [ p for p in object_predictions @@ -38,13 +38,13 @@ def test_create_from_url(client, tmp_path, object_predictions, sign=True, content_type="application/json") - annotation_import, batch, mal_prediction_import = model_run_with_model_run_data_rows.upsert_predictions_and_send_to_project( + annotation_import, batch, mal_prediction_import = model_run_with_data_rows.upsert_predictions_and_send_to_project( name=name, predictions=url, project_id=configured_project_without_data_rows.uid, priority=5) - assert annotation_import.model_run_id == model_run_with_model_run_data_rows.uid + assert annotation_import.model_run_id == model_run_with_data_rows.uid annotation_import.wait_until_done() assert not annotation_import.errors assert annotation_import.statuses @@ -60,26 +60,26 @@ def test_create_from_url(client, tmp_path, object_predictions, @pytest.mark.skip() -def test_create_from_objects(model_run_with_model_run_data_rows, +def test_create_from_objects(model_run_with_data_rows, configured_project_without_data_rows, object_predictions, annotation_import_test_helpers): name = str(uuid.uuid4()) model_run_data_rows = [ mrdr.data_row().uid - for mrdr in model_run_with_model_run_data_rows.model_run_data_rows() + for mrdr in model_run_with_data_rows.model_run_data_rows() ] predictions = [ p for p in object_predictions if p['dataRow']['id'] in model_run_data_rows ] - annotation_import, batch, mal_prediction_import = model_run_with_model_run_data_rows.upsert_predictions_and_send_to_project( + annotation_import, batch, mal_prediction_import = 
model_run_with_data_rows.upsert_predictions_and_send_to_project( name=name, predictions=predictions, project_id=configured_project_without_data_rows.uid, priority=5) - assert annotation_import.model_run_id == model_run_with_model_run_data_rows.uid + assert annotation_import.model_run_id == model_run_with_data_rows.uid annotation_import.wait_until_done() assert not annotation_import.errors assert annotation_import.statuses @@ -95,7 +95,7 @@ def test_create_from_objects(model_run_with_model_run_data_rows, @pytest.mark.skip() -def test_create_from_local_file(tmp_path, model_run_with_model_run_data_rows, +def test_create_from_local_file(tmp_path, model_run_with_data_rows, configured_project_without_data_rows, object_predictions, annotation_import_test_helpers): @@ -106,7 +106,7 @@ def test_create_from_local_file(tmp_path, model_run_with_model_run_data_rows, model_run_data_rows = [ mrdr.data_row().uid - for mrdr in model_run_with_model_run_data_rows.model_run_data_rows() + for mrdr in model_run_with_data_rows.model_run_data_rows() ] predictions = [ p for p in object_predictions @@ -116,13 +116,13 @@ def test_create_from_local_file(tmp_path, model_run_with_model_run_data_rows, with file_path.open("w") as f: ndjson.dump(predictions, f) - annotation_import, batch, mal_prediction_import = model_run_with_model_run_data_rows.upsert_predictions_and_send_to_project( + annotation_import, batch, mal_prediction_import = model_run_with_data_rows.upsert_predictions_and_send_to_project( name=name, predictions=str(file_path), project_id=configured_project_without_data_rows.uid, priority=5) - assert annotation_import.model_run_id == model_run_with_model_run_data_rows.uid + assert annotation_import.model_run_id == model_run_with_data_rows.uid annotation_import.wait_until_done() assert not annotation_import.errors assert annotation_import.statuses diff --git a/tests/unit/test_mal_import.py b/tests/unit/test_mal_import.py index 08d32fd5f..799944a13 100644 --- 
a/tests/unit/test_mal_import.py +++ b/tests/unit/test_mal_import.py @@ -64,7 +64,7 @@ def test_invalid_labels_format(): "id": "ckrb1sf1i1g7i0ybcdc6oc8ct" }, "schemaId": "ckrb1sfjx099a0y914hl319ie", - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584" + "uuid": "3a83db52-75e0-49af-a171-234ce604502a" } with patch.object(MALPredictionImport, '_create_mal_import_from_bytes'):