diff --git a/CHANGELOG.md b/CHANGELOG.md index 44ee2bfe9..1ff2a3943 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,19 @@ # Changelog +# Version 3.58.0 (2023-12-11) +## Added +* `ontology_id` to the model app instantiation +* LLM data generation label types +* `run_foundry_app` to support running model foundry apps +* Two methods for sending data rows to any workflow task in a project, which can also include predictions from a model run, or annotations from a different project +## Fixed +* Documentation index for identifiables +## Removed +* Project.datasets and Datasets.projects methods as they have been deprecated +## Notebooks +* Added notebooks for human labeling (GT/MAL/MEA) and data generation (GT/MAL) +* Removed relationship annotations from text and conversational imports + # Version 3.57.0 (2023-11-30) ## Added * Global key support for Project move_data_rows_to_task_queue diff --git a/Makefile b/Makefile index e9ffdbc96..8ee19e00b 100644 --- a/Makefile +++ b/Makefile @@ -45,6 +45,15 @@ test-onprem: build-image -e LABELBOX_TEST_ONPREM_HOSTNAME=${LABELBOX_TEST_ONPREM_HOSTNAME} \ local/labelbox-python:test pytest $(PATH_TO_TEST) +test-dev0: build-image + docker run -it --rm -v ${PWD}:/usr/src -w /usr/src \ + -e LABELBOX_TEST_ENVIRON="custom" \ + -e DA_GCP_LABELBOX_API_KEY=${DA_GCP_LABELBOX_API_KEY} \ + -e LABELBOX_TEST_API_KEY_CUSTOM=${LABELBOX_TEST_API_KEY_CUSTOM} \ + -e LABELBOX_TEST_GRAPHQL_API_ENDPOINT="https://api.dev0.na-us.lb-dev.xyz/graphql" \ + -e LABELBOX_TEST_REST_API_ENDPOINT="https://api.dev0.na-us.lb-dev.xyz/api/v1" \ + local/labelbox-python:test pytest $(PATH_TO_TEST) + test-custom: build-image docker run -it --rm -v ${PWD}:/usr/src -w /usr/src \ -e LABELBOX_TEST_ENVIRON="custom" \ diff --git a/docs/source/conf.py b/docs/source/conf.py index 9ab1a2fbf..a10fb6392 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -21,7 +21,7 @@ copyright = '2021, Labelbox' author = 'Labelbox' -release = '3.57.0' +release = '3.58.0' # -- General 
configuration --------------------------------------------------- diff --git a/docs/source/index.rst b/docs/source/index.rst index 2222696d0..d4285fed2 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -227,6 +227,7 @@ ExportTask --------------------------- .. automodule:: labelbox.schema.export_task :members: + :exclude-members: FileRetrieverByLine, FileRetrieverByOffset, FileRetrieverStrategy, Range, Converter :show-inheritance: Identifiables diff --git a/examples/annotation_import/conversational.ipynb b/examples/annotation_import/conversational.ipynb index 8f427f978..5fc7c0472 100644 --- a/examples/annotation_import/conversational.ipynb +++ b/examples/annotation_import/conversational.ipynb @@ -33,16 +33,17 @@ "# Conversational Text Annotation Import\n", "* This notebook will provide examples of each supported annotation type for conversational text assets, and also cover MAL and Label Import methods:\n", "\n", - "Suported annotations that can be uploaded through the SDK\n", + "Supported annotations that can be uploaded through the SDK\n", "\n", "* Classification Radio \n", "* Classification Checklist \n", "* Classification Free Text \n", "* NER\n", - "* Relationships (only supported for MAL imports)\n", + "\n", "\n", "**Not** supported annotations\n", "\n", + "* Relationships\n", "* Bouding box \n", "* Polygon \n", "* Point\n", @@ -139,11 +140,11 @@ " )\n", ")\n", "\n", - "ner_annotation_ndjson = { \n", + "ner_annotation_ndjson = {\n", " \"name\": \"ner\",\n", - " \"location\": { \n", - " \"start\": 0, \n", - " \"end\": 8 \n", + " \"location\": {\n", + " \"start\": 0,\n", + " \"end\": 8\n", " },\n", " \"messageId\": \"4\"\n", " }" @@ -177,7 +178,7 @@ { "metadata": {}, "source": [ - "##### Checklist Classification ####### \n", + "##### Checklist Classification #######\n", "\n", "checklist_annotation= lb_types.ClassificationAnnotation(\n", " name=\"checklist_convo\", # must match your ontology feature\"s name\n", @@ -185,7 +186,7 @@ " answer = 
[\n", " lb_types.ClassificationAnswer(\n", " name = \"first_checklist_answer\"\n", - " ), \n", + " ),\n", " lb_types.ClassificationAnswer(\n", " name = \"second_checklist_answer\"\n", " )\n", @@ -214,7 +215,7 @@ "######## Radio Classification ######\n", "\n", "radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"radio_convo\", \n", + " name=\"radio_convo\",\n", " value=lb_types.Radio(answer = lb_types.ClassificationAnswer(name = \"first_radio_answer\")),\n", " message_id=\"0\"\n", ")\n", @@ -231,71 +232,6 @@ "outputs": [], "execution_count": null }, - { - "metadata": {}, - "source": [ - "####### Relationships ########## \n", - "ner_source = lb_types.ObjectAnnotation(\n", - " name=\"ner\",\n", - " value=lb_types.ConversationEntity(\n", - " start=16,\n", - " end=26,\n", - " message_id=\"4\"\n", - " )\n", - ")\n", - "ner_target = lb_types.ObjectAnnotation(\n", - " name=\"ner\",\n", - " value=lb_types.ConversationEntity(\n", - " start=29, \n", - " end=34, \n", - " message_id=\"4\"\n", - " )\n", - ")\n", - "\n", - "ner_relationship = lb_types.RelationshipAnnotation(\n", - " name=\"relationship\",\n", - " value=lb_types.Relationship(\n", - " source=ner_source,\n", - " target=ner_target,\n", - " type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n", - " ))\n", - "\n", - "uuid_source = str(uuid.uuid4())\n", - "uuid_target = str(uuid.uuid4())\n", - "\n", - "ner_source_ndjson = { \n", - " \"uuid\": uuid_source, \n", - " \"name\": \"ner\",\n", - " \"location\": { \n", - " \"start\": 16, \n", - " \"end\": 26 \n", - " },\n", - " \"messageId\": \"4\"\n", - " }\n", - "\n", - "ner_target_ndjson = { \n", - " \"uuid\": uuid_target,\n", - " \"name\": \"ner\",\n", - " \"location\": { \n", - " \"start\": 29, \n", - " \"end\": 34\n", - " },\n", - " \"messageId\": \"4\"\n", - " }\n", - "\n", - "ner_relationship_annotation_ndjson = {\n", - " \"name\": \"relationship\", \n", - " \"relationship\": {\n", - " \"source\": uuid_source,\n", - " \"target\": uuid_target,\n", - " 
\"type\": \"bidirectional\"\n", - " }\n", - "}" - ], - "cell_type": "code", - "outputs": [], - "execution_count": null - }, { "metadata": {}, "source": [ @@ -328,12 +264,12 @@ " \"name\": \"first_checklist_answer\",\n", " \"classifications\" : [\n", " {\n", - " \"name\": \"sub_checklist_question\", \n", + " \"name\": \"sub_checklist_question\",\n", " \"answer\": {\n", " \"name\": \"first_sub_checklist_answer\"\n", " }\n", - " } \n", - " ] \n", + " }\n", + " ]\n", " }]\n", "}\n", "# Global\n", @@ -424,28 +360,27 @@ "metadata": {}, "source": [ "ontology_builder = lb.OntologyBuilder(\n", - " tools=[ \n", + " tools=[\n", " lb.Tool(tool=lb.Tool.Type.NER,name=\"ner\"),\n", - " lb.Tool(tool=lb.Tool.Type.RELATIONSHIP,name=\"relationship\")\n", - " ], \n", - " classifications=[ \n", - " lb.Classification( \n", + " ],\n", + " classifications=[\n", + " lb.Classification(\n", " class_type=lb.Classification.Type.TEXT,\n", - " scope=lb.Classification.Scope.INDEX, \n", - " name=\"text_convo\"), \n", - " lb.Classification( \n", - " class_type=lb.Classification.Type.CHECKLIST, \n", - " scope=lb.Classification.Scope.INDEX, \n", - " name=\"checklist_convo\", \n", + " scope=lb.Classification.Scope.INDEX,\n", + " name=\"text_convo\"),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " scope=lb.Classification.Scope.INDEX,\n", + " name=\"checklist_convo\",\n", " options=[\n", " lb.Option(value=\"first_checklist_answer\"),\n", - " lb.Option(value=\"second_checklist_answer\") \n", + " lb.Option(value=\"second_checklist_answer\")\n", " ]\n", - " ), \n", - " lb.Classification( \n", - " class_type=lb.Classification.Type.RADIO, \n", - " name=\"radio_convo\", \n", - " scope=lb.Classification.Scope.INDEX, \n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"radio_convo\",\n", + " scope=lb.Classification.Scope.INDEX,\n", " options=[\n", " lb.Option(value=\"first_radio_answer\"),\n", " 
lb.Option(value=\"second_radio_answer\")\n", @@ -460,7 +395,7 @@ " options=[\n", " lb.Classification(\n", " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"sub_checklist_question\", \n", + " name=\"sub_checklist_question\",\n", " options=[lb.Option(\"first_sub_checklist_answer\")]\n", " )\n", " ])\n", @@ -503,10 +438,10 @@ "metadata": {}, "source": [ "# Create Labelbox project\n", - "project = client.create_project(name=\"Conversational Text Annotation Import Demo\", \n", + "project = client.create_project(name=\"Conversational Text Annotation Import Demo\",\n", " media_type=lb.MediaType.Conversational)\n", "\n", - "# Setup your ontology \n", + "# Setup your ontology\n", "project.setup_editor(ontology) # Connect your ontology and editor to your project" ], "cell_type": "code", @@ -523,8 +458,6 @@ { "metadata": {}, "source": [ - "# Setup Batches and Ontology\n", - "\n", "# Create a batch to send to your MAL project\n", "batch = project.create_batch(\n", " \"first-batch-convo-demo\", # Each batch in a project must have a unique name\n", @@ -570,9 +503,6 @@ " text_annotation,\n", " checklist_annotation,\n", " radio_annotation,\n", - " ner_source,\n", - " ner_target,\n", - " ner_relationship,\n", " nested_radio_annotation,\n", " nested_checklist_annotation\n", " ]\n", @@ -600,9 +530,6 @@ " text_annotation_ndjson,\n", " checklist_annotation_ndjson,\n", " radio_annotation_ndjson,\n", - " ner_source_ndjson,\n", - " ner_target_ndjson,\n", - " ner_relationship_annotation_ndjson,\n", " nested_checklist_annotation_ndjson,\n", " nested_radio_annotation_ndjson\n", " ]:\n", @@ -637,9 +564,9 @@ "source": [ "# Upload our label using Model-Assisted Labeling\n", "upload_job = lb.MALPredictionImport.create_from_objects(\n", - " client = client, \n", - " project_id = project.uid, \n", - " name=f\"mal_job-{str(uuid.uuid4())}\", \n", + " client = client,\n", + " project_id = project.uid,\n", + " name=f\"mal_job-{str(uuid.uuid4())}\",\n", " predictions=label)\n", "\n", 
"upload_job.wait_until_done()\n", @@ -660,20 +587,16 @@ { "metadata": {}, "source": [ - "# Upload label for this data row in project \n", - "# Uncomment this code when excluding relationships from label import\n", - "# Relationships are not currently supported for label import\n", + "# Upload label for this data row in project\n", + "upload_job = lb.LabelImport.create_from_objects(\n", + " client = client,\n", + " project_id = project.uid,\n", + " name=\"label_import_job\"+str(uuid.uuid4()),\n", + " labels=label)\n", "\n", - "\n", - "# upload_job = lb.LabelImport.create_from_objects(\n", - "# client = client, \n", - "# project_id = project.uid, \n", - "# name=\"label_import_job\"+str(uuid.uuid4()), \n", - "# labels=label)\n", - "\n", - "# upload_job.wait_until_done();\n", - "# print(\"Errors:\", upload_job.errors)\n", - "# print(\"Status of uploads: \", upload_job.statuses)" + "upload_job.wait_until_done()\n", + "print(\"Errors:\", upload_job.errors)\n", + "print(\"Status of uploads: \", upload_job.statuses)" ], "cell_type": "code", "outputs": [], diff --git a/examples/annotation_import/conversational_LLM.ipynb b/examples/annotation_import/conversational_LLM.ipynb new file mode 100644 index 000000000..bb811a1b8 --- /dev/null +++ b/examples/annotation_import/conversational_LLM.ipynb @@ -0,0 +1,649 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": {}, + "cells": [ + { + "metadata": {}, + "source": [ + "\n", + " \n", + "\n", + "\n" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# LLM pairwise comparison with Conversational text using MAL and Ground truth\n", + "This demo is meant to showcase how to upload conversational row data that contains model outputs for pairwise comparisons analysis.\n" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "!pip install -q 
\"labelbox[data]\"" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "# Set up" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "import labelbox as lb\n", + "import labelbox.types as lb_types\n", + "import uuid" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "# Replace with your API key" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "API_KEY = \"\"\n", + "client = lb.Client(api_key=API_KEY)" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "# Supported annotations for conversational text" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "### Entity " + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "ner_annotation = lb_types.ObjectAnnotation(\n", + " name=\"ner\",\n", + " value=lb_types.ConversationEntity(\n", + " start=0,\n", + " end=8,\n", + " message_id=\"message-1\"\n", + " )\n", + ")\n", + "\n", + "ner_annotation_ndjson = {\n", + " \"name\": \"ner\",\n", + " \"location\": {\n", + " \"start\": 0,\n", + " \"end\": 8\n", + " },\n", + " \"messageId\": \"message-1\"\n", + " }" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "### Classification: Radio (single-choice)" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"Choose the best response\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"Response B\")))\n", + "\n", + "\n", + "\n", + "radio_annotation_ndjson = {\n", + " \"name\": \"Choose the best response\",\n", + " \"answer\": {\n", + " \"name\": \"Response B\"\n", + " }\n", + "}\n" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": 
{}, + "source": [ + "### Classification: Free-form text" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "text_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"Provide a reason for your choice\",\n", + " value=lb_types.Text(answer=\"the answer to the text questions right here\")\n", + ")\n", + "\n", + "\n", + "text_annotation_ndjson = {\n", + " \"name\": \"Provide a reason for your choice\",\n", + " \"answer\": \"This is the more concise answer\"\n", + "\n", + "}" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "### Classification: Checklist (multi-choice)" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "checklist_annotation= lb_types.ClassificationAnnotation(\n", + " name=\"checklist_convo\", # must match your ontology feature\"s name\n", + " value=lb_types.Checklist(\n", + " answer = [\n", + " lb_types.ClassificationAnswer(\n", + " name = \"first_checklist_answer\"\n", + " ),\n", + " lb_types.ClassificationAnswer(\n", + " name = \"second_checklist_answer\"\n", + " )\n", + " ]\n", + " ),\n", + " message_id=\"message-1\" # Message specific annotation\n", + " )\n", + "\n", + "\n", + "checklist_annotation_ndjson = {\n", + " \"name\": \"checklist_convo\",\n", + " \"answers\": [\n", + " {\"name\": \"first_checklist_answer\"},\n", + " {\"name\": \"second_checklist_answer\"}\n", + " ],\n", + " \"messageId\": \"message-1\"\n", + "}" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "### Classification: Nested radio and checklist" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "\n", + "# Message based\n", + "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " message_id=\"message-1\",\n", + " value=lb_types.Checklist(\n", + " answer=[lb_types.ClassificationAnswer(\n", + " 
name=\"first_checklist_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(\n", + " answer=[lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\"\n", + " )]\n", + " ))\n", + " ]\n", + " )]\n", + " )\n", + ")\n", + "# Message based\n", + "nested_checklist_annotation_ndjson = {\n", + " \"name\": \"nested_checklist_question\",\n", + " \"messageId\": \"message-1\",\n", + " \"answer\": [{\n", + " \"name\": \"first_checklist_answer\",\n", + " \"classifications\" : [\n", + " {\n", + " \"name\": \"sub_checklist_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_checklist_answer\",\n", + " }\n", + " }\n", + " ]\n", + " }]\n", + "}\n", + "# Global\n", + "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(\n", + " answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(\n", + " answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\"\n", + " )\n", + " )\n", + " )\n", + " ]\n", + " )\n", + " )\n", + ")\n", + "#Global\n", + "nested_radio_annotation_ndjson = {\n", + " \"name\": \"nested_radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_radio_answer\",\n", + " \"classifications\": [{\n", + " \"name\":\"sub_radio_question\",\n", + " \"answer\": { \"name\" : \"first_sub_radio_answer\"}\n", + " }]\n", + " }\n", + "}\n", + "\n" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Step 1: Import data rows with \"modelOutputs\" into Catalog" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "In addition to your message based data, you will need to add a list of model outputs to your JSON file:\n", + 
"\n", + "```\n", + "\"modelOutputs\" : [\n", + " {\n", + " \"title\": \"Name of the response option\",\n", + " \"content\": \"Content of the response\",\n", + " \"modelConfigName\": \"Name of model configuration\"\n", + " }\n", + "]\n", + "```\n" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "#### Example of row_data with model outputs" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "pairwise_shopping_2 = \"\"\"\n", + " {\n", + " \"type\": \"application/vnd.labelbox.conversational\",\n", + " \"version\": 1,\n", + " \"messages\": [\n", + " {\n", + " \"messageId\": \"message-0\",\n", + " \"timestampUsec\": 1530718491,\n", + " \"content\": \"Hi! How can I help?\",\n", + " \"user\": {\n", + " \"userId\": \"Bot 002\",\n", + " \"name\": \"Bot\"\n", + " },\n", + " \"align\": \"left\",\n", + " \"canLabel\": false\n", + " },\n", + " {\n", + " \"messageId\": \"message-1\",\n", + " \"timestampUsec\": 1530718503,\n", + " \"content\": \"I just bought a vacuum cleaner from you guys like a week ago and it's already broken!!\",\n", + " \"user\": {\n", + " \"userId\": \"User 00686\",\n", + " \"name\": \"User\"\n", + " },\n", + " \"align\": \"right\",\n", + " \"canLabel\": true\n", + " }\n", + "\n", + " ],\n", + " \"modelOutputs\": [\n", + " {\n", + " \"title\": \"Response A\",\n", + " \"content\": \"I'm really sorry to hear that your vacuum cleaner is not working as expected. We certainly understand how frustrating this can be, especially with a recent purchase. I assure you, we're here to help!\\n\\n To proceed with resolving this issue, could you please provide some details about the problem you're experiencing with the vacuum cleaner? Additionally, if you could share your order number or receipt details, that would be very helpful. 
We will definitely look into the available options for you, which may include a replacement or a refund, depending on the specific circumstances and our return policy.\\n\\n Your satisfaction is our priority, and we'll do our best to resolve this matter promptly and to your satisfaction.\",\n", + " \"modelConfigName\": \"GPT-3.5 with temperature 0\"\n", + " },\n", + " {\n", + " \"title\": \"Response B\",\n", + " \"content\": \"I'm sorry about the issue with your vacuum cleaner. Please send us the order number or receipt details so we can quickly assist you with a replacement. Your satisfaction is important to us!\",\n", + " \"modelConfigName\": \"Fine Tuned GPT-3.5 with demo data\"\n", + " }\n", + " ]\n", + "}\n", + "\"\"\"" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "global_key = \"pairwise_shooping_asset\"\n", + "\n", + "# Upload data rows\n", + "convo_data = {\n", + " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/conversational-sample-data/pairwise_shopping_2.json\",\n", + " \"global_key\": global_key\n", + "}\n", + "\n", + "# Create a dataset\n", + "dataset = client.create_dataset(name=\"pairwise_annotation_demo\")\n", + "# Create a datarows\n", + "task = dataset.create_data_rows([convo_data])\n", + "task.wait_till_done()\n", + "print(\"Errors:\",task.errors)\n", + "print(\"Failed data rows:\", task.failed_data_rows)\n" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Step 2: Create/select an Ontology" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# Create an ontology with relevant classifications\n", + "\n", + "ontology_builder = lb.OntologyBuilder(\n", + " tools=[\n", + " lb.Tool(tool=lb.Tool.Type.NER,name=\"ner\"),\n", + " ],\n", + " classifications=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " 
scope=lb.Classification.Scope.GLOBAL,\n", + " name=\"Choose the best response\",\n", + " options=[lb.Option(value=\"Response A\"), lb.Option(value=\"Response B\"), lb.Option(value=\"Tie\")]\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.TEXT,\n", + " name=\"Provide a reason for your choice\"\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " scope=lb.Classification.Scope.INDEX,\n", + " name=\"checklist_convo\",\n", + " options=[\n", + " lb.Option(value=\"first_checklist_answer\"),\n", + " lb.Option(value=\"second_checklist_answer\")\n", + " ]\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"nested_checklist_question\",\n", + " scope = lb.Classification.Scope.INDEX,\n", + " options=[\n", + " lb.Option(\"first_checklist_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"sub_checklist_question\",\n", + " options=[lb.Option(\"first_sub_checklist_answer\")]\n", + " )\n", + " ])\n", + " ]\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"nested_radio_question\",\n", + " scope = lb.Classification.Scope.GLOBAL,\n", + " options=[\n", + " lb.Option(\"first_radio_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"sub_radio_question\",\n", + " options=[lb.Option(\"first_sub_radio_answer\")]\n", + " )\n", + " ])\n", + " ]\n", + " )\n", + " ]\n", + ")\n", + "\n", + "ontology = client.create_ontology(\"Pairwise comparison ontology\", ontology_builder.asdict(), media_type=lb.MediaType.Conversational)\n", + "\n" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Step 3: Create a labeling project" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# Create Labelbox 
project\n", + "project = client.create_project(name=\"Conversational Text Annotation Import Demo (Pairwise comparison)\",\n", + " media_type=lb.MediaType.Conversational)\n", + "\n", + "# Setup your ontology\n", + "project.setup_editor(ontology) # Connect your ontology and editor to your project" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Step 4: Send a batch of data rows to the project" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# Create a batch to send to your project\n", + "batch = project.create_batch(\n", + " \"first-batch-convo-demo\", # Each batch in a project must have a unique name\n", + " global_keys=[global_key], # Paginated collection of data row objects, list of data row ids or global keys\n", + " priority=5 # priority between 1(Highest) - 5(lowest)\n", + ")\n", + "\n", + "print(\"Batch: \", batch)" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Step 5: Create the annotations payload" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "Python annotation" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "label = []\n", + "label.append(\n", + " lb_types.Label(\n", + " data=lb_types.ConversationData(\n", + " global_key=global_key\n", + " ),\n", + " annotations=[\n", + " ner_annotation,\n", + " text_annotation,\n", + " checklist_annotation,\n", + " radio_annotation,\n", + " nested_radio_annotation,\n", + " nested_checklist_annotation\n", + " ]\n", + " )\n", + ")" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "NDJSON annotation" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "label_ndjson = []\n", + "for annotations in [\n", + " ner_annotation_ndjson,\n", + " text_annotation_ndjson,\n", + " checklist_annotation_ndjson,\n", + " 
radio_annotation_ndjson,\n", + " nested_checklist_annotation_ndjson,\n", + " nested_radio_annotation_ndjson\n", + " ]:\n", + " annotations.update({\n", + " \"dataRow\": {\n", + " \"globalKey\": global_key\n", + " }\n", + " })\n", + " label_ndjson.append(annotations)" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Step 6: Upload annotations to a project as pre-labels or complete labels " + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "### Model Assisted Labeling (MAL)" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "upload_job = lb.MALPredictionImport.create_from_objects(\n", + " client = client,\n", + " project_id = project.uid,\n", + " name=f\"mal_job-{str(uuid.uuid4())}\",\n", + " predictions=label)\n", + "\n", + "upload_job.wait_until_done()\n", + "print(\"Errors:\", upload_job.errors)\n", + "print(\"Status of uploads: \", upload_job.statuses)" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Label Import" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "upload_job = lb.LabelImport.create_from_objects(\n", + " client = client,\n", + " project_id = project.uid,\n", + " name=\"label_import_job\"+str(uuid.uuid4()),\n", + " labels=label)\n", + "\n", + "upload_job.wait_until_done();\n", + "print(\"Errors:\", upload_job.errors)\n", + "print(\"Status of uploads: \", upload_job.statuses)" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + } + ] +} \ No newline at end of file diff --git a/examples/annotation_import/conversational_LLM_data_generation.ipynb b/examples/annotation_import/conversational_LLM_data_generation.ipynb new file mode 100644 index 000000000..c07a7a0da --- /dev/null +++ b/examples/annotation_import/conversational_LLM_data_generation.ipynb @@ -0,0 +1,453 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": 
{}, + "cells": [ + { + "metadata": {}, + "source": [ + "\n", + " \n", + "\n", + "\n" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# LLM Data Generation with MAL and Ground Truth\n", + "This demo is meant to showcase how to generate prompts and responses to fine-tune large language models (LLMs) using MAL and Ground truth" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "!pip install -q \"labelbox[data]\"" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Set up " + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "import labelbox as lb\n", + "import uuid" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Replace with your API key" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "API_KEY = \"\"\n", + "client = lb.Client(api_key=API_KEY)" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Supported annotations for LLM data generation\n", + "Currently, we only support NDJSON format for prompts and responses" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "## Prompt:" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "### Classification: Free-form text" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "prompt_annotation_ndjson = {\n", + " \"name\": \"Follow the prompt and select answers\",\n", + " \"answer\": \"This is an example of a prompt\"\n", + "}" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Responses:" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + 
"source": [ + "### Classification: Radio (single-choice)" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "response_radio_annotation_ndjson= {\n", + " \"name\": \"response_radio\",\n", + " \"answer\": {\n", + " \"name\": \"response_a\"\n", + " }\n", + "}" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "### Classification: Free-form text" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# Only NDJson is currently supported\n", + "response_text_annotation_ndjson = {\n", + " \"name\": \"Provide a reason for your choice\",\n", + " \"answer\": \"This is an example of a response text\"\n", + "}\n" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "### Classification: Checklist (multi-choice)" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "response_checklist_annotation_ndjson = {\n", + " \"name\": \"response_checklist\",\n", + " \"answer\": [\n", + " {\n", + " \"name\": \"response_a\"\n", + " },\n", + " {\n", + " \"name\": \"response_c\"\n", + " }\n", + " ]\n", + "}" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Step 1: Create a project and data rows in Labelbox UI\n", + "\n", + "Currently we do not support this workflow through the SDK.\n", + "#### Workflow:\n", + "\n", + "1. Navigate to annotate and select ***New project***\n", + "\n", + "2. Select ***LLM data generation*** and then select ***Humans generate prompts and responses***\n", + "\n", + "3. Name your project, select ***create a new dataset*** and name your dataset. 
(data rows will be generated automatically in \n", + "this step)\n", + "\n", + "\n" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# Enter the project id\n", + "project_id = \"\"\n", + "\n", + "# Select one of the global keys from the data rows generated\n", + "global_key = \"\"" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Step 2 : Create/select an Ontology in Labelbox UI" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "Currently we do not support this workflow through the SDK\n", + "#### Workflow: \n", + "1. In your project, navigate to ***Settings*** and ***Label editor***\n", + "\n", + "2. Click on ***Edit***\n", + "\n", + "3. Create a new ontology and add the features used in this demo\n", + "\n" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "#### For this demo the following ontology was generated in the UI: " + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "ontology_json = \"\"\"\n", + "{\n", + " \"tools\": [],\n", + " \"relationships\": [],\n", + " \"classifications\": [\n", + " {\n", + " \"schemaNodeId\": \"clpvq9d0002yt07zy0khq42rp\",\n", + " \"featureSchemaId\": \"clpvq9d0002ys07zyf2eo9p14\",\n", + " \"type\": \"prompt\",\n", + " \"name\": \"Follow the prompt and select answers\",\n", + " \"archived\": false,\n", + " \"required\": true,\n", + " \"options\": [],\n", + " \"instructions\": \"Follow the prompt and select answers\",\n", + " \"minCharacters\": 5,\n", + " \"maxCharacters\": 100\n", + " },\n", + " {\n", + " \"schemaNodeId\": \"clpvq9d0002yz07zy0fjg28z7\",\n", + " \"featureSchemaId\": \"clpvq9d0002yu07zy28ik5w3i\",\n", + " \"type\": \"response-radio\",\n", + " \"name\": \"response_radio\",\n", + " \"instructions\": \"response_radio\",\n", + " \"scope\": \"global\",\n", + " \"required\": true,\n", + " \"archived\": false,\n", + " \"options\": [\n", + " {\n", + " 
\"schemaNodeId\": \"clpvq9d0002yw07zyci2q5adq\",\n", + " \"featureSchemaId\": \"clpvq9d0002yv07zyevmz1yoj\",\n", + " \"value\": \"response_a\",\n", + " \"label\": \"response_a\",\n", + " \"position\": 0,\n", + " \"options\": []\n", + " },\n", + " {\n", + " \"schemaNodeId\": \"clpvq9d0002yy07zy8pe48zdj\",\n", + " \"featureSchemaId\": \"clpvq9d0002yx07zy0jvmdxk8\",\n", + " \"value\": \"response_b\",\n", + " \"label\": \"response_b\",\n", + " \"position\": 1,\n", + " \"options\": []\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"schemaNodeId\": \"clpvq9d0002z107zygf8l62ys\",\n", + " \"featureSchemaId\": \"clpvq9d0002z007zyg26115f9\",\n", + " \"type\": \"response-text\",\n", + " \"name\": \"provide_a_reason_for_your_choice\",\n", + " \"instructions\": \"Provide a reason for your choice\",\n", + " \"scope\": \"global\",\n", + " \"required\": true,\n", + " \"archived\": false,\n", + " \"options\": [],\n", + " \"minCharacters\": 5,\n", + " \"maxCharacters\": 100\n", + " },\n", + " {\n", + " \"schemaNodeId\": \"clpvq9d0102z907zy8b10hjcj\",\n", + " \"featureSchemaId\": \"clpvq9d0002z207zy6xla7f82\",\n", + " \"type\": \"response-checklist\",\n", + " \"name\": \"response_checklist\",\n", + " \"instructions\": \"response_checklist\",\n", + " \"scope\": \"global\",\n", + " \"required\": true,\n", + " \"archived\": false,\n", + " \"options\": [\n", + " {\n", + " \"schemaNodeId\": \"clpvq9d0102z407zy0adq0rfr\",\n", + " \"featureSchemaId\": \"clpvq9d0002z307zy6dqb8xsw\",\n", + " \"value\": \"response_a\",\n", + " \"label\": \"response_a\",\n", + " \"position\": 0,\n", + " \"options\": []\n", + " },\n", + " {\n", + " \"schemaNodeId\": \"clpvq9d0102z607zych8b2z5d\",\n", + " \"featureSchemaId\": \"clpvq9d0102z507zyfwfgacrn\",\n", + " \"value\": \"response_c\",\n", + " \"label\": \"response_c\",\n", + " \"position\": 1,\n", + " \"options\": []\n", + " },\n", + " {\n", + " \"schemaNodeId\": \"clpvq9d0102z807zy03y7gysp\",\n", + " \"featureSchemaId\": 
\"clpvq9d0102z707zyh61y5o3u\",\n", + " \"value\": \"response_d\",\n", + " \"label\": \"response_d\",\n", + " \"position\": 2,\n", + " \"options\": []\n", + " }\n", + " ]\n", + " }\n", + " ],\n", + " \"realTime\": false\n", + "}\n", + "\n", + "\"\"\"" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Step 3: Create the annotations payload" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "label_ndjson = []\n", + "for annotations in [\n", + " prompt_annotation_ndjson,\n", + " response_radio_annotation_ndjson,\n", + " response_text_annotation_ndjson,\n", + " response_checklist_annotation_ndjson\n", + " ]:\n", + " annotations.update({\n", + " \"dataRow\": {\n", + " \"globalKey\": global_key\n", + " }\n", + " })\n", + " label_ndjson.append(annotations)" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Step 4: Upload annotations to a project as pre-labels or complete labels" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "project = client.get_project(project_id=project_id)" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "#### Model Assisted Labeling (MAL)" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "upload_job = lb.MALPredictionImport.create_from_objects(\n", + " client = client,\n", + " project_id = project.uid,\n", + " name=f\"mal_job-{str(uuid.uuid4())}\",\n", + " predictions=label_ndjson)\n", + "\n", + "upload_job.wait_until_done()\n", + "print(\"Errors:\", upload_job.errors)\n", + "print(\"Status of uploads: \", upload_job.statuses)" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "#### Label Import" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "upload_job = 
lb.LabelImport.create_from_objects(\n", + " client = client,\n", + " project_id = project.uid,\n", + " name=\"label_import_job\"+str(uuid.uuid4()),\n", + " labels=label_ndjson)\n", + "\n", + "upload_job.wait_until_done();\n", + "print(\"Errors:\", upload_job.errors)\n", + "print(\"Status of uploads: \", upload_job.statuses)" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + } + ] +} \ No newline at end of file diff --git a/examples/annotation_import/text.ipynb b/examples/annotation_import/text.ipynb index 31137617d..061408fdb 100644 --- a/examples/annotation_import/text.ipynb +++ b/examples/annotation_import/text.ipynb @@ -39,9 +39,10 @@ "* Classification radio \n", "* Classification checklist \n", "* Classification free-form text \n", - "* Relationships (Only supported for MAL and through the SDK)\n", + "\n", "\n", "**Not** supported:\n", + "* Relationships\n", "* Segmentation mask\n", "* Polygon\n", "* Bounding box \n", @@ -294,70 +295,6 @@ "outputs": [], "execution_count": null }, - { - "metadata": {}, - "source": [ - "##### Relationship ##### \n", - "# only supported for MAL imports \n", - "ner_source = lb_types.ObjectAnnotation(\n", - " name=\"named_entity\",\n", - " value=lb_types.TextEntity(\n", - " start=133, \n", - " end=140\n", - " )\n", - ")\n", - "\n", - "ner_target = lb_types.ObjectAnnotation(\n", - " name=\"named_entity\",\n", - " value=lb_types.TextEntity(\n", - " start=143,\n", - " end=159\n", - " )\n", - ")\n", - "\n", - "ner_relationship = lb_types.RelationshipAnnotation(\n", - " name=\"relationship\",\n", - " value=lb_types.Relationship(\n", - " source=ner_source,\n", - " target=ner_target,\n", - " type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n", - " ))\n", - "\n", - "\n", - "uuid_source = str(uuid.uuid4())\n", - "uuid_target = str(uuid.uuid4())\n", - "\n", - "entity_source_ndjson = {\n", - " \"name\": \"named_entity\",\n", - " \"uuid\": uuid_source,\n", - " \"location\": {\n", - " \"start\" : 133,\n", - " \"end\": 140 
\n", - " }\n", - "}\n", - "\n", - "entity_target_ndjson = {\n", - " \"name\": \"named_entity\",\n", - " \"uuid\": uuid_target,\n", - " \"location\": {\n", - " \"start\": 143,\n", - " \"end\": 159\n", - " }\n", - "}\n", - "\n", - "ner_relationship_annotation_ndjson = {\n", - " \"name\": \"relationship\", \n", - " \"relationship\": {\n", - " \"source\": uuid_source,\n", - " \"target\": uuid_target,\n", - " \"type\": \"unidirectional\"\n", - " }\n", - "}" - ], - "cell_type": "code", - "outputs": [], - "execution_count": null - }, { "metadata": {}, "source": [ @@ -472,10 +409,6 @@ " tool=lb.Tool.Type.NER, \n", " name=\"named_entity\"\n", " ),\n", - " lb.Tool( \n", - " tool=lb.Tool.Type.RELATIONSHIP,\n", - " name=\"relationship\"\n", - " )\n", " ]\n", ")\n", "\n", @@ -566,9 +499,6 @@ " radio_annotation, \n", " checklist_annotation, \n", " text_annotation,\n", - " ner_source,\n", - " ner_target,\n", - " ner_relationship,\n", " nested_checklist_annotation,\n", " nested_radio_annotation\n", " ]\n", @@ -596,9 +526,6 @@ " text_annotation_ndjson,\n", " nested_radio_annotation_ndjson,\n", " nested_checklist_annotation_ndjson,\n", - " entity_source_ndjson,\n", - " entity_target_ndjson,\n", - " ner_relationship_annotation_ndjson,\n", " ] :\n", " annotations.update({\n", " \"dataRow\": { \"globalKey\": global_key }\n", @@ -636,7 +563,7 @@ " name=\"mal_import_job\"+str(uuid.uuid4()), \n", " predictions=labels)\n", "\n", - "upload_job_mal.wait_until_done();\n", + "upload_job_mal.wait_until_done()\n", "print(\"Errors:\", upload_job_mal.errors)\n", "print(\"Status of uploads: \", upload_job_mal.statuses)" ], @@ -654,19 +581,16 @@ { "metadata": {}, "source": [ - "# Uncomment if relationships are not being imported. \n", - "# Relationships will be supported for label import in the near future. 
\n", - "\n", - "# # Upload label for this data row in project \n", - "# upload_job_label_import = lb.LabelImport.create_from_objects(\n", - "# client = client, \n", - "# project_id = project.uid, \n", - "# name=\"label_import_job\"+str(uuid.uuid4()), \n", - "# labels=labels)\n", + "# Upload label for this data row in project \n", + "upload_job_label_import = lb.LabelImport.create_from_objects(\n", + " client = client, \n", + " project_id = project.uid, \n", + " name=\"label_import_job\"+str(uuid.uuid4()), \n", + " labels=labels)\n", "\n", - "# upload_job_label_import.wait_until_done();\n", - "# print(\"Errors:\", upload_job_label_import.errors)\n", - "# print(\"Status of uploads: \", upload_job_label_import.statuses)" + "upload_job_label_import.wait_until_done()\n", + "print(\"Errors:\", upload_job_label_import.errors)\n", + "print(\"Status of uploads: \", upload_job_label_import.statuses)" ], "cell_type": "code", "outputs": [], diff --git a/examples/basics/basics.ipynb b/examples/basics/basics.ipynb index 0504e14d2..5ddd46b1c 100644 --- a/examples/basics/basics.ipynb +++ b/examples/basics/basics.ipynb @@ -315,8 +315,6 @@ { "metadata": {}, "source": [ - "# Since the project we created only has batches, we can't query for datasets. 
\n", - "# sample_project_datasets = project.datasets() --> Run if project is in dataset mode\n", "sample_project_batches = project.batches()\n", "\n", "list(sample_project_batches)\n", diff --git a/examples/basics/datasets.ipynb b/examples/basics/datasets.ipynb index 70a853087..bdf6fda14 100644 --- a/examples/basics/datasets.ipynb +++ b/examples/basics/datasets.ipynb @@ -138,17 +138,6 @@ "outputs": [], "execution_count": null }, - { - "metadata": {}, - "source": [ - "# Attached projects\n", - "print(\"Projects with this dataset attached :\", list(dataset.projects()))\n", - "print(\"Dataset name :\", dataset.name)" - ], - "cell_type": "code", - "outputs": [], - "execution_count": null - }, { "metadata": {}, "source": [ diff --git a/examples/integrations/detectron2/coco_object.ipynb b/examples/integrations/detectron2/coco_object.ipynb index 947b699cf..a905a5b4a 100644 --- a/examples/integrations/detectron2/coco_object.ipynb +++ b/examples/integrations/detectron2/coco_object.ipynb @@ -550,7 +550,7 @@ " client.get_labeling_frontends(where=lb.LabelingFrontend.name == 'editor'))\n", "project.setup(editor, labels_mal.get_ontology().asdict())\n", "project.enable_model_assisted_labeling()\n", - "project.datasets.connect(dataset)" + "project.create_batches_from_dataset(\"batch\", dataset.uid)" ], "cell_type": "code", "outputs": [], @@ -583,4 +583,4 @@ "execution_count": null } ] -} +} \ No newline at end of file diff --git a/examples/integrations/detectron2/coco_panoptic.ipynb b/examples/integrations/detectron2/coco_panoptic.ipynb index 186dd8f99..1754b7252 100644 --- a/examples/integrations/detectron2/coco_panoptic.ipynb +++ b/examples/integrations/detectron2/coco_panoptic.ipynb @@ -1014,7 +1014,7 @@ " for future in tqdm(as_completed(futures)):\n", " labels.append(future.result())\n", "\n", - "labels_mea = lb_types.LabelGenerator(labels)\n", + "labels_mea = lb_types.LabelGenerator(labels)\n", "labels_mea.add_url_to_masks(signer) \\\n", " .add_url_to_data(signer) " ], @@ 
-1317,7 +1317,7 @@ " client.get_labeling_frontends(where=lb.LabelingFrontend.name == 'editor'))\n", "project.setup(editor, labels_mal.get_ontology().asdict())\n", "project.enable_model_assisted_labeling()\n", - "project.datasets.connect(dataset)\n" + "project.create_batches_from_dataset(\"batch\", dataset.uid)" ], "cell_type": "code", "outputs": [], @@ -1366,4 +1366,4 @@ "execution_count": null } ] -} +} \ No newline at end of file diff --git a/examples/integrations/tlt/labelbox_upload.ipynb b/examples/integrations/tlt/labelbox_upload.ipynb index 3f2cd9a4e..8ed72394f 100644 --- a/examples/integrations/tlt/labelbox_upload.ipynb +++ b/examples/integrations/tlt/labelbox_upload.ipynb @@ -228,8 +228,8 @@ "source": [ "project = client.create_project(name = \"animal_demo_proj\", media_type=lb.MediaType.Image)\n", "dataset = client.create_dataset(name = \"animal_demo_ds\")\n", - "project.datasets.connect(dataset)\n", "dataset.create_data_rows(image_paths)\n", + "project.create_batches_from_dataset(\"batch\", dataset.uid)\n", "project.enable_model_assisted_labeling()" ], "cell_type": "code", @@ -272,7 +272,9 @@ { "metadata": {}, "source": [ - "datarows = [dr for dr in list(project.datasets())[0].data_rows()]" + "datarows = []\n", + "for batch in list(project.batches()):\n", + " datarows.extend(list(batch.export_data_rows()))" ], "cell_type": "code", "outputs": [], diff --git a/examples/llm_asset_import/conversational_MAL_GT.ipynb b/examples/llm_asset_import/conversational_MAL_GT.ipynb deleted file mode 100644 index a01a7a2af..000000000 --- a/examples/llm_asset_import/conversational_MAL_GT.ipynb +++ /dev/null @@ -1,386 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": {}, - "cells": [ - { - "metadata": {}, - "source": [ - "\n", - " \n", - "\n", - "\n" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - 
"source": [ - "# LLM pairwise comparison with Conversational text using MAL and Ground truth\n", - "This demo is meant to showcase how to upload conversational row data that contains model outputs for pairwise comparisons analysis.\n" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "!pip install -q \"labelbox[data]\"" - ], - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "# Setup" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "import labelbox as lb\n", - "import uuid" - ], - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "# Replace with your API Key" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "API_KEY = \"\"\n", - "client = lb.Client(api_key=API_KEY)" - ], - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "# Step 1: Create annotations" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "# Create a gobal radio and text annotation\n", - "radio_annotation_ndjson = {\n", - " \"name\": \"Choose the best response\",\n", - " \"answer\": {\n", - " \"name\": \"Response B\"\n", - " }\n", - "}\n", - "\n", - "text_annotation_ndjson = {\n", - " \"name\": \"Provide a reason for your choice\",\n", - " \"answer\": \"This is the more concise answer\",\n", - "\n", - "}" - ], - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "# Step 2: Setup a project" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "## Import data rows with \"modelOutputs\" into Catalog\n", - "In addition to your message based data, you will need to add a list of model outputs to your JSON file:\n", - "\n", - "```\n", - "\"modelOutputs\" : [\n", - " {\n", - " \"title\": \"Name of the response option\",\n", - " \"content\": \"Content of the 
response\",\n", - " \"modelConfigName\": \"Name of model configuration\"\n", - " }\n", - "]\n", - "```\n" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "### Example of row_data with model outputs" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "pairwise_shopping_2 = \"\"\"\n", - " {\n", - " \"type\": \"application/vnd.labelbox.conversational\",\n", - " \"version\": 1,\n", - " \"messages\": [\n", - " {\n", - " \"messageId\": \"message-0\",\n", - " \"timestampUsec\": 1530718491,\n", - " \"content\": \"Hi! How can I help?\",\n", - " \"user\": {\n", - " \"userId\": \"Bot 002\",\n", - " \"name\": \"Bot\"\n", - " },\n", - " \"align\": \"left\",\n", - " \"canLabel\": false\n", - " },\n", - " {\n", - " \"messageId\": \"message-1\",\n", - " \"timestampUsec\": 1530718503,\n", - " \"content\": \"I just bought a vacuum cleaner from you guys like a week ago and it's already broken!!\",\n", - " \"user\": {\n", - " \"userId\": \"User 00686\",\n", - " \"name\": \"User\"\n", - " },\n", - " \"align\": \"right\",\n", - " \"canLabel\": true\n", - " }\n", - "\n", - " ],\n", - " \"modelOutputs\": [\n", - " {\n", - " \"title\": \"Response A\",\n", - " \"content\": \"I'm really sorry to hear that your vacuum cleaner is not working as expected. We certainly understand how frustrating this can be, especially with a recent purchase. I assure you, we're here to help!\\n\\n To proceed with resolving this issue, could you please provide some details about the problem you're experiencing with the vacuum cleaner? Additionally, if you could share your order number or receipt details, that would be very helpful. 
We will definitely look into the available options for you, which may include a replacement or a refund, depending on the specific circumstances and our return policy.\\n\\n Your satisfaction is our priority, and we'll do our best to resolve this matter promptly and to your satisfaction.\",\n", - " \"modelConfigName\": \"GPT-3.5 with temperature 0\"\n", - " },\n", - " {\n", - " \"title\": \"Response B\",\n", - " \"content\": \"I'm sorry about the issue with your vacuum cleaner. Please send us the order number or receipt details so we can quickly assist you with a replacement. Your satisfaction is important to us!\",\n", - " \"modelConfigName\": \"Fine Tuned GPT-3.5 with demo data\"\n", - " }\n", - " ]\n", - "}\n", - "\"\"\"" - ], - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "\n", - "### Create dataset and data rows using a cloud hosted JSON file with \"modelOutputs\"" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "# Generate dummy global keys\n", - "global_key_1 = str(uuid.uuid4())\n", - "global_key_2 = str(uuid.uuid4())\n", - "global_key_3 = str(uuid.uuid4())\n", - "\n", - "# Create a dataset\n", - "dataset = client.create_dataset(\n", - " name=\"pairwise_demo_\"+str(uuid.uuid4()),\n", - " iam_integration=None\n", - ")\n", - "# Upload data rows\n", - "task = dataset.create_data_rows([\n", - " {\n", - " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/conversational-sample-data/pairwise_shopping_1.json\",\n", - " \"global_key\": global_key_1\n", - " },\n", - " {\n", - " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/conversational-sample-data/pairwise_shopping_2.json\",\n", - " \"global_key\": global_key_2\n", - " },\n", - " {\n", - " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/conversational-sample-data/pairwise_shopping_3.json\",\n", - " \"global_key\": global_key_3\n", - " }\n", - " ])\n", - "task.wait_till_done()\n", 
- "print(\"Errors:\",task.errors)\n", - "print(\"Failed data rows:\", task.failed_data_rows)\n" - ], - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "### Create/select an ontology" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "# Create an ontology with relevant classifications\n", - "\n", - "ontology_builder = lb.OntologyBuilder(\n", - " classifications=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " name=\"Choose the best response\",\n", - " options=[lb.Option(value=\"Response A\"), lb.Option(value=\"Response B\"), lb.Option(value=\"Tie\")]\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.TEXT,\n", - " name=\"Provide a reason for your choice\"\n", - " )\n", - " ]\n", - ")\n", - "\n", - "ontology = client.create_ontology(\"Pairwise comparison ontology\", ontology_builder.asdict(), media_type=lb.MediaType.Conversational)\n", - "\n" - ], - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "### Create a labeling project and send a batch of data rows to the project" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "project = client.create_project(name=\"Pairwise Conversational Text Demo\",\n", - " media_type=lb.MediaType.Conversational)\n", - "project.setup_editor(ontology)" - ], - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "project.create_batch(\n", - " \"batch_conversational\",\n", - " global_keys=[global_key_1, global_key_2, global_key_3],\n", - " priority=5\n", - ")" - ], - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "# Step 3: Upload annotations to a project as pre-labels or complete labels" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - 
"source": [ - "Setup the payload with the annotations that were created in Step 1." - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "labels = []\n", - "for key in [global_key_1, global_key_2, global_key_3]:\n", - " for ann in [radio_annotation_ndjson, text_annotation_ndjson]:\n", - " ann_copy = ann.copy()\n", - " ann_copy.update({\n", - " \"dataRow\": {\n", - " \"globalKey\": key\n", - " }\n", - " })\n", - " labels.append(ann_copy)" - ], - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "### Model Assisted Labeling (MAL)" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "upload_job = lb.MALPredictionImport.create_from_objects(\n", - " client = client,\n", - " project_id = project.uid,\n", - " name=f\"mal_job-{str(uuid.uuid4())}\",\n", - " predictions=labels)\n", - "\n", - "upload_job.wait_until_done()\n", - "print(\"Errors:\", upload_job.errors)\n", - "print(\"Status of uploads: \", upload_job.statuses)" - ], - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "## Label Import" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "upload_job = lb.LabelImport.create_from_objects(\n", - " client = client,\n", - " project_id = project.uid,\n", - " name=\"label_import_job\"+str(uuid.uuid4()),\n", - " labels=labels)\n", - "\n", - "upload_job.wait_until_done();\n", - "print(\"Errors:\", upload_job.errors)\n", - "print(\"Status of uploads: \", upload_job.statuses)" - ], - "cell_type": "code", - "outputs": [], - "execution_count": null - } - ] -} \ No newline at end of file diff --git a/examples/llm_asset_import/conversational_MEA.ipynb b/examples/llm_asset_import/conversational_MEA.ipynb deleted file mode 100644 index aa75e49ca..000000000 --- a/examples/llm_asset_import/conversational_MEA.ipynb +++ /dev/null @@ -1,366 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": {}, - 
"cells": [ - { - "metadata": {}, - "source": [ - "\n", - " \n", - "\n", - "\n" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "# LLM pairwise comparison with Conversational text using Model\n", - "\n", - "This demo is meant to showcase how to upload conversational row data that contains model outputs for pairwise comparisons analysis.\n" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "!pip install \"labelbox[data]\" -q" - ], - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "# Setup" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "import labelbox as lb\n", - "import uuid" - ], - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "# Replace with your API Key" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "API_KEY = \"\"\n", - "client = lb.Client(api_key=API_KEY)" - ], - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "# Step 1: Create annotations" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "# Create a gobal radio and text annotation\n", - "radio_annotation_ndjson = {\n", - " \"name\": \"Choose the best response\",\n", - " \"answer\": {\n", - " \"name\": \"Response B\"\n", - " }\n", - "}\n", - "\n", - "text_annotation_ndjson = {\n", - " \"name\": \"Provide a reason for your choice\",\n", - " \"answer\": \"This is the more concise answer\",\n", - "\n", - "}" - ], - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "# Step 2: Setup a model\n" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "## Import data rows with \"modelOutputs\" into 
Catalog\n", - "In addition to your message based data, you will need to add a list of model outputs to your JSON file:\n", - "\n", - "```\n", - "\"modelOutputs\" : [\n", - " {\n", - " \"title\": \"Name of the response option\",\n", - " \"content\": \"Content of the response\",\n", - " \"modelConfigName\": \"Name of model configuration\"\n", - " }\n", - "]\n", - "```\n" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "### Example of row_data with model outputs" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "pairwise_shopping_2 = \"\"\"\n", - " {\n", - " \"type\": \"application/vnd.labelbox.conversational\",\n", - " \"version\": 1,\n", - " \"messages\": [\n", - " {\n", - " \"messageId\": \"message-0\",\n", - " \"timestampUsec\": 1530718491,\n", - " \"content\": \"Hi! How can I help?\",\n", - " \"user\": {\n", - " \"userId\": \"Bot 002\",\n", - " \"name\": \"Bot\"\n", - " },\n", - " \"align\": \"left\",\n", - " \"canLabel\": false\n", - " },\n", - " {\n", - " \"messageId\": \"message-1\",\n", - " \"timestampUsec\": 1530718503,\n", - " \"content\": \"I just bought a vacuum cleaner from you guys like a week ago and it's already broken!!\",\n", - " \"user\": {\n", - " \"userId\": \"User 00686\",\n", - " \"name\": \"User\"\n", - " },\n", - " \"align\": \"right\",\n", - " \"canLabel\": true\n", - " }\n", - "\n", - " ],\n", - " \"modelOutputs\": [\n", - " {\n", - " \"title\": \"Response A\",\n", - " \"content\": \"I'm really sorry to hear that your vacuum cleaner is not working as expected. We certainly understand how frustrating this can be, especially with a recent purchase. I assure you, we're here to help!\\n\\n To proceed with resolving this issue, could you please provide some details about the problem you're experiencing with the vacuum cleaner? Additionally, if you could share your order number or receipt details, that would be very helpful. 
We will definitely look into the available options for you, which may include a replacement or a refund, depending on the specific circumstances and our return policy.\\n\\n Your satisfaction is our priority, and we'll do our best to resolve this matter promptly and to your satisfaction.\",\n", - " \"modelConfigName\": \"GPT-3.5 with temperature 0\"\n", - " },\n", - " {\n", - " \"title\": \"Response B\",\n", - " \"content\": \"I'm sorry about the issue with your vacuum cleaner. Please send us the order number or receipt details so we can quickly assist you with a replacement. Your satisfaction is important to us!\",\n", - " \"modelConfigName\": \"Fine Tuned GPT-3.5 with demo data\"\n", - " }\n", - " ]\n", - "}\n", - "\"\"\"" - ], - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "\n", - "### Create dataset and data rows using a cloud hosted JSON file with \"modelOutputs\"" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "# Generate dummy global keys\n", - "global_key_1 = str(uuid.uuid4())\n", - "global_key_2 = str(uuid.uuid4())\n", - "global_key_3 = str(uuid.uuid4())\n", - "\n", - "# Create a dataset\n", - "dataset = client.create_dataset(\n", - " name=\"pairwise_demo_\"+str(uuid.uuid4()),\n", - " iam_integration=None\n", - ")\n", - "# Upload data rows\n", - "task = dataset.create_data_rows([\n", - " {\n", - " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/conversational-sample-data/pairwise_shopping_1.json\",\n", - " \"global_key\": global_key_1\n", - " },\n", - " {\n", - " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/conversational-sample-data/pairwise_shopping_2.json\",\n", - " \"global_key\": global_key_2\n", - " },\n", - " {\n", - " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/conversational-sample-data/pairwise_shopping_3.json\",\n", - " \"global_key\": global_key_3\n", - " }\n", - " ])\n", - "task.wait_till_done()\n", 
- "print(\"Errors:\",task.errors)\n", - "print(\"Failed data rows:\", task.failed_data_rows)\n" - ], - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "### Create/select an ontology" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "# Create an ontology with relevant classifications\n", - "\n", - "ontology_builder = lb.OntologyBuilder(\n", - " classifications=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " name=\"Choose the best response\",\n", - " options=[lb.Option(value=\"Response A\"), lb.Option(value=\"Response B\"), lb.Option(value=\"Tie\")]\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.TEXT,\n", - " name=\"Provide a reason for your choice\"\n", - " )\n", - " ]\n", - ")\n", - "\n", - "ontology = client.create_ontology(\"Pairwise comparison ontology\", ontology_builder.asdict(), media_type=lb.MediaType.Conversational)\n", - "\n" - ], - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "### Create a model and model run" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "# create model\n", - "model = client.create_model(name=\"Comparison_model_run_\"+ str(uuid.uuid4()),\n", - " ontology_id=ontology.uid)\n", - "# create model run\n", - "model_run = model.create_model_run(\"iteration 1\")" - ], - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "Upsert data rows" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "model_run.upsert_data_rows(global_keys=[global_key_1, global_key_2, global_key_3])" - ], - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "# Step 3: Upload annotations to a project as pre-labels or complete labels" - ], - "cell_type": 
"markdown" - }, - { - "metadata": {}, - "source": [ - "Setup the payload with the annotations that were created in Step 1." - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "predictions = []\n", - "for key in [global_key_1, global_key_2, global_key_3]:\n", - " for ann in [radio_annotation_ndjson, text_annotation_ndjson]:\n", - " ann_copy = ann.copy()\n", - " ann_copy.update({\n", - " \"dataRow\": {\n", - " \"globalKey\": key\n", - " }\n", - " })\n", - " predictions.append(ann_copy)" - ], - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "### Upload predictions to model run" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "# Upload the prediction label to the Model Run\n", - "upload_job_prediction = model_run.add_predictions(\n", - " name=\"prediction_upload_job\"+str(uuid.uuid4()),\n", - " predictions=predictions)\n", - "\n", - "# Errors will appear for annotation uploads that failed.\n", - "print(\"Errors:\", upload_job_prediction.errors)\n", - "print(\"Status of uploads: \", upload_job_prediction.statuses)" - ], - "cell_type": "code", - "outputs": [], - "execution_count": null - } - ] -} \ No newline at end of file diff --git a/examples/model_diagnostics/custom_metrics_demo.ipynb b/examples/model_diagnostics/custom_metrics_demo.ipynb index 87d7df119..a4c73a581 100644 --- a/examples/model_diagnostics/custom_metrics_demo.ipynb +++ b/examples/model_diagnostics/custom_metrics_demo.ipynb @@ -334,7 +334,7 @@ "\n", "dataset = client.create_dataset(name=\"Mapillary Diagnostics Demo\")\n", "print(f\"Dataset Created: {dataset.uid}\")\n", - "project.datasets.connect(dataset)" + "project.create_batches_from_dataset(\"batch\", dataset.uid)" ], "cell_type": "code", "outputs": [], @@ -580,4 +580,4 @@ "execution_count": null } ] -} +} \ No newline at end of file diff --git a/examples/model_diagnostics/model_diagnostics_demo.ipynb 
b/examples/model_diagnostics/model_diagnostics_demo.ipynb index 190198b13..5c728f979 100644 --- a/examples/model_diagnostics/model_diagnostics_demo.ipynb +++ b/examples/model_diagnostics/model_diagnostics_demo.ipynb @@ -345,7 +345,7 @@ "\n", "dataset = client.create_dataset(name=\"Mapillary Diagnostics Demo\")\n", "print(f\"Dataset Created: {dataset.uid}\")\n", - "project.datasets.connect(dataset)" + "project.create_batches_from_dataset(\"batch\", dataset.uid)" ], "cell_type": "code", "outputs": [], @@ -549,4 +549,4 @@ "execution_count": null } ] -} +} \ No newline at end of file diff --git a/examples/prediction_upload/conversational_LLM_predictions.ipynb b/examples/prediction_upload/conversational_LLM_predictions.ipynb new file mode 100644 index 000000000..0056d525c --- /dev/null +++ b/examples/prediction_upload/conversational_LLM_predictions.ipynb @@ -0,0 +1,830 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": {}, + "cells": [ + { + "metadata": {}, + "source": [ + "\n", + " \n", + "\n", + "\n" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# LLM pairwise comparison with Conversational text using Model\n", + "\n", + "This demo is meant to showcase how to upload conversational row data that contains model outputs for pairwise comparisons analysis in the model product.\n" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "!pip install \"labelbox[data]\" -q" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "# Set up" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "import labelbox as lb\n", + "import labelbox.types as lb_types\n", + "import uuid" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "# Replace with 
your API Key" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "API_KEY = \"\"\n", + "client = lb.Client(api_key=API_KEY)" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "# Supported annotations for conversational text" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "### Entity" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "ner_prediction = lb_types.ObjectAnnotation(\n", + " name=\"ner\",\n", + " confidence=0.5,\n", + " value=lb_types.ConversationEntity(\n", + " start=0,\n", + " end=8,\n", + " message_id=\"message-1\"\n", + " )\n", + ")\n", + "\n", + "ner_prediction_ndjson = {\n", + " \"name\": \"ner\",\n", + " \"confidence\": 0.5,\n", + " \"location\": {\n", + " \"start\": 0,\n", + " \"end\": 8\n", + " },\n", + " \"messageId\": \"message-1\"\n", + " }" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "### Classification: Radio (single-choice)" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "radio_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"Choose the best response\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"Response B\", confidence=0.5)))\n", + "\n", + "\n", + "\n", + "radio_prediction_ndjson = {\n", + " \"name\": \"Choose the best response\",\n", + " \"answer\": {\n", + " \"name\": \"Response B\",\n", + " \"confidence\": 0.5\n", + " }\n", + "}\n" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "### Classification: Free-form text" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "text_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"Provide a reason for your choice\",\n", + " value=lb_types.Text(answer=\"the answer to the text questions right here\", 
confidence=0.5)\n", + ")\n", + "\n", + "\n", + "text_prediction_ndjson = {\n", + " \"name\": \"Provide a reason for your choice\",\n", + " \"answer\": \"This is the more concise answer\",\n", + " \"confidence\": 0.5\n", + "\n", + "}" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "### Classification: Checklist (multi-choice)" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "checklist_prediction= lb_types.ClassificationAnnotation(\n", + " name=\"checklist_convo\", # must match your ontology feature\"s name\n", + " value=lb_types.Checklist(\n", + " answer = [\n", + " lb_types.ClassificationAnswer(\n", + " name = \"first_checklist_answer\",\n", + " confidence=0.5\n", + " ),\n", + " lb_types.ClassificationAnswer(\n", + " name = \"second_checklist_answer\",\n", + " confidence=0.5\n", + " )\n", + " ]\n", + " ),\n", + " message_id=\"message-1\" # Message specific annotation\n", + " )\n", + "\n", + "\n", + "checklist_prediction_ndjson = {\n", + " \"name\": \"checklist_convo\",\n", + " \"answers\": [\n", + " {\"name\": \"first_checklist_answer\",\"confidence\":0.5},\n", + " {\"name\": \"second_checklist_answer\",\"confidence\":0.5}\n", + " ],\n", + " \"messageId\": \"message-1\"\n", + "}" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "### Classification: Nested radio and checklist" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "\n", + "# Message based\n", + "nested_checklist_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " message_id=\"message-1\",\n", + " value=lb_types.Checklist(\n", + " answer=[lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " confidence=0.5, # Confidence scores should be added to the answer\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " 
name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(\n", + " answer=[lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\",\n", + " confidence=0.5 # Confidence scores should be added to the answer\n", + " )]\n", + " ))\n", + " ]\n", + " )]\n", + " )\n", + ")\n", + "# Message based\n", + "nested_checklist_prediction_ndjson = {\n", + " \"name\": \"nested_checklist_question\",\n", + " \"messageId\": \"message-1\",\n", + " \"answer\": [{\n", + " \"name\": \"first_checklist_answer\",\n", + " \"confidence\": 0.5, # Confidence scores should be added to the answer\n", + " \"classifications\" : [\n", + " {\n", + " \"name\": \"sub_checklist_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_checklist_answer\",\n", + " \"confidence\": 0.5, # Confidence scores should be added to the answer\n", + " }\n", + " }\n", + " ]\n", + " }]\n", + "}\n", + "# Global\n", + "nested_radio_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(\n", + " answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " confidence=0.5, # Confidence scores should be added to the answer\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(\n", + " answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\",\n", + " confidence=0.5 # Confidence scores should be added to the answer\n", + " )\n", + " )\n", + " )\n", + " ]\n", + " )\n", + " )\n", + ")\n", + "#Global\n", + "nested_radio_prediction_ndjson = {\n", + " \"name\": \"nested_radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_radio_answer\",\n", + " \"confidence\": 0.5,\n", + " \"classifications\": [{\n", + " \"name\":\"sub_radio_question\",\n", + " \"answer\": { \"name\" : \"first_sub_radio_answer\",\n", + " \"confidence\": 0.5}\n", + " }]\n", + " }\n", + "}\n", + "\n" + ], + "cell_type": "code", + 
"outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Step 1: Import data rows with \"modelOutputs\" into Catalog\n" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "In addition to your message based data, you will need to add a list of model outputs to your JSON file:\n", + "\n", + "```\n", + "\"modelOutputs\" : [\n", + " {\n", + " \"title\": \"Name of the response option\",\n", + " \"content\": \"Content of the response\",\n", + " \"modelConfigName\": \"Name of model configuration\"\n", + " }\n", + "]\n", + "```\n" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "#### Example of row_data with model outputs" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "pairwise_shopping_2 = \"\"\"\n", + " {\n", + " \"type\": \"application/vnd.labelbox.conversational\",\n", + " \"version\": 1,\n", + " \"messages\": [\n", + " {\n", + " \"messageId\": \"message-0\",\n", + " \"timestampUsec\": 1530718491,\n", + " \"content\": \"Hi! How can I help?\",\n", + " \"user\": {\n", + " \"userId\": \"Bot 002\",\n", + " \"name\": \"Bot\"\n", + " },\n", + " \"align\": \"left\",\n", + " \"canLabel\": false\n", + " },\n", + " {\n", + " \"messageId\": \"message-1\",\n", + " \"timestampUsec\": 1530718503,\n", + " \"content\": \"I just bought a vacuum cleaner from you guys like a week ago and it's already broken!!\",\n", + " \"user\": {\n", + " \"userId\": \"User 00686\",\n", + " \"name\": \"User\"\n", + " },\n", + " \"align\": \"right\",\n", + " \"canLabel\": true\n", + " }\n", + "\n", + " ],\n", + " \"modelOutputs\": [\n", + " {\n", + " \"title\": \"Response A\",\n", + " \"content\": \"I'm really sorry to hear that your vacuum cleaner is not working as expected. We certainly understand how frustrating this can be, especially with a recent purchase. 
I assure you, we're here to help!\\n\\n To proceed with resolving this issue, could you please provide some details about the problem you're experiencing with the vacuum cleaner? Additionally, if you could share your order number or receipt details, that would be very helpful. We will definitely look into the available options for you, which may include a replacement or a refund, depending on the specific circumstances and our return policy.\\n\\n Your satisfaction is our priority, and we'll do our best to resolve this matter promptly and to your satisfaction.\",\n", + " \"modelConfigName\": \"GPT-3.5 with temperature 0\"\n", + " },\n", + " {\n", + " \"title\": \"Response B\",\n", + " \"content\": \"I'm sorry about the issue with your vacuum cleaner. Please send us the order number or receipt details so we can quickly assist you with a replacement. Your satisfaction is important to us!\",\n", + " \"modelConfigName\": \"Fine Tuned GPT-3.5 with demo data\"\n", + " }\n", + " ]\n", + "}\n", + "\"\"\"" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "global_key = \"pairwise_shooping_asset\"\n", + "convo_data = {\n", + " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/conversational-sample-data/pairwise_shopping_2.json\",\n", + " \"global_key\": global_key\n", + "}\n", + "# Create a dataset\n", + "dataset = client.create_dataset(name=\"pairwise_prediction_demo\")\n", + "# Create a datarows\n", + "task = dataset.create_data_rows([convo_data])\n", + "print(\"Errors:\",task.errors)\n", + "print(\"Failed data rows:\", task.failed_data_rows)" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Step 2: Create/select an Ontology for your model predictions" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# Create an ontology with relevant classifications\n", + "\n", + "ontology_builder = 
lb.OntologyBuilder(\n", + " tools=[\n", + " lb.Tool(tool=lb.Tool.Type.NER,name=\"ner\"),\n", + " ],\n", + " classifications=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " scope=lb.Classification.Scope.GLOBAL,\n", + " name=\"Choose the best response\",\n", + " options=[lb.Option(value=\"Response A\"), lb.Option(value=\"Response B\"), lb.Option(value=\"Tie\")]\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.TEXT,\n", + " name=\"Provide a reason for your choice\"\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " scope=lb.Classification.Scope.INDEX,\n", + " name=\"checklist_convo\",\n", + " options=[\n", + " lb.Option(value=\"first_checklist_answer\"),\n", + " lb.Option(value=\"second_checklist_answer\")\n", + " ]\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"nested_checklist_question\",\n", + " scope = lb.Classification.Scope.INDEX,\n", + " options=[\n", + " lb.Option(\"first_checklist_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"sub_checklist_question\",\n", + " options=[lb.Option(\"first_sub_checklist_answer\")]\n", + " )\n", + " ])\n", + " ]\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"nested_radio_question\",\n", + " scope = lb.Classification.Scope.GLOBAL,\n", + " options=[\n", + " lb.Option(\"first_radio_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"sub_radio_question\",\n", + " options=[lb.Option(\"first_sub_radio_answer\")]\n", + " )\n", + " ])\n", + " ]\n", + " )\n", + " ]\n", + ")\n", + "\n", + "ontology = client.create_ontology(\"Pairwise comparison ontology\", ontology_builder.asdict(), media_type=lb.MediaType.Conversational)\n", + "\n" + ], + "cell_type": "code", + "outputs": 
[], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Step 3: Create a Model and Model Run" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# create model\n", + "model = client.create_model(name=\"Comparison_model_run_\"+ str(uuid.uuid4()),\n", + " ontology_id=ontology.uid)\n", + "# create model run\n", + "model_run = model.create_model_run(\"iteration 1\")" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Step 4: Send data rows to the Model run" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "model_run.upsert_data_rows(global_keys=[global_key])" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "# Step 5: Create the predictions payload" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "label_prediction = []\n", + "label_prediction.append(lb_types.Label(\n", + " data=lb_types.ConversationData(\n", + " global_key=global_key\n", + " ),\n", + " annotations= [\n", + " ner_prediction,\n", + " text_prediction,\n", + " checklist_prediction,\n", + " radio_prediction,\n", + " nested_radio_prediction,\n", + " nested_checklist_prediction\n", + " ]\n", + "))" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "Setup the payload with the annotations that were created in Step 1." 
+ ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "label_ndjson = []\n", + "for annotations in [\n", + " ner_prediction_ndjson,\n", + " text_prediction_ndjson,\n", + " checklist_prediction_ndjson,\n", + " radio_prediction_ndjson,\n", + " nested_checklist_prediction_ndjson,\n", + " nested_radio_prediction_ndjson\n", + " ]:\n", + " annotations.update({\n", + " \"dataRow\": {\n", + " \"globalKey\": global_key\n", + " }\n", + " })\n", + " label_ndjson.append(annotations)" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Step 6: Upload the predictions payload to the Model Run" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# Upload the prediction label to the Model Run\n", + "upload_job_prediction = model_run.add_predictions(\n", + " name=\"prediction_upload_job\"+str(uuid.uuid4()),\n", + " predictions=label_prediction)\n", + "\n", + "# Errors will appear for annotation uploads that failed.\n", + "print(\"Errors:\", upload_job_prediction.errors)\n", + "print(\"Status of uploads: \", upload_job_prediction.statuses)" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Step 7: Send annotations to the Model Run " + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "7.1 Create a labelbox project" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "project = client.create_project(name=\"Conversational Human Evaluation Demo\",\n", + " media_type=lb.MediaType.Conversational)\n", + "project.setup_editor(ontology)" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "7.2 Create a batch to send to the project" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "project.create_batch(\n", + " \"batch_convo_prediction_demo\", # Each batch in a project must have a unique 
name\n", + " global_keys=[global_key], # Paginated collection of data row objects, list of data row ids or global keys\n", + " priority=5 # priority between 1(Highest) - 5(lowest)\n", + ")" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "7.3 Create the annotations payload" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "ner_annotation = lb_types.ObjectAnnotation(\n", + " name=\"ner\",\n", + " value=lb_types.ConversationEntity(\n", + " start=0,\n", + " end=8,\n", + " message_id=\"message-1\"\n", + " )\n", + ")\n", + "\n", + "radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"Choose the best response\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"Response B\"))\n", + ")\n", + "\n", + "text_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"Provide a reason for your choice\",\n", + " value=lb_types.Text(answer=\"the answer to the text questions right here\")\n", + ")\n", + "\n", + "checklist_annotation= lb_types.ClassificationAnnotation(\n", + " name=\"checklist_convo\", # must match your ontology feature\"s name\n", + " value=lb_types.Checklist(\n", + " answer = [\n", + " lb_types.ClassificationAnswer(\n", + " name = \"first_checklist_answer\"\n", + " ),\n", + " lb_types.ClassificationAnswer(\n", + " name = \"second_checklist_answer\"\n", + " )\n", + " ]\n", + " ),\n", + " message_id=\"message-1\" # Message specific annotation\n", + " )\n", + "\n", + "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " message_id=\"message-1\",\n", + " value=lb_types.Checklist(\n", + " answer=[lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(\n", + " answer=[lb_types.ClassificationAnswer(\n", + " 
name=\"first_sub_checklist_answer\"\n", + " )]\n", + " ))\n", + " ]\n", + " )]\n", + " )\n", + ")\n", + "\n", + "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(\n", + " answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(\n", + " answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\"\n", + " )\n", + " )\n", + " )\n", + " ]\n", + " )\n", + " )\n", + ")" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "7.4 Create the label object" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "label_annotation = []\n", + "label_annotation.append(lb_types.Label(\n", + " data=lb_types.ConversationData(\n", + " global_key=global_key\n", + " ),\n", + " annotations= [\n", + " ner_annotation,\n", + " text_annotation,\n", + " checklist_annotation,\n", + " radio_annotation,\n", + " nested_radio_annotation,\n", + " nested_checklist_annotation\n", + " ]\n", + "))" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "7.5 Upload annotations to the project using Label Import" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "upload_job_annotation = lb.LabelImport.create_from_objects(\n", + " client = client,\n", + " project_id = project.uid,\n", + " name=\"label_import_job\"+ str(uuid.uuid4()),\n", + " labels=label_annotation)\n", + "\n", + "upload_job_annotation.wait_until_done()\n", + "# Errors will appear for annotation uploads that failed.\n", + "print(\"Errors:\", upload_job_annotation.errors)\n", + "print(\"Status of uploads: \", upload_job_annotation.statuses)" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": 
{}, + "source": [ + "7.6 Send the annotations to the Model Run" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# get the labels id from the project\n", + "model_run.upsert_labels(project_id=project.uid)" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Option deletions for cleanup" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# project.delete()\n", + "# dataset.delete()" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + } + ] +} \ No newline at end of file diff --git a/labelbox/__init__.py b/labelbox/__init__.py index 09202fb96..ab617b4f1 100644 --- a/labelbox/__init__.py +++ b/labelbox/__init__.py @@ -1,6 +1,6 @@ name = "labelbox" -__version__ = "3.57.0" +__version__ = "3.58.0" from labelbox.client import Client from labelbox.schema.project import Project @@ -33,4 +33,4 @@ from labelbox.schema.slice import Slice, CatalogSlice, ModelSlice from labelbox.schema.queue_mode import QueueMode from labelbox.schema.task_queue import TaskQueue -from labelbox.schema.identifiables import UniqueIds, GlobalKeys +from labelbox.schema.identifiables import UniqueIds, GlobalKeys, DataRowIds diff --git a/labelbox/client.py b/labelbox/client.py index 3b6337bd1..87c85d732 100644 --- a/labelbox/client.py +++ b/labelbox/client.py @@ -1,48 +1,54 @@ # type: ignore -from datetime import datetime, timezone import json -from typing import Any, List, Dict, Union -from collections import defaultdict - import logging import mimetypes import os -import time +import random import sys +import time import urllib.parse +from collections import defaultdict +from datetime import datetime, timezone +from typing import Any, List, Dict, Union, Optional -from google.api_core import retry import requests import requests.exceptions +from google.api_core import retry import labelbox.exceptions -from labelbox import utils from labelbox import __version__ as 
SDK_VERSION +from labelbox import utils from labelbox.orm import query from labelbox.orm.db_object import DbObject from labelbox.orm.model import Entity from labelbox.pagination import PaginatedCollection +from labelbox.schema import role +from labelbox.schema.conflict_resolution_strategy import ConflictResolutionStrategy +from labelbox.schema.data_row import DataRow from labelbox.schema.data_row_metadata import DataRowMetadataOntology from labelbox.schema.dataset import Dataset -from labelbox.schema.data_row import DataRow from labelbox.schema.enums import CollectionJobStatus +from labelbox.schema.foundry.foundry_client import FoundryClient from labelbox.schema.iam_integration import IAMIntegration -from labelbox.schema import role +from labelbox.schema.identifiables import DataRowIds +from labelbox.schema.identifiables import GlobalKeys from labelbox.schema.labeling_frontend import LabelingFrontend +from labelbox.schema.media_type import MediaType, get_media_type_validation_error from labelbox.schema.model import Model from labelbox.schema.model_run import ModelRun -from labelbox.schema.ontology import Ontology, Tool, Classification, FeatureSchema +from labelbox.schema.ontology import Ontology, DeleteFeatureFromOntologyResult +from labelbox.schema.ontology import Tool, Classification, FeatureSchema from labelbox.schema.organization import Organization +from labelbox.schema.project import Project from labelbox.schema.quality_mode import QualityMode, BENCHMARK_AUTO_AUDIT_NUMBER_OF_LABELS, \ BENCHMARK_AUTO_AUDIT_PERCENTAGE, CONSENSUS_AUTO_AUDIT_NUMBER_OF_LABELS, CONSENSUS_AUTO_AUDIT_PERCENTAGE -from labelbox.schema.user import User -from labelbox.schema.project import Project +from labelbox.schema.queue_mode import QueueMode from labelbox.schema.role import Role +from labelbox.schema.send_to_annotate_params import SendToAnnotateFromCatalogParams, build_destination_task_queue_input, \ + build_predictions_input, build_annotations_input from labelbox.schema.slice 
import CatalogSlice, ModelSlice -from labelbox.schema.queue_mode import QueueMode -from labelbox.schema.ontology import Ontology, DeleteFeatureFromOntologyResult - -from labelbox.schema.media_type import MediaType, get_media_type_validation_error +from labelbox.schema.task import Task +from labelbox.schema.user import User logger = logging.getLogger(__name__) @@ -1786,3 +1792,152 @@ def get_batch(self, project_id: str, batch_id: str) -> Entity.Batch: experimental=True)["project"]["batches"]["nodes"][0] return Entity.Batch(self, project_id, batch) + + def send_to_annotate_from_catalog(self, destination_project_id: str, + task_queue_id: Optional[str], + batch_name: str, + data_rows: Union[DataRowIds, GlobalKeys], + params: SendToAnnotateFromCatalogParams): + """ + Sends data rows from catalog to a specified project for annotation. + + Example usage: + >>> task = client.send_to_annotate_from_catalog( + >>> destination_project_id=DESTINATION_PROJECT_ID, + >>> task_queue_id=TASK_QUEUE_ID, + >>> batch_name="batch_name", + >>> data_rows=UniqueIds([DATA_ROW_ID]), + >>> params={ + >>> "source_project_id": + >>> SOURCE_PROJECT_ID, + >>> "override_existing_annotations_rule": + >>> ConflictResolutionStrategy.OverrideWithAnnotations + >>> }) + >>> task.wait_till_done() + + Args: + destination_project_id: The ID of the project to send the data rows to. + task_queue_id: The ID of the task queue to send the data rows to. If not specified, the data rows will be + sent to the Done workflow state. + batch_name: The name of the batch to create. If more than one batch is created, additional batches will be + named with a monotonically increasing numerical suffix, starting at "_1". + data_rows: The data rows to send to the project. + params: Additional parameters to configure the job. See SendToAnnotateFromCatalogParams for more details. + + Returns: The created task for this operation. 
+ + """ + + mutation_str = """mutation SendToAnnotateFromCatalogPyApi($input: SendToAnnotateFromCatalogInput!) { + sendToAnnotateFromCatalog(input: $input) { + taskId + } + } + """ + + destination_task_queue = build_destination_task_queue_input( + task_queue_id) + data_rows_query = self.build_catalog_query(data_rows) + + source_model_run_id = params.get("source_model_run_id", None) + predictions_ontology_mapping = params.get( + "predictions_ontology_mapping", None) + predictions_input = build_predictions_input( + predictions_ontology_mapping, + source_model_run_id) if source_model_run_id else None + + source_project_id = params.get("source_project_id", None) + annotations_ontology_mapping = params.get( + "annotations_ontology_mapping", None) + annotations_input = build_annotations_input( + annotations_ontology_mapping, + source_project_id) if source_project_id else None + + batch_priority = params.get("batch_priority", 5) + exclude_data_rows_in_project = params.get( + "exclude_data_rows_in_project", False) + override_existing_annotations_rule = params.get( + "override_existing_annotations_rule", + ConflictResolutionStrategy.KeepExisting) + + res = self.execute( + mutation_str, { + "input": { + "destinationProjectId": + destination_project_id, + "batchInput": { + "batchName": batch_name, + "batchPriority": batch_priority + }, + "destinationTaskQueue": + destination_task_queue, + "excludeDataRowsInProject": + exclude_data_rows_in_project, + "annotationsInput": + annotations_input, + "predictionsInput": + predictions_input, + "conflictLabelsResolutionStrategy": + override_existing_annotations_rule, + "searchQuery": { + "scope": None, + "query": [data_rows_query] + }, + "ordering": { + "type": "RANDOM", + "random": { + "seed": random.randint(0, 10000) + }, + "sorting": None + }, + "sorting": + None, + "limit": + None + } + })['sendToAnnotateFromCatalog'] + + return Entity.Task.get_task(self, res['taskId']) + + @staticmethod + def build_catalog_query(data_rows: 
Union[DataRowIds, GlobalKeys]): + """ + Given a list of data rows, builds a query that can be used to fetch the associated data rows from the catalog. + + Args: + data_rows: A list of data rows. Can be either UniqueIds or GlobalKeys. + + Returns: A query that can be used to fetch the associated data rows from the catalog. + + """ + if isinstance(data_rows, DataRowIds): + data_rows_query = { + "type": "data_row_id", + "operator": "is", + "ids": list(data_rows) + } + elif isinstance(data_rows, GlobalKeys): + data_rows_query = { + "type": "global_key", + "operator": "is", + "ids": list(data_rows) + } + else: + raise ValueError( + f"Invalid data_rows type {type(data_rows)}. Type of data_rows must be DataRowIds or GlobalKey" + ) + return data_rows_query + + def run_foundry_app(self, model_run_name: str, data_rows: Union[DataRowIds, + GlobalKeys], + app_id: str) -> Task: + """ + Run a foundry app + + Args: + model_run_name (str): Name of a new model run to store app predictions in + data_rows (DataRowIds or GlobalKeys): Data row identifiers to run predictions on + app_id (str): Foundry app to run predictions with + """ + foundry_client = FoundryClient(self) + return foundry_client.run_app(model_run_name, data_rows, app_id) diff --git a/labelbox/data/annotation_types/__init__.py b/labelbox/data/annotation_types/__init__.py index e085699f8..45a78bd02 100644 --- a/labelbox/data/annotation_types/__init__.py +++ b/labelbox/data/annotation_types/__init__.py @@ -42,6 +42,9 @@ from .data import MaskData from .data import TextData from .data import VideoData +from .data import LlmPromptResponseCreationData +from .data import LlmPromptCreationData +from .data import LlmResponseCreationData from .label import Label from .collection import LabelList diff --git a/labelbox/data/annotation_types/data/__init__.py b/labelbox/data/annotation_types/data/__init__.py index ad17ec0d6..99978caac 100644 --- a/labelbox/data/annotation_types/data/__init__.py +++ 
b/labelbox/data/annotation_types/data/__init__.py @@ -6,4 +6,7 @@ from .raster import ImageData from .raster import MaskData from .text import TextData -from .video import VideoData \ No newline at end of file +from .video import VideoData +from .llm_prompt_response_creation import LlmPromptResponseCreationData +from .llm_prompt_creation import LlmPromptCreationData +from .llm_response_creation import LlmResponseCreationData \ No newline at end of file diff --git a/labelbox/data/annotation_types/data/llm_prompt_creation.py b/labelbox/data/annotation_types/data/llm_prompt_creation.py new file mode 100644 index 000000000..4fd788f1a --- /dev/null +++ b/labelbox/data/annotation_types/data/llm_prompt_creation.py @@ -0,0 +1,7 @@ +from labelbox.typing_imports import Literal +from labelbox.utils import _NoCoercionMixin +from .base_data import BaseData + + +class LlmPromptCreationData(BaseData, _NoCoercionMixin): + class_name: Literal["LlmPromptCreationData"] = "LlmPromptCreationData" \ No newline at end of file diff --git a/labelbox/data/annotation_types/data/llm_prompt_response_creation.py b/labelbox/data/annotation_types/data/llm_prompt_response_creation.py new file mode 100644 index 000000000..2bad75f6d --- /dev/null +++ b/labelbox/data/annotation_types/data/llm_prompt_response_creation.py @@ -0,0 +1,8 @@ +from labelbox.typing_imports import Literal +from labelbox.utils import _NoCoercionMixin +from .base_data import BaseData + + +class LlmPromptResponseCreationData(BaseData, _NoCoercionMixin): + class_name: Literal[ + "LlmPromptResponseCreationData"] = "LlmPromptResponseCreationData" \ No newline at end of file diff --git a/labelbox/data/annotation_types/data/llm_response_creation.py b/labelbox/data/annotation_types/data/llm_response_creation.py new file mode 100644 index 000000000..43c604e34 --- /dev/null +++ b/labelbox/data/annotation_types/data/llm_response_creation.py @@ -0,0 +1,7 @@ +from labelbox.typing_imports import Literal +from labelbox.utils import 
_NoCoercionMixin +from .base_data import BaseData + + +class LlmResponseCreationData(BaseData, _NoCoercionMixin): + class_name: Literal["LlmResponseCreationData"] = "LlmResponseCreationData" \ No newline at end of file diff --git a/labelbox/data/annotation_types/label.py b/labelbox/data/annotation_types/label.py index 457180854..a7009e3de 100644 --- a/labelbox/data/annotation_types/label.py +++ b/labelbox/data/annotation_types/label.py @@ -10,7 +10,7 @@ from .annotation import ClassificationAnnotation, ObjectAnnotation from .relationship import RelationshipAnnotation from .classification import ClassificationAnswer -from .data import AudioData, ConversationData, DicomData, DocumentData, HTMLData, ImageData, MaskData, TextData, VideoData +from .data import AudioData, ConversationData, DicomData, DocumentData, HTMLData, ImageData, MaskData, TextData, VideoData, LlmPromptCreationData, LlmPromptResponseCreationData, LlmResponseCreationData from .geometry import Mask from .metrics import ScalarMetric, ConfusionMatrixMetric from .types import Cuid @@ -19,7 +19,9 @@ from ..ontology import get_feature_schema_lookup DataType = Union[VideoData, ImageData, TextData, TiledImageData, AudioData, - ConversationData, DicomData, DocumentData, HTMLData] + ConversationData, DicomData, DocumentData, HTMLData, + LlmPromptCreationData, LlmPromptResponseCreationData, + LlmResponseCreationData] class Label(BaseModel): diff --git a/labelbox/schema/conflict_resolution_strategy.py b/labelbox/schema/conflict_resolution_strategy.py new file mode 100644 index 000000000..fd303fd5f --- /dev/null +++ b/labelbox/schema/conflict_resolution_strategy.py @@ -0,0 +1,11 @@ +from enum import Enum + + +class ConflictResolutionStrategy(str, Enum): + KeepExisting = "KEEP_EXISTING" + OverrideWithAnnotations = "OVERRIDE_WITH_ANNOTATIONS" + OverrideWithPredictions = "OVERRIDE_WITH_PREDICTIONS" + + @staticmethod + def from_str(label: str) -> "ConflictResolutionStrategy": + return 
ConflictResolutionStrategy[label] diff --git a/labelbox/schema/dataset.py b/labelbox/schema/dataset.py index 69c37dfaf..b563c9f1f 100644 --- a/labelbox/schema/dataset.py +++ b/labelbox/schema/dataset.py @@ -44,7 +44,6 @@ class Dataset(DbObject, Updateable, Deletable): created_at (datetime) row_count (int): The number of rows in the dataset. Fetch the dataset again to update since this is cached. - projects (Relationship): `ToMany` relationship to Project created_by (Relationship): `ToOne` relationship to User organization (Relationship): `ToOne` relationship to Organization @@ -56,12 +55,6 @@ class Dataset(DbObject, Updateable, Deletable): row_count = Field.Int("row_count") # Relationships - projects = Relationship.ToMany( - "Project", - True, - deprecation_warning= - "This method does not return any data for batch-based projects and it will be deprecated on or around November 1, 2023." - ) created_by = Relationship.ToOne("User", False, "created_by") organization = Relationship.ToOne("Organization", False) iam_integration = Relationship.ToOne("IAMIntegration", False, diff --git a/labelbox/schema/export_task.py b/labelbox/schema/export_task.py index 92ecc7b98..f06c14100 100644 --- a/labelbox/schema/export_task.py +++ b/labelbox/schema/export_task.py @@ -463,7 +463,7 @@ class ExportTask: for accessing task details such as UID, status, and creation time. 
""" - class TaskNotReadyException(Exception): + class ExportTaskException(Exception): """Raised when the task is not ready yet.""" def __init__(self, task: Task) -> None: @@ -554,16 +554,20 @@ def _get_metadata_header( def get_total_file_size(self, stream_type: StreamType) -> Union[int, None]: """Returns the total file size for a specific task.""" - if not self._task.status in ["COMPLETE", "FAILED"]: - raise ExportTask.TaskNotReadyException("Task is not ready yet") + if self._task.status == "FAILED": + raise ExportTask.ExportTaskException("Task failed") + if self._task.status != "COMPLETE": + raise ExportTask.ExportTaskException("Task is not ready yet") header = ExportTask._get_metadata_header(self._task.client, self._task.uid, stream_type) return header.total_size if header else None def get_total_lines(self, stream_type: StreamType) -> Union[int, None]: """Returns the total file size for a specific task.""" - if not self._task.status in ["COMPLETE", "FAILED"]: - raise ExportTask.TaskNotReadyException("Task is not ready yet") + if self._task.status == "FAILED": + raise ExportTask.ExportTaskException("Task failed") + if self._task.status != "COMPLETE": + raise ExportTask.ExportTaskException("Task is not ready yet") header = ExportTask._get_metadata_header(self._task.client, self._task.uid, stream_type) return header.total_lines if header else None @@ -600,8 +604,10 @@ def get_stream( stream_type: StreamType = StreamType.RESULT, ) -> Stream: """Returns the result of the task.""" - if not self._task.status in ["COMPLETE", "FAILED"]: - raise ExportTask.TaskNotReadyException("Task is not ready yet") + if self._task.status == "FAILED": + raise ExportTask.ExportTaskException("Task failed") + if self._task.status != "COMPLETE": + raise ExportTask.ExportTaskException("Task is not ready yet") metadata_header = self._get_metadata_header(self._task.client, self._task.uid, stream_type) diff --git a/labelbox/schema/foundry/__init__.py b/labelbox/schema/foundry/__init__.py new 
file mode 100644 index 000000000..e69de29bb diff --git a/labelbox/schema/foundry/app.py b/labelbox/schema/foundry/app.py new file mode 100644 index 000000000..67536d0b0 --- /dev/null +++ b/labelbox/schema/foundry/app.py @@ -0,0 +1,23 @@ +from labelbox.utils import _CamelCaseMixin + +from pydantic import BaseModel + +from typing import Any, Dict, Optional + + +class App(_CamelCaseMixin, BaseModel): + id: Optional[str] + model_id: str + name: str + description: str + inference_params: Dict[str, Any] + class_to_schema_id: Dict[str, str] + ontology_id: str + created_by: Optional[str] = None + + @classmethod + def type_name(cls): + return "App" + + +APP_FIELD_NAMES = list(App.schema()['properties'].keys()) diff --git a/labelbox/schema/foundry/foundry_client.py b/labelbox/schema/foundry/foundry_client.py new file mode 100644 index 000000000..50df486d4 --- /dev/null +++ b/labelbox/schema/foundry/foundry_client.py @@ -0,0 +1,108 @@ +from typing import Union +from labelbox import exceptions +from labelbox.schema.foundry.app import App, APP_FIELD_NAMES +from labelbox.schema.foundry.model import Model, MODEL_FIELD_NAMES +from labelbox.schema.identifiables import DataRowIds, GlobalKeys +from labelbox.schema.task import Task + + +class FoundryClient: + + def __init__(self, client): + self.client = client + + def _create_app(self, app: App) -> App: + field_names_str = "\n".join(APP_FIELD_NAMES) + query_str = f""" + mutation CreateDataRowAttachmentPyApi( + $name: String!, $modelId: ID!, $ontologyId: ID!, $description: String, $inferenceParams: Json!, $classToSchemaId: Json! 
+ ){{ + createModelFoundryApp(input: {{ + name: $name + modelId: $modelId + ontologyId: $ontologyId + description: $description + inferenceParams: $inferenceParams + classToSchemaId: $classToSchemaId + }}) + {{ + {field_names_str} + }} + }} + """ + + params = app.dict(by_alias=True, exclude={"id"}) + + try: + response = self.client.execute(query_str, params) + except exceptions.LabelboxError as e: + raise exceptions.LabelboxError('Unable to create app', e) + return App(**response["createModelFoundryApp"]) + + def _get_app(self, id: str) -> App: + field_names_str = "\n".join(APP_FIELD_NAMES) + + query_str = f""" + query GetFoundryAppByIdPyApi($id: ID!) {{ + findModelFoundryApp(where: {{id: $id}}) {{ + {field_names_str} + }} + }} + """ + params = {"id": id} + + try: + response = self.client.execute(query_str, params) + except exceptions.InvalidQueryError as e: + raise exceptions.ResourceNotFoundError(App, params) + except Exception as e: + raise exceptions.LabelboxError(f'Unable to get app with id {id}', e) + return App(**response["findModelFoundryApp"]) + + def _delete_app(self, id: str) -> None: + query_str = """ + mutation DeleteFoundryAppPyApi($id: ID!) { + deleteModelFoundryApp(id: $id) { + success + } + } + """ + params = {"id": id} + try: + self.client.execute(query_str, params) + except Exception as e: + raise exceptions.LabelboxError(f'Unable to delete app with id {id}', + e) + + def run_app(self, model_run_name: str, + data_rows: Union[DataRowIds, GlobalKeys], app_id: str) -> Task: + app = self._get_app(app_id) + + data_rows_query = self.client.build_catalog_query(data_rows) + + params = { + "modelId": str(app.model_id), + "name": model_run_name, + "classToSchemaId": app.class_to_schema_id, + "inferenceParams": app.inference_params, + "searchQuery": { + "query": [data_rows_query], + "scope": None + }, + "ontologyId": app.ontology_id + } + + query = """ + mutation CreateModelJobPyApi($input: CreateModelJobInput!) 
{ + createModelJob(input: $input) { + taskId + __typename + } + } + """ + try: + response = self.client.execute(query, {"input": params}) + except Exception as e: + raise exceptions.LabelboxError('Unable to run foundry app', e) + task_id = response["createModelJob"]["taskId"] + return Task.get_task(self.client, task_id) diff --git a/labelbox/schema/foundry/model.py b/labelbox/schema/foundry/model.py new file mode 100644 index 000000000..3e7ebd6e7 --- /dev/null +++ b/labelbox/schema/foundry/model.py @@ -0,0 +1,18 @@ +from labelbox.utils import _CamelCaseMixin + +from pydantic import BaseModel + +from datetime import datetime +from typing import Dict + + +class Model(_CamelCaseMixin, BaseModel): + id: str + description: str + inference_params_json_schema: Dict + name: str + ontology_id: str + created_at: datetime + + +MODEL_FIELD_NAMES = list(Model.schema()['properties'].keys()) diff --git a/labelbox/schema/identifiables.py b/labelbox/schema/identifiables.py index f83ecd559..f7cd97b49 100644 --- a/labelbox/schema/identifiables.py +++ b/labelbox/schema/identifiables.py @@ -47,4 +47,6 @@ def __init__(self, iterable: List[str]): super().__init__(iterable, IdType.GlobalKey) +DataRowIds = UniqueIds + DataRowIdentifiers = Union[UniqueIds, GlobalKeys] diff --git a/labelbox/schema/model_run.py b/labelbox/schema/model_run.py index 62b11db36..564df9694 100644 --- a/labelbox/schema/model_run.py +++ b/labelbox/schema/model_run.py @@ -1,22 +1,26 @@ # type: ignore -from typing import TYPE_CHECKING, Dict, Iterable, Union, List, Optional, Any -from pathlib import Path +import logging import os import time -import logging -import requests import warnings -from labelbox import parser from enum import Enum +from pathlib import Path +from typing import TYPE_CHECKING, Dict, Iterable, Union, List, Optional, Any -from labelbox.pagination import PaginatedCollection -from labelbox.orm.query import results_query_part -from labelbox.orm.model import Field, Relationship, Entity +import requests 
+ +from labelbox import parser from labelbox.orm.db_object import DbObject, experimental +from labelbox.orm.model import Field, Relationship, Entity +from labelbox.orm.query import results_query_part +from labelbox.pagination import PaginatedCollection +from labelbox.schema.conflict_resolution_strategy import ConflictResolutionStrategy from labelbox.schema.export_params import ModelRunExportParams from labelbox.schema.export_task import ExportTask +from labelbox.schema.identifiables import UniqueIds, GlobalKeys, DataRowIds +from labelbox.schema.send_to_annotate_params import SendToAnnotateFromModelParams, build_destination_task_queue_input, \ + build_predictions_input from labelbox.schema.task import Task -from labelbox.schema.user import User if TYPE_CHECKING: from labelbox import MEAPredictionImport @@ -170,7 +174,7 @@ def _wait_until_done(self, status_fn, timeout_seconds=120, sleep_time=5): if res['status'] == 'COMPLETE': return True elif res['status'] == 'FAILED': - raise Exception(f"Job failed. Details : {res['errorMessage']}") + raise Exception(f"Job failed.") timeout_seconds -= sleep_time if timeout_seconds <= 0: raise TimeoutError( @@ -565,6 +569,84 @@ def export_v2( task_id = res["taskId"] return Task.get_task(self.client, task_id) + def send_to_annotate_from_model( + self, destination_project_id: str, task_queue_id: Optional[str], + batch_name: str, data_rows: Union[DataRowIds, GlobalKeys], + params: SendToAnnotateFromModelParams) -> Task: + """ + Sends data rows from a model run to a project for annotation. + + Example Usage: + >>> task = model_run.send_to_annotate_from_model( + >>> destination_project_id=DESTINATION_PROJECT_ID, + >>> batch_name="batch", + >>> data_rows=UniqueIds([DATA_ROW_ID]), + >>> task_queue_id=TASK_QUEUE_ID, + >>> params={}) + >>> task.wait_till_done() + + Args: + destination_project_id: The ID of the project to send the data rows to. + task_queue_id: The ID of the task queue to send the data rows to. 
If not specified, the data rows will be + sent to the Done workflow state. + batch_name: The name of the batch to create. If more than one batch is created, additional batches will be + named with a monotonically increasing numerical suffix, starting at "_1". + data_rows: The data rows to send to the project. + params: Additional parameters for this operation. See SendToAnnotateFromModelParams for details. + + Returns: The created task for this operation. + + """ + + mutation_str = """mutation SendToAnnotateFromMeaPyApi($input: SendToAnnotateFromMeaInput!) { + sendToAnnotateFromMea(input: $input) { + taskId + } + } + """ + + destination_task_queue = build_destination_task_queue_input( + task_queue_id) + data_rows_query = self.client.build_catalog_query(data_rows) + + predictions_ontology_mapping = params.get( + "predictions_ontology_mapping", None) + predictions_input = build_predictions_input( + predictions_ontology_mapping, self.uid) + + batch_priority = params.get("batch_priority", 5) + exclude_data_rows_in_project = params.get( + "exclude_data_rows_in_project", False) + override_existing_annotations_rule = params.get( + "override_existing_annotations_rule", + ConflictResolutionStrategy.KeepExisting) + res = self.client.execute( + mutation_str, { + "input": { + "destinationProjectId": + destination_project_id, + "batchInput": { + "batchName": batch_name, + "batchPriority": batch_priority + }, + "destinationTaskQueue": + destination_task_queue, + "excludeDataRowsInProject": + exclude_data_rows_in_project, + "annotationsInput": + None, + "predictionsInput": + predictions_input, + "conflictLabelsResolutionStrategy": + override_existing_annotations_rule, + "searchQuery": [data_rows_query], + "sourceModelRunId": + self.uid + } + })['sendToAnnotateFromMea'] + + return Entity.Task.get_task(self.client, res['taskId']) + class ModelRunDataRow(DbObject): label_id = Field.String("label_id") diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py index 
ad00a9c8c..648b84a37 100644 --- a/labelbox/schema/project.py +++ b/labelbox/schema/project.py @@ -61,7 +61,6 @@ class Project(DbObject, Updateable, Deletable): auto_audit_number_of_labels (int) auto_audit_percentage (float) - datasets (Relationship): `ToMany` relationship to Dataset created_by (Relationship): `ToOne` relationship to User organization (Relationship): `ToOne` relationship to Organization labeling_frontend (Relationship): `ToOne` relationship to LabelingFrontend @@ -86,12 +85,6 @@ class Project(DbObject, Updateable, Deletable): media_type = Field.Enum(MediaType, "media_type", "allowedMediaType") # Relationships - datasets = Relationship.ToMany( - "Dataset", - True, - deprecation_warning= - "This method does not return any data for batch-based projects and it will be deprecated on or around November 1, 2023." - ) created_by = Relationship.ToOne("User", False, "created_by") organization = Relationship.ToOne("Organization", False) labeling_frontend = Relationship.ToOne("LabelingFrontend") diff --git a/labelbox/schema/send_to_annotate_params.py b/labelbox/schema/send_to_annotate_params.py new file mode 100644 index 000000000..daf0bff9c --- /dev/null +++ b/labelbox/schema/send_to_annotate_params.py @@ -0,0 +1,94 @@ +import sys + +from typing import Optional, Dict + +from labelbox.schema.conflict_resolution_strategy import ConflictResolutionStrategy + +if sys.version_info >= (3, 8): + from typing import TypedDict +else: + from typing_extensions import TypedDict + + +class SendToAnnotateFromCatalogParams(TypedDict): + """ + Extra parameters for sending data rows to a project through catalog. At least one of source_model_run_id or + source_project_id must be provided. + + :param source_model_run_id: Optional[str] - The model run to use for predictions. Defaults to None. + :param predictions_ontology_mapping: Optional[Dict[str, str]] - A mapping of feature schema ids to feature schema + ids. Defaults to an empty dictionary. 
+ :param source_project_id: Optional[str] - The project to use for annotations. Defaults to None. + :param annotations_ontology_mapping: Optional[Dict[str, str]] - A mapping of feature schema ids to feature schema + ids. Defaults to an empty dictionary. + :param exclude_data_rows_in_project: Optional[bool] - Exclude data rows that are already in the project. Defaults + to False. + :param override_existing_annotations_rule: Optional[ConflictResolutionStrategy] - The strategy defining how to + handle conflicts in classifications between the data rows that already exist in the project and incoming + predictions from the source model run or annotations from the source project. Defaults to + ConflictResolutionStrategy.KeepExisting. + :param batch_priority: Optional[int] - The priority of the batch. Defaults to 5. + """ + + source_model_run_id: Optional[str] + predictions_ontology_mapping: Optional[Dict[str, str]] + source_project_id: Optional[str] + annotations_ontology_mapping: Optional[Dict[str, str]] + exclude_data_rows_in_project: Optional[bool] + override_existing_annotations_rule: Optional[ConflictResolutionStrategy] + batch_priority: Optional[int] + + +class SendToAnnotateFromModelParams(TypedDict): + """ + Extra parameters for sending data rows to a project through a model run. + + :param predictions_ontology_mapping: Dict[str, str] - A mapping of feature schema ids to feature schema ids. + Defaults to an empty dictionary. + :param exclude_data_rows_in_project: Optional[bool] - Exclude data rows that are already in the project. Defaults + to False. + :param override_existing_annotations_rule: Optional[ConflictResolutionStrategy] - The strategy defining how to + handle conflicts in classifications between the data rows that already exist in the project and incoming + predictions from the source model run. Defaults to ConflictResolutionStrategy.KeepExisting. + :param batch_priority: Optional[int] - The priority of the batch. Defaults to 5.
+ """ + + predictions_ontology_mapping: Dict[str, str] + exclude_data_rows_in_project: Optional[bool] + override_existing_annotations_rule: Optional[ConflictResolutionStrategy] + batch_priority: Optional[int] + + +def build_annotations_input(project_ontology_mapping: Optional[Dict[str, str]], + source_project_id: str): + return { + "projectId": + source_project_id, + "featureSchemaIdsMapping": + project_ontology_mapping if project_ontology_mapping else {}, + } + + +def build_destination_task_queue_input(task_queue_id: str): + destination_task_queue = { + "type": "id", + "value": task_queue_id + } if task_queue_id else { + "type": "done" + } + return destination_task_queue + + +def build_predictions_input(model_run_ontology_mapping: Optional[Dict[str, + str]], + source_model_run_id: str): + return { + "featureSchemaIdsMapping": + model_run_ontology_mapping if model_run_ontology_mapping else {}, + "modelRunId": + source_model_run_id, + "minConfidence": + 0, + "maxConfidence": + 1 + } diff --git a/labelbox/schema/task.py b/labelbox/schema/task.py index 762845128..717f12ff5 100644 --- a/labelbox/schema/task.py +++ b/labelbox/schema/task.py @@ -42,6 +42,7 @@ class Task(DbObject): result_url = Field.String("result_url", "result") errors_url = Field.String("errors_url", "errors") type = Field.String("type") + metadata = Field.Json("metadata") _user: Optional["User"] = None # Relationships @@ -92,7 +93,9 @@ def errors(self) -> Optional[Dict[str, Any]]: return self.failed_data_rows elif self.type == "export-data-rows": return self._fetch_remote_json(remote_json_field='errors_url') - elif self.type == "add-data-rows-to-batch" or self.type == "send-to-task-queue": + elif (self.type == "add-data-rows-to-batch" or + self.type == "send-to-task-queue" or + self.type == "send-to-annotate"): if self.status == "FAILED": # for these tasks, the error is embedded in the result itself return json.loads(self.result_url) diff --git a/tests/integration/annotation_import/conftest.py 
b/tests/integration/annotation_import/conftest.py index 4194af379..8f7bae8f2 100644 --- a/tests/integration/annotation_import/conftest.py +++ b/tests/integration/annotation_import/conftest.py @@ -122,11 +122,35 @@ def text_data_row(rand_gen): } +@pytest.fixture() +def llm_prompt_creation_data_row(rand_gen): + return { + "row_data": { + "type": "application/llm.prompt-creation", + "version": 1 + }, + "global_key": rand_gen(str) + } + + +@pytest.fixture() +def llm_prompt_response_data_row(rand_gen): + return { + "row_data": { + "type": "application/llm.prompt-response-creation", + "version": 1 + }, + "global_key": rand_gen(str) + } + + @pytest.fixture def data_row_json_by_data_type(audio_data_row, conversation_data_row, dicom_data_row, geospatial_data_row, html_data_row, image_data_row, document_data_row, - text_data_row, video_data_row): + text_data_row, video_data_row, + llm_prompt_creation_data_row, + llm_prompt_response_data_row): return { 'audio': audio_data_row, 'conversation': conversation_data_row, @@ -137,6 +161,9 @@ def data_row_json_by_data_type(audio_data_row, conversation_data_row, 'document': document_data_row, 'text': text_data_row, 'video': video_data_row, + 'llmpromptcreation': llm_prompt_creation_data_row, + 'llmpromptresponsecreation': llm_prompt_response_data_row, + 'llmresponsecreation': text_data_row } @@ -146,16 +173,33 @@ def exports_v2_by_data_type(expected_export_v2_image, expected_export_v2_audio, expected_export_v2_video, expected_export_v2_conversation, expected_export_v2_dicom, - expected_export_v2_document): + expected_export_v2_document, + expected_export_v2_llm_prompt_creation, + expected_export_v2_llm_prompt_response_creation, + expected_export_v2_llm_response_creation): return { - 'image': expected_export_v2_image, - 'audio': expected_export_v2_audio, - 'html': expected_export_v2_html, - 'text': expected_export_v2_text, - 'video': expected_export_v2_video, - 'conversation': expected_export_v2_conversation, - 'dicom': 
expected_export_v2_dicom, - 'document': expected_export_v2_document, + 'image': + expected_export_v2_image, + 'audio': + expected_export_v2_audio, + 'html': + expected_export_v2_html, + 'text': + expected_export_v2_text, + 'video': + expected_export_v2_video, + 'conversation': + expected_export_v2_conversation, + 'dicom': + expected_export_v2_dicom, + 'document': + expected_export_v2_document, + 'llmpromptcreation': + expected_export_v2_llm_prompt_creation, + 'llmpromptresponsecreation': + expected_export_v2_llm_prompt_response_creation, + 'llmresponsecreation': + expected_export_v2_llm_response_creation } @@ -179,7 +223,10 @@ def annotations_by_data_type(polygon_inference, rectangle_inference, checklist_inference, text_inference ], 'text': [entity_inference, checklist_inference, text_inference], - 'video': [video_checklist_inference] + 'video': [video_checklist_inference], + 'llmpromptcreation': [checklist_inference, text_inference], + 'llmpromptresponsecreation': [checklist_inference, text_inference], + 'llmresponsecreation': [checklist_inference, text_inference] } @@ -207,7 +254,10 @@ def annotations_by_data_type_v2( checklist_inference, text_inference ], 'text': [entity_inference, checklist_inference, text_inference], - 'video': [video_checklist_inference] + 'video': [video_checklist_inference], + 'llmpromptcreation': [checklist_inference, text_inference], + 'llmpromptresponsecreation': [checklist_inference, text_inference], + 'llmresponsecreation': [checklist_inference, text_inference] } @@ -529,6 +579,21 @@ def configured_project(client, initial_dataset, ontology, rand_gen, image_url): project.delete() +@pytest.fixture +def project_with_ontology(client, configured_project, ontology, rand_gen): + project = client.create_project(name=rand_gen(str), + queue_mode=QueueMode.Batch, + media_type=MediaType.Image) + editor = list( + client.get_labeling_frontends( + where=LabelingFrontend.name == "editor"))[0] + project.setup(editor, ontology) + + yield project, 
ontology + + project.delete() + + @pytest.fixture def configured_project_pdf(client, ontology, rand_gen, pdf_url): project = client.create_project(name=rand_gen(str), diff --git a/tests/integration/annotation_import/fixtures/export_v2.py b/tests/integration/annotation_import/fixtures/export_v2.py index 23d4e6552..9bd96e8e6 100644 --- a/tests/integration/annotation_import/fixtures/export_v2.py +++ b/tests/integration/annotation_import/fixtures/export_v2.py @@ -317,3 +317,66 @@ def expected_export_v2_document(): 'relationships': [] } return expected_annotations + + +@pytest.fixture() +def expected_export_v2_llm_prompt_creation(): + expected_annotations = { + 'objects': [], + 'classifications': [{ + 'name': 'checklist', + 'checklist_answers': [{ + 'name': 'option1', + 'classifications': [] + }] + }, { + 'name': 'text', + 'text_answer': { + 'content': 'free form text...' + } + }], + 'relationships': [] + } + return expected_annotations + + +@pytest.fixture() +def expected_export_v2_llm_prompt_response_creation(): + expected_annotations = { + 'objects': [], + 'classifications': [{ + 'name': 'checklist', + 'checklist_answers': [{ + 'name': 'option1', + 'classifications': [] + }] + }, { + 'name': 'text', + 'text_answer': { + 'content': 'free form text...' + } + }], + 'relationships': [] + } + return expected_annotations + + +@pytest.fixture() +def expected_export_v2_llm_response_creation(): + expected_annotations = { + 'objects': [], + 'classifications': [{ + 'name': 'checklist', + 'checklist_answers': [{ + 'name': 'option1', + 'classifications': [] + }] + }, { + 'name': 'text', + 'text_answer': { + 'content': 'free form text...' 
+ } + }], + 'relationships': [] + } + return expected_annotations diff --git a/tests/integration/annotation_import/test_data_types.py b/tests/integration/annotation_import/test_data_types.py index 7e65dac96..937e21088 100644 --- a/tests/integration/annotation_import/test_data_types.py +++ b/tests/integration/annotation_import/test_data_types.py @@ -8,7 +8,7 @@ from labelbox.schema.data_row import DataRow from labelbox.schema.media_type import MediaType import labelbox.types as lb_types -from labelbox.data.annotation_types.data import AudioData, ConversationData, DicomData, DocumentData, HTMLData, ImageData, TextData +from labelbox.data.annotation_types.data import AudioData, ConversationData, DicomData, DocumentData, HTMLData, ImageData, TextData, LlmPromptCreationData, LlmPromptResponseCreationData, LlmResponseCreationData from labelbox.data.serialization import NDJsonConverter from labelbox.schema.annotation_import import AnnotationImportState from utils import remove_keys_recursive, rename_cuid_key_recursive @@ -134,7 +134,8 @@ def create_data_row_for_project(project, dataset, data_row_ndjson, batch_name): # TODO: Add VideoData. Currently label import job finishes without errors but project.export_labels() returns empty list. 
import pytest

from labelbox import UniqueIds, OntologyBuilder
from labelbox.schema.conflict_resolution_strategy import ConflictResolutionStrategy


def test_send_to_annotate_from_model(client, configured_project,
                                     model_run_predictions,
                                     model_run_with_data_rows,
                                     project_with_ontology):
    """Send a model run's data rows, with predictions, to another project.

    The rows are routed to the destination project's "Initial review task"
    queue. Afterwards the destination must hold exactly one batch containing
    those rows, and one label per row.
    """
    source_model_run = model_run_with_data_rows
    data_row_ids = [
        prediction['dataRow']['id'] for prediction in model_run_predictions
    ]
    assert len(data_row_ids) > 0

    destination_project, _ = project_with_ontology

    destination_queues = destination_project.task_queues()
    initial_review_task = next(queue for queue in destination_queues
                               if queue.name == "Initial review task")

    # Map every top-level tool and classification feature schema id of the
    # source ontology to itself (tools first, then classifications).
    source_ontology = OntologyBuilder.from_project(configured_project)
    tool_schema_ids = [tool.feature_schema_id for tool in source_ontology.tools]
    classification_schema_ids = [
        classification.feature_schema_id
        for classification in source_ontology.classifications
    ]
    ontology_mapping = {
        schema_id: schema_id
        for schema_id in tool_schema_ids + classification_schema_ids
    }

    send_params = {
        "predictions_ontology_mapping":
            ontology_mapping,
        "override_existing_annotations_rule":
            ConflictResolutionStrategy.OverrideWithPredictions
    }
    task = source_model_run.send_to_annotate_from_model(
        destination_project_id=destination_project.uid,
        batch_name="batch",
        data_rows=UniqueIds(data_row_ids),
        task_queue_id=initial_review_task.uid,
        params=send_params)

    task.wait_till_done()

    # Check that the data row was sent to the new project
    destination_batches = list(destination_project.batches())
    assert len(destination_batches) == 1

    destination_data_rows = list(destination_batches[0].export_data_rows())
    assert len(destination_data_rows) == len(data_row_ids)
    unexpected_rows = [
        row.uid for row in destination_data_rows if row.uid not in data_row_ids
    ]
    assert not unexpected_rows

    # Since data rows were added to a review queue, predictions should be
    # imported into the project as labels
    destination_project_labels = list(destination_project.labels())
    assert len(destination_project_labels) == len(data_row_ids)
-508,7 +508,7 @@ def _setup_ontology(project): project.setup(editor, ontology_builder.asdict()) # TODO: ontology may not be synchronous after setup. remove sleep when api is more consistent time.sleep(2) - return ontology_builder.from_project(project) + return OntologyBuilder.from_project(project) @pytest.fixture diff --git a/tests/integration/export_v2/conftest.py b/tests/integration/export/conftest.py similarity index 98% rename from tests/integration/export_v2/conftest.py rename to tests/integration/export/conftest.py index af8b4c66f..104ee41dc 100644 --- a/tests/integration/export_v2/conftest.py +++ b/tests/integration/export/conftest.py @@ -314,6 +314,8 @@ def model_run_with_data_rows(client, configured_project_with_ontology, model_run_predictions, model_run, wait_for_label_processing): configured_project_with_ontology.enable_model_assisted_labeling() + use_data_row_ids = [p['dataRow']['id'] for p in model_run_predictions] + model_run.upsert_data_rows(use_data_row_ids) upload_task = LabelImport.create_from_objects( client, configured_project_with_ontology.uid, @@ -326,7 +328,7 @@ def model_run_with_data_rows(client, configured_project_with_ontology, labels = wait_for_label_processing(configured_project_with_ontology) label_ids = [label.uid for label in labels] model_run.upsert_labels(label_ids) - yield model_run + yield model_run, labels model_run.delete() # TODO: Delete resources when that is possible .. 
import time

from labelbox import DataRow


def test_export_data_rows(client, data_row, wait_for_data_row_processing):
    """Export a single data row with ``DataRow.export_v2`` three ways.

    The row is addressed as an object, by uid and by global key; every export
    must complete without errors and return exactly that one row.
    """
    # Ensure created data rows are indexed
    data_row = wait_for_data_row_processing(client, data_row)
    time.sleep(7)  # temp fix for ES indexing delay

    def _assert_only_this_row_exported(task):
        # Shared checks for each of the three export variants below.
        task.wait_till_done()
        assert task.status == "COMPLETE"
        assert task.errors is None
        assert len(task.result) == 1
        assert task.result[0]["data_row"]["id"] == data_row.uid

    _assert_only_this_row_exported(
        DataRow.export_v2(client=client, data_rows=[data_row]))
    _assert_only_this_row_exported(
        DataRow.export_v2(client=client, data_rows=[data_row.uid]))
    _assert_only_this_row_exported(
        DataRow.export_v2(client=client, global_keys=[data_row.global_key]))
import time


def _model_run_export_v2_results(model_run, task_name, params, num_retries=5):
    """Export model run results and retry if no results are returned.

    Each attempt starts a new ``export_v2`` task and asserts that it is named
    ``task_name``, completes and reports no errors. An empty result costs one
    retry and a five second pause; once the retries are used up an empty list
    is returned.
    """
    attempts_left = num_retries
    while attempts_left > 0:
        export = model_run.export_v2(task_name, params=params)
        assert export.name == task_name
        export.wait_till_done()
        assert export.status == "COMPLETE"
        assert export.errors is None

        results = export.result
        if len(results) != 0:
            return results

        # Nothing exported yet: use up one attempt and pause before retrying.
        attempts_left -= 1
        time.sleep(5)
    return []


def test_model_run_export_v2(model_run_with_data_rows):
    """Export a model run and check that ids in the export are known label ids.

    One result is expected per model run data row, each carrying media
    attributes because the export was requested with ``media_attributes``.
    """
    model_run, labels = model_run_with_data_rows
    known_label_ids = [label.uid for label in labels]
    expected_data_rows = list(model_run.model_run_data_rows())

    export_params = {"media_attributes": True, "predictions": True}
    task_results = _model_run_export_v2_results(model_run, "test_task",
                                                export_params)
    assert len(task_results) == len(expected_data_rows)

    for row in task_results:
        # Check export param handling
        assert 'media_attributes' in row and row['media_attributes'] is not None
        run_export = row['experiments'][model_run.model_id]['runs'][
            model_run.uid]
        exported_label_ids = {label['id'] for label in run_export['labels']}
        exported_prediction_ids = {
            prediction['id'] for prediction in run_export['predictions']
        }
        assert exported_label_ids.issubset(known_label_ids)
        assert exported_prediction_ids.issubset(known_label_ids)
a/tests/integration/export_v2/test_export_project.py b/tests/integration/export/legacy/test_export_project.py similarity index 100% rename from tests/integration/export_v2/test_export_project.py rename to tests/integration/export/legacy/test_export_project.py diff --git a/tests/integration/export_v2/test_export_slice.py b/tests/integration/export/legacy/test_export_slice.py similarity index 100% rename from tests/integration/export_v2/test_export_slice.py rename to tests/integration/export/legacy/test_export_slice.py diff --git a/tests/integration/export_v2/test_export_video.py b/tests/integration/export/legacy/test_export_video.py similarity index 100% rename from tests/integration/export_v2/test_export_video.py rename to tests/integration/export/legacy/test_export_video.py diff --git a/tests/integration/export_v2/test_legacy_export.py b/tests/integration/export/legacy/test_legacy_export.py similarity index 100% rename from tests/integration/export_v2/test_legacy_export.py rename to tests/integration/export/legacy/test_legacy_export.py diff --git a/tests/integration/export/streamable/test_export_data_rows_streamable.py b/tests/integration/export/streamable/test_export_data_rows_streamable.py new file mode 100644 index 000000000..8da08a833 --- /dev/null +++ b/tests/integration/export/streamable/test_export_data_rows_streamable.py @@ -0,0 +1,82 @@ +import json +import time + +import pytest + +from labelbox import DataRow, ExportTask, StreamType + + +class TestExportDataRow: + + def test_with_data_row_object(self, client, data_row, + wait_for_data_row_processing): + data_row = wait_for_data_row_processing(client, data_row) + time.sleep(7) # temp fix for ES indexing delay + export_task = DataRow.export( + client=client, + data_rows=[data_row], + task_name="TestExportDataRow:test_with_data_row_object", + ) + export_task.wait_till_done() + assert export_task.status == "COMPLETE" + assert isinstance(export_task, ExportTask) + assert export_task.has_result() + assert 
export_task.has_errors() is False + assert export_task.get_total_file_size( + stream_type=StreamType.RESULT) > 0 + assert export_task.get_total_lines(stream_type=StreamType.RESULT) == 1 + assert (json.loads(list(export_task.get_stream())[0].json_str) + ["data_row"]["id"] == data_row.uid) + + def test_with_id(self, client, data_row, wait_for_data_row_processing): + data_row = wait_for_data_row_processing(client, data_row) + time.sleep(7) # temp fix for ES indexing delay + export_task = DataRow.export(client=client, + data_rows=[data_row.uid], + task_name="TestExportDataRow:test_with_id") + export_task.wait_till_done() + assert export_task.status == "COMPLETE" + assert isinstance(export_task, ExportTask) + assert export_task.has_result() + assert export_task.has_errors() is False + assert export_task.get_total_file_size( + stream_type=StreamType.RESULT) > 0 + assert export_task.get_total_lines(stream_type=StreamType.RESULT) == 1 + assert (json.loads(list(export_task.get_stream())[0].json_str) + ["data_row"]["id"] == data_row.uid) + + def test_with_global_key(self, client, data_row, + wait_for_data_row_processing): + data_row = wait_for_data_row_processing(client, data_row) + time.sleep(7) # temp fix for ES indexing delay + export_task = DataRow.export( + client=client, + global_keys=[data_row.global_key], + task_name="TestExportDataRow:test_with_global_key", + ) + export_task.wait_till_done() + assert export_task.status == "COMPLETE" + assert isinstance(export_task, ExportTask) + assert export_task.has_result() + assert export_task.has_errors() is False + assert export_task.get_total_file_size( + stream_type=StreamType.RESULT) > 0 + assert export_task.get_total_lines(stream_type=StreamType.RESULT) == 1 + assert (json.loads(list(export_task.get_stream())[0].json_str) + ["data_row"]["id"] == data_row.uid) + + def test_with_invalid_id(self, client): + export_task = DataRow.export( + client=client, + data_rows=["invalid_id"], + 
import json

import pytest

from labelbox import ExportTask, StreamType


class TestExportDataset:
    """Streamable ``dataset.export`` tests: no filter, data row ids, global keys.

    Every test passes ``3`` to the ``data_rows`` fixture through indirect
    parametrization.
    """

    @pytest.mark.parametrize("data_rows", [3], indirect=True)
    def test_export(self, dataset, data_rows):
        """Export without filters and expect every data row id in the stream."""
        expected_data_row_ids = [dr.uid for dr in data_rows]

        export_task = dataset.export(task_name="TestExportDataset:test_export")
        export_task.wait_till_done()

        assert export_task.status == "COMPLETE"
        assert isinstance(export_task, ExportTask)
        assert export_task.has_result()
        assert export_task.has_errors() is False
        assert export_task.get_total_file_size(
            stream_type=StreamType.RESULT) > 0
        assert export_task.get_total_lines(
            stream_type=StreamType.RESULT) == len(expected_data_row_ids)
        data_row_ids = [
            json.loads(line.json_str)["data_row"]["id"]
            for line in export_task.get_stream()
        ]
        # list.sort() sorts in place and returns None, so comparing two
        # .sort() results is always None == None. Compare sorted copies.
        assert sorted(data_row_ids) == sorted(expected_data_row_ids)

    @pytest.mark.parametrize("data_rows", [3], indirect=True)
    def test_with_data_row_filter(self, dataset, data_rows):
        """Filter the export by data row id and expect exactly those rows."""
        # NOTE(review): the slice size equals the value given to the data_rows
        # fixture, so if that fixture creates three rows this filter excludes
        # nothing - confirm whether a smaller size was intended.
        datarow_filter_size = 3
        expected_data_row_ids = [dr.uid for dr in data_rows
                                ][:datarow_filter_size]
        filters = {"data_row_ids": expected_data_row_ids}

        export_task = dataset.export(
            filters=filters,
            task_name="TestExportDataset:test_with_data_row_filter")
        export_task.wait_till_done()

        assert export_task.status == "COMPLETE"
        assert isinstance(export_task, ExportTask)
        assert export_task.has_result()
        assert export_task.has_errors() is False
        assert export_task.get_total_file_size(
            stream_type=StreamType.RESULT) > 0
        assert export_task.get_total_lines(
            stream_type=StreamType.RESULT) == datarow_filter_size
        data_row_ids = [
            json.loads(line.json_str)["data_row"]["id"]
            for line in export_task.get_stream()
        ]
        # Compare sorted copies; .sort() returns None (see test_export).
        assert sorted(data_row_ids) == sorted(expected_data_row_ids)

    @pytest.mark.parametrize("data_rows", [3], indirect=True)
    def test_with_global_key_filter(self, dataset, data_rows):
        """Filter the export by two global keys and expect exactly those rows."""
        datarow_filter_size = 2
        expected_global_keys = [dr.global_key for dr in data_rows
                               ][:datarow_filter_size]
        filters = {"global_keys": expected_global_keys}

        export_task = dataset.export(
            filters=filters,
            task_name="TestExportDataset:test_with_global_key_filter")
        export_task.wait_till_done()

        assert export_task.status == "COMPLETE"
        assert isinstance(export_task, ExportTask)
        assert export_task.has_result()
        assert export_task.has_errors() is False
        assert export_task.get_total_file_size(
            stream_type=StreamType.RESULT) > 0
        assert export_task.get_total_lines(
            stream_type=StreamType.RESULT) == datarow_filter_size
        global_keys = [
            json.loads(line.json_str)["data_row"]["global_key"]
            for line in export_task.get_stream()
        ]
        # Compare sorted copies; .sort() returns None (see test_export).
        assert sorted(global_keys) == sorted(expected_global_keys)
model_run_with_data_rows): + model_run, labels = model_run_with_data_rows + label_ids = [label.uid for label in labels] + expected_data_rows = list(model_run.model_run_data_rows()) + + task_name = "TestExportModelRun:test_export" + params = {"media_attributes": True, "predictions": True} + export_task = model_run.export(task_name, params=params) + assert export_task.name == task_name + export_task.wait_till_done() + + assert export_task.status == "COMPLETE" + assert isinstance(export_task, ExportTask) + assert export_task.has_result() + assert export_task.has_errors() is False + assert export_task.get_total_file_size( + stream_type=StreamType.RESULT) > 0 + assert export_task.get_total_lines( + stream_type=StreamType.RESULT) == len(expected_data_rows) + + for data in export_task.get_stream(): + obj = json.loads(data.json_str) + assert "media_attributes" in obj and obj[ + "media_attributes"] is not None + exported_model_run = obj["experiments"][model_run.model_id]["runs"][ + model_run.uid] + task_label_ids_set = set( + map(lambda label: label["id"], exported_model_run["labels"])) + task_prediction_ids_set = set( + map(lambda prediction: prediction["id"], + exported_model_run["predictions"])) + for label_id in task_label_ids_set: + assert label_id in label_ids + for prediction_id in task_prediction_ids_set: + assert prediction_id in label_ids diff --git a/tests/integration/export/streamable/test_export_project_streamable.py b/tests/integration/export/streamable/test_export_project_streamable.py new file mode 100644 index 000000000..e32203a70 --- /dev/null +++ b/tests/integration/export/streamable/test_export_project_streamable.py @@ -0,0 +1,342 @@ +from datetime import datetime, timezone, timedelta +import json + +import pytest +import uuid +from typing import Tuple +from labelbox.schema.export_task import ExportTask, StreamType + +from labelbox.schema.media_type import MediaType +from labelbox import Project, Dataset +from labelbox.schema.data_row import DataRow 
from labelbox.schema.label import Label

IMAGE_URL = (
    "https://storage.googleapis.com/lb-artifacts-testing-public/sdk_integration_test/potato.jpeg"
)


class TestExportProject:
    """Streamable ``project.export`` tests covering params and filters."""

    @pytest.fixture
    def project_export(self):
        """Return a helper that runs ``project.export`` to completion.

        The helper waits for the task, asserts that it finished with status
        "COMPLETE" and is an ``ExportTask``, and returns the task.
        """

        def _project_export(project, task_name, filters=None, params=None):
            export_task = project.export(
                task_name=task_name,
                filters=filters,
                params=params,
            )
            export_task.wait_till_done()

            assert export_task.status == "COMPLETE"
            assert isinstance(export_task, ExportTask)
            return export_task

        return _project_export

    def test_export(
        self,
        client,
        configured_project_with_label,
        wait_for_data_row_processing,
        project_export,
    ):
        """Export a labeled project and check every section of each row."""
        project, dataset, data_row, label = configured_project_with_label
        data_row = wait_for_data_row_processing(client, data_row)
        label_id = label.uid
        task_name = "TestExportProject:test_export"
        params = {
            "include_performance_details": True,
            "include_labels": True,
            "media_type_override": MediaType.Image,
            "project_details": True,
            "data_row_details": True,
        }
        export_task = project_export(project, task_name, params=params)
        assert export_task.has_result()
        assert export_task.has_errors() is False
        assert export_task.get_total_file_size(
            stream_type=StreamType.RESULT) > 0
        assert export_task.get_total_lines(stream_type=StreamType.RESULT) > 0

        for data in export_task.get_stream():
            obj = json.loads(data.json_str)
            task_media_attributes = obj["media_attributes"]
            task_project = obj["projects"][project.uid]
            task_project_label_ids_set = {
                exported_label["id"] for exported_label in task_project["labels"]
            }
            task_project_details = task_project["project_details"]
            task_data_row = obj["data_row"]
            task_data_row_details = task_data_row["details"]

            assert label_id in task_project_label_ids_set
            # data row
            assert task_data_row["id"] == data_row.uid
            assert task_data_row["external_id"] == data_row.external_id
            assert task_data_row["row_data"] == data_row.row_data

            # data row details
            assert task_data_row_details["dataset_id"] == dataset.uid
            assert task_data_row_details["dataset_name"] == dataset.name

            # The exported timestamp is compared with the dataset's creation
            # time; both are forced to UTC and may differ by up to 2 seconds.
            actual_time = datetime.fromisoformat(
                task_data_row_details["created_at"])
            expected_time = datetime.fromisoformat(
                dataset.created_at.strftime("%Y-%m-%dT%H:%M:%S.%f"))
            actual_time = actual_time.replace(tzinfo=timezone.utc)
            expected_time = expected_time.replace(tzinfo=timezone.utc)
            tolerance = timedelta(seconds=2)
            assert abs(actual_time - expected_time) <= tolerance

            assert task_data_row_details["last_activity_at"] is not None
            assert task_data_row_details["created_by"] is not None

            # media attributes
            assert task_media_attributes[
                "mime_type"] == data_row.media_attributes["mimeType"]

            # project name and details
            assert task_project["name"] == project.name
            batch = next(project.batches())
            assert task_project_details["batch_id"] == batch.uid
            assert task_project_details["batch_name"] == batch.name
            assert task_project_details["priority"] is not None
            assert task_project_details[
                "consensus_expected_label_count"] is not None
            assert task_project_details["workflow_history"] is not None

            # label details
            assert task_project["labels"][0]["id"] == label_id

    def test_with_date_filters(
        self,
        client,
        configured_project_with_label,
        wait_for_data_row_processing,
        project_export,
    ):
        """Export with date-range and "InReview" task queue status filters.

        The data row is first moved to the project's manual review queue, so
        every exported row must report workflow status "IN_REVIEW".
        """
        project, _, data_row, label = configured_project_with_label
        data_row = wait_for_data_row_processing(client, data_row)
        label_id = label.uid
        task_name = "TestExportProject:test_with_date_filters"
        filters = {
            "last_activity_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"],
            "label_created_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"],
            "task_queue_status": "InReview",
        }
        include_performance_details = True
        params = {
            "performance_details": include_performance_details,
            "include_labels": True,
            "project_details": True,
            "media_type_override": MediaType.Image,
        }
        task_queues = project.task_queues()
        review_queue = next(
            tq for tq in task_queues if tq.queue_type == "MANUAL_REVIEW_QUEUE")
        project.move_data_rows_to_task_queue([data_row.uid], review_queue.uid)
        export_task = project_export(project,
                                     task_name,
                                     filters=filters,
                                     params=params)
        assert export_task.has_result()
        assert export_task.has_errors() is False
        assert export_task.get_total_file_size(
            stream_type=StreamType.RESULT) > 0
        assert export_task.get_total_lines(stream_type=StreamType.RESULT) > 0

        for data in export_task.get_stream():
            obj = json.loads(data.json_str)
            task_project = obj["projects"][project.uid]
            task_project_label_ids_set = {
                exported_label["id"] for exported_label in task_project["labels"]
            }
            assert label_id in task_project_label_ids_set
            assert task_project["project_details"][
                "workflow_status"] == "IN_REVIEW"

    def test_with_iso_date_filters(
        self,
        client,
        configured_project_with_label,
        wait_for_data_row_processing,
        project_export,
    ):
        """Export with ISO-8601 date ranges that carry a UTC offset."""
        project, _, data_row, label = configured_project_with_label
        data_row = wait_for_data_row_processing(client, data_row)
        label_id = label.uid
        task_name = "TestExportProject:test_with_iso_date_filters"
        filters = {
            "last_activity_at": [
                "2000-01-01T00:00:00+0230", "2050-01-01T00:00:00+0230"
            ],
            "label_created_at": [
                "2000-01-01T00:00:00+0230", "2050-01-01T00:00:00+0230"
            ],
        }
        export_task = project_export(project, task_name, filters=filters)
        assert export_task.has_result()
        assert export_task.has_errors() is False
        assert export_task.get_total_file_size(
            stream_type=StreamType.RESULT) > 0
        assert export_task.get_total_lines(stream_type=StreamType.RESULT) > 0
        assert (label_id == json.loads(
            list(export_task.get_stream())[0].json_str)["projects"][project.uid]
                ["labels"][0]["id"])

    def test_with_iso_date_filters_no_start_date(
        self,
        client,
        configured_project_with_label,
        wait_for_data_row_processing,
        project_export,
    ):
        """Export with an open-ended range: no start date, only an end date."""
        project, _, data_row, label = configured_project_with_label
        data_row = wait_for_data_row_processing(client, data_row)
        label_id = label.uid
        task_name = "TestExportProject:test_with_iso_date_filters_no_start_date"
        filters = {"last_activity_at": [None, "2050-01-01T00:00:00+0230"]}
        export_task = project_export(project, task_name, filters=filters)
        assert export_task.has_result()
        assert export_task.has_errors() is False
        assert export_task.get_total_file_size(
            stream_type=StreamType.RESULT) > 0
        assert export_task.get_total_lines(stream_type=StreamType.RESULT) > 0
        assert (label_id == json.loads(
            list(export_task.get_stream())[0].json_str)["projects"][project.uid]
                ["labels"][0]["id"])

    def test_with_iso_date_filters_and_future_start_date(
        self,
        client,
        configured_project_with_label,
        wait_for_data_row_processing,
        project_export,
    ):
        """A label-created range starting in 2050 must export nothing."""
        project, _, data_row, _label = configured_project_with_label
        data_row = wait_for_data_row_processing(client, data_row)
        task_name = "TestExportProject:test_with_iso_date_filters_and_future_start_date"
        filters = {"label_created_at": ["2050-01-01T00:00:00+0230", None]}
        export_task = project_export(project, task_name, filters=filters)
        assert export_task.has_result() is False
        assert export_task.has_errors() is False
        assert export_task.get_total_file_size(
            stream_type=StreamType.RESULT) is None
        assert export_task.get_total_lines(
            stream_type=StreamType.RESULT) is None

    @pytest.mark.parametrize("data_rows", [3], indirect=True)
    def test_with_data_row_filter(
            self, configured_batch_project_with_multiple_datarows,
            project_export):
        """Filter by two data row ids and expect exactly those rows back."""
        project, _, data_rows = configured_batch_project_with_multiple_datarows
        datarow_filter_size = 2
        expected_data_row_ids = [dr.uid for dr in data_rows
                                ][:datarow_filter_size]
        task_name = "TestExportProject:test_with_data_row_filter"
        filters = {
            "last_activity_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"],
            "label_created_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"],
            "data_row_ids": expected_data_row_ids,
        }
        params = {
            "data_row_details": True,
            "media_type_override": MediaType.Image
        }
        export_task = project_export(project,
                                     task_name,
                                     filters=filters,
                                     params=params)
        assert export_task.has_result()
        assert export_task.has_errors() is False
        assert export_task.get_total_file_size(
            stream_type=StreamType.RESULT) > 0
        # only 2 datarows should be exported
        assert export_task.get_total_lines(
            stream_type=StreamType.RESULT) == datarow_filter_size
        data_row_ids = [
            json.loads(line.json_str)["data_row"]["id"]
            for line in export_task.get_stream()
        ]
        # list.sort() sorts in place and returns None, so comparing two
        # .sort() results is always None == None. Compare sorted copies.
        assert sorted(data_row_ids) == sorted(expected_data_row_ids)

    @pytest.mark.parametrize("data_rows", [3], indirect=True)
    def test_with_global_key_filter(
            self, configured_batch_project_with_multiple_datarows,
            project_export):
        """Filter by two global keys and expect exactly those rows back."""
        project, _, data_rows = configured_batch_project_with_multiple_datarows
        datarow_filter_size = 2
        expected_global_keys = [dr.global_key for dr in data_rows
                               ][:datarow_filter_size]
        task_name = "TestExportProject:test_with_global_key_filter"
        filters = {
            "last_activity_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"],
            "label_created_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"],
            "global_keys": expected_global_keys,
        }
        params = {
            "data_row_details": True,
            "media_type_override": MediaType.Image
        }
        export_task = project_export(project,
                                     task_name,
                                     filters=filters,
                                     params=params)
        assert export_task.has_result()
        assert export_task.has_errors() is False
        assert export_task.get_total_file_size(
            stream_type=StreamType.RESULT) > 0
        # only 2 datarows should be exported
        assert export_task.get_total_lines(
            stream_type=StreamType.RESULT) == datarow_filter_size
        global_keys = [
            json.loads(line.json_str)["data_row"]["global_key"]
            for line in export_task.get_stream()
        ]
        # Compare sorted copies; .sort() returns None and would make this
        # assertion always pass.
        assert sorted(global_keys) == sorted(expected_global_keys)

    def test_batch(
        self,
        configured_batch_project_with_label: Tuple[Project, Dataset, DataRow,
                                                   Label],
        dataset: Dataset,
        image_url: str,
        project_export,
    ):
        """Filter by batch id and expect only the first batch's rows.

        A second batch is added to the project before exporting; the exported
        line count must still equal the size of the first batch.
        """
        # The dataset from the project fixture replaces the `dataset` argument.
        project, dataset, *_ = configured_batch_project_with_label
        batch = list(project.batches())[0]
        filters = {
            "last_activity_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"],
            "label_created_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"],
            "batch_ids": [batch.uid],
        }
        params = {
            "include_performance_details": True,
            "include_labels": True,
            "media_type_override": MediaType.Image,
        }
        task_name = "TestExportProject:test_batch"
        task = dataset.create_data_rows([
            {
                "row_data": image_url,
                "external_id": "my-image"
            },
        ] * 2)
        task.wait_till_done()
        data_rows = [dr.uid for dr in dataset.export_data_rows()]
        batch_one = f"batch one {uuid.uuid4()}"

        # This test creates two batches; only one batch should be exported.
        # Creating a second batch that will not be used in the export due to
        # the filter: batch_ids
        project.create_batch(batch_one, data_rows)

        export_task = project_export(project,
                                     task_name,
                                     filters=filters,
                                     params=params)
        assert export_task.has_result()
        assert export_task.has_errors() is False
        assert export_task.get_total_file_size(
            stream_type=StreamType.RESULT) > 0
        assert export_task.get_total_lines(
            stream_type=StreamType.RESULT) == batch.size
ExportTask, StreamType + + +class TestExportVideo: + + @pytest.fixture + def user_id(self, client): + return client.get_user().uid + + @pytest.fixture + def org_id(self, client): + return client.get_organization().uid + + def test_export( + self, + client, + configured_project_without_data_rows, + video_data, + video_data_row, + bbox_video_annotation_objects, + rand_gen, + ): + project = configured_project_without_data_rows + project_id = project.uid + labels = [] + + _, data_row_uids = video_data + project.create_batch( + rand_gen(str), + data_row_uids, # sample of data row objects + 5, # priority between 1(Highest) - 5(lowest) + ) + + for data_row_uid in data_row_uids: + labels = [ + lb_types.Label(data=VideoData(uid=data_row_uid), + annotations=bbox_video_annotation_objects) + ] + + label_import = lb.LabelImport.create_from_objects( + client, project_id, f"test-import-{project_id}", labels) + label_import.wait_until_done() + + assert label_import.state == AnnotationImportState.FINISHED + assert len(label_import.errors) == 0 + + export_task = project.export( + params={ + "performance_details": False, + "label_details": True, + "interpolated_frames": True, + }, + task_name="TestExportVideo:test_export", + ) + export_task.wait_till_done() + assert export_task.status == "COMPLETE" + assert isinstance(export_task, ExportTask) + assert export_task.has_result() + assert export_task.has_errors() is False + assert export_task.get_total_file_size( + stream_type=StreamType.RESULT) > 0 + + export_data = json.loads(list(export_task.get_stream())[0].json_str) + data_row_export = export_data["data_row"] + assert data_row_export["global_key"] == video_data_row["global_key"] + assert data_row_export["row_data"] == video_data_row["row_data"] + assert export_data["media_attributes"]["mime_type"] == "video/mp4" + assert export_data["media_attributes"][ + "frame_rate"] == 10 # as per the video_data fixture + assert (export_data["media_attributes"]["frame_count"] == 100 + ) # as per 
the video_data fixture + expected_export_label = { + "label_kind": "Video", + "version": "1.0.0", + "id": "clgjnpysl000xi3zxtnp29fug", + "label_details": { + "created_at": "2023-04-16T17:04:23+00:00", + "updated_at": "2023-04-16T17:04:23+00:00", + "created_by": "vbrodsky@labelbox.com", + "content_last_updated_at": "2023-04-16T17:04:23+00:00", + "reviews": [], + }, + "annotations": { + "frames": { + "13": { + "objects": { + "clgjnpyse000ui3zx6fr1d880": { + "feature_id": "clgjnpyse000ui3zx6fr1d880", + "name": "bbox", + "annotation_kind": "VideoBoundingBox", + "classifications": [{ + "feature_id": "clgjnpyse000vi3zxtgtfh01y", + "name": "nested", + "radio_answer": { + "feature_id": + "clgjnpyse000wi3zxnxgv53ps", + "name": + "radio_option_1", + "classifications": [], + }, + }], + "bounding_box": { + "top": 98.0, + "left": 146.0, + "height": 243.0, + "width": 236.0, + }, + } + }, + "classifications": [], + }, + "18": { + "objects": { + "clgjnpyse000ui3zx6fr1d880": { + "feature_id": "clgjnpyse000ui3zx6fr1d880", + "name": "bbox", + "annotation_kind": "VideoBoundingBox", + "classifications": [{ + "feature_id": "clgjnpyse000vi3zxtgtfh01y", + "name": "nested", + "radio_answer": { + "feature_id": + "clgjnpyse000wi3zxnxgv53ps", + "name": + "radio_option_1", + "classifications": [], + }, + }], + "bounding_box": { + "top": 98.0, + "left": 146.0, + "height": 243.0, + "width": 236.0, + }, + } + }, + "classifications": [], + }, + "19": { + "objects": { + "clgjnpyse000ui3zx6fr1d880": { + "feature_id": "clgjnpyse000ui3zx6fr1d880", + "name": "bbox", + "annotation_kind": "VideoBoundingBox", + "classifications": [], + "bounding_box": { + "top": 98.0, + "left": 146.0, + "height": 243.0, + "width": 236.0, + }, + } + }, + "classifications": [], + }, + }, + "segments": { + "clgjnpyse000ui3zx6fr1d880": [[13, 13], [18, 19]] + }, + "key_frame_feature_map": { + "clgjnpyse000ui3zx6fr1d880": { + "13": True, + "18": False, + "19": True + } + }, + "classifications": [], + }, + } + + 
project_export_labels = export_data["projects"][project_id]["labels"] + assert len(project_export_labels) == len( + labels + ) # note we create 1 label per data row, 1 data row so 1 label + export_label = project_export_labels[0] + assert (export_label["label_kind"]) == "Video" + + assert (export_label["label_details"].keys() + ) == expected_export_label["label_details"].keys() + + expected_frames_ids = [ + vannotation.frame for vannotation in bbox_video_annotation_objects + ] + export_annotations = export_label["annotations"] + export_frames = export_annotations["frames"] + export_frames_ids = [int(frame_id) for frame_id in export_frames.keys()] + all_frames_exported = [] + for (value) in ( + expected_frames_ids + ): # note need to understand why we are exporting more frames than we created + if value not in export_frames_ids: + all_frames_exported.append(value) + assert len(all_frames_exported) == 0 + + # BEGINNING OF THE VIDEO INTERPOLATION ASSERTIONS + first_frame_id = bbox_video_annotation_objects[0].frame + last_frame_id = bbox_video_annotation_objects[-1].frame + + # Generate list of frames with frames in between, e.g. 
13, 14, 15, 16, 17, 18, 19 + expected_frame_ids = list(range(first_frame_id, last_frame_id + 1)) + + assert export_frames_ids == expected_frame_ids + + exported_objects_dict = export_frames[str(first_frame_id)]["objects"] + + # Get the label ID + first_exported_label_id = list(exported_objects_dict.keys())[0] + + # Since the bounding box moves to the right, the interpolated frame content should start + # a little bit more far to the right + assert (export_frames[str(first_frame_id + 1)]["objects"] + [first_exported_label_id]["bounding_box"]["left"] + > export_frames[str(first_frame_id)]["objects"] + [first_exported_label_id]["bounding_box"]["left"]) + # But it shouldn't be further than the last frame + assert (export_frames[str(first_frame_id + 1)]["objects"] + [first_exported_label_id]["bounding_box"]["left"] + < export_frames[str(last_frame_id)]["objects"] + [first_exported_label_id]["bounding_box"]["left"]) + # END OF THE VIDEO INTERPOLATION ASSERTIONS + + frame_with_nested_classifications = export_frames["13"] + annotation = None + for _, a in frame_with_nested_classifications["objects"].items(): + if a["name"] == "bbox": + annotation = a + break + assert annotation is not None + assert annotation["annotation_kind"] == "VideoBoundingBox" + assert annotation["classifications"] + assert annotation["bounding_box"] == { + "top": 98.0, + "left": 146.0, + "height": 243.0, + "width": 236.0, + } + classifications = annotation["classifications"] + classification = classifications[0]["radio_answer"] + assert classification["name"] == "radio_option_1" + subclassifications = classification["classifications"] + # NOTE predictions services does not support nested classifications at the moment, see + # https://labelbox.atlassian.net/browse/AL-5588 + assert len(subclassifications) == 0 diff --git a/tests/integration/export_v2/test_export_model_run.py b/tests/integration/export_v2/test_export_model_run.py deleted file mode 100644 index caaef2e7f..000000000 --- 
a/tests/integration/export_v2/test_export_model_run.py +++ /dev/null @@ -1,50 +0,0 @@ -import time - - -def _model_run_export_v2_results(model_run, task_name, params, num_retries=5): - """Export model run results and retry if no results are returned.""" - while (num_retries > 0): - task = model_run.export_v2(task_name, params=params) - assert task.name == task_name - task.wait_till_done() - assert task.status == "COMPLETE" - assert task.errors is None - task_results = task.result - if len(task_results) == 0: - num_retries -= 1 - time.sleep(5) - else: - return task_results - return [] - - -def test_model_run_export_v2(model_run_with_data_rows, configured_project): - task_name = "test_task" - media_attributes = True - params = {"media_attributes": media_attributes, "predictions": True} - task_results = _model_run_export_v2_results(model_run_with_data_rows, - task_name, params) - label_ids = [label.uid for label in configured_project.labels()] - label_ids_set = set(label_ids) - - assert len(task_results) == len(label_ids) - - for task_result in task_results: - # Check export param handling - if media_attributes: - assert 'media_attributes' in task_result and task_result[ - 'media_attributes'] is not None - else: - assert 'media_attributes' not in task_result or task_result[ - 'media_attributes'] is None - model_run = task_result['experiments'][ - model_run_with_data_rows.model_id]['runs'][ - model_run_with_data_rows.uid] - task_label_ids_set = set( - map(lambda label: label['id'], model_run['labels'])) - task_prediction_ids_set = set( - map(lambda prediction: prediction['id'], model_run['predictions'])) - for label_id in task_label_ids_set: - assert label_id in label_ids_set - for prediction_id in task_prediction_ids_set: - assert prediction_id in label_ids_set diff --git a/tests/integration/support/integration_client.py b/tests/integration/support/integration_client.py index 64fc3b6e1..3ca82d1f5 100644 --- a/tests/integration/support/integration_client.py +++ 
b/tests/integration/support/integration_client.py @@ -98,7 +98,8 @@ def __init__(self, environ: str) -> None: def execute(self, query=None, params=None, check_naming=True, **kwargs): if check_naming and query is not None: - assert re.match(r"(?:query|mutation) \w+PyApi", query) is not None + assert re.match(r"\s*(?:query|mutation) \w+PyApi", + query) is not None self.queries.append((query, params)) return super().execute(query, params, **kwargs) diff --git a/tests/integration/test_client_errors.py b/tests/integration/test_client_errors.py index 32141af5f..411b9e3b0 100644 --- a/tests/integration/test_client_errors.py +++ b/tests/integration/test_client_errors.py @@ -77,17 +77,10 @@ def test_network_error(client): client.create_project(name="Project name") -@pytest.fixture -def project_for_test_invalid_attribute_error(client): - project = client.create_project(name="Project name") - yield project - project.delete() - - -def test_invalid_attribute_error(client, rand_gen, - project_for_test_invalid_attribute_error): - project = project_for_test_invalid_attribute_error - +def test_invalid_attribute_error( + client, + rand_gen, +): # Creation with pytest.raises(labelbox.exceptions.InvalidAttributeError) as excinfo: client.create_project(name="Name", invalid_field="Whatever") @@ -101,18 +94,6 @@ def test_invalid_attribute_error(client, rand_gen, assert excinfo.value.db_object_type == Project assert excinfo.value.field == "invalid_field" - # Relationship expansion filtering - with pytest.raises(labelbox.exceptions.InvalidAttributeError) as excinfo: - project.datasets(where=User.email == "email") - assert excinfo.value.db_object_type == Dataset - assert excinfo.value.field == {User.email} - - # Relationship order-by - with pytest.raises(labelbox.exceptions.InvalidAttributeError) as excinfo: - project.datasets(order_by=User.email.asc) - assert excinfo.value.db_object_type == Dataset - assert excinfo.value.field == {User.email} - # Top-level-fetch with 
pytest.raises(labelbox.exceptions.InvalidAttributeError) as excinfo: client.get_projects(where=User.email == "email") @@ -129,9 +110,9 @@ def get(arg): except labelbox.exceptions.ApiLimitError as e: return e - #Rate limited at 1500 + buffer + # Rate limited at 1500 + buffer n = 1600 - #max of 30 concurrency before the service becomes unavailable + # max of 30 concurrency before the service becomes unavailable with Pool(30) as pool: start = time.time() results = list(pool.imap(get, range(n)), total=n) diff --git a/tests/integration/test_foundry.py b/tests/integration/test_foundry.py new file mode 100644 index 000000000..fbefae24c --- /dev/null +++ b/tests/integration/test_foundry.py @@ -0,0 +1,120 @@ +import labelbox as lb +import pytest +from labelbox.schema.foundry.app import App + +from labelbox.schema.foundry.foundry_client import FoundryClient + +# Yolo object detection model id +TEST_MODEL_ID = "e8b352ce-8f3a-4cd6-93a5-8af904307346" + + +@pytest.fixture() +def random_str(rand_gen): + return rand_gen(str) + + +@pytest.fixture(scope="module") +def foundry_client(client): + return FoundryClient(client) + + +@pytest.fixture() +def ontology(client, random_str): + object_features = [ + lb.Tool(tool=lb.Tool.Type.BBOX, + name="text", + color="#ff0000", + classifications=[ + lb.Classification(class_type=lb.Classification.Type.TEXT, + name="value") + ]) + ] + + ontology_builder = lb.OntologyBuilder(tools=object_features,) + + ontology = client.create_ontology( + f"Test ontology for tesseract model {random_str}", + ontology_builder.asdict(), + media_type=lb.MediaType.Image) + return ontology + + +@pytest.fixture() +def unsaved_app(random_str, ontology): + return App(model_id=TEST_MODEL_ID, + name=f"Test App {random_str}", + description="Test App Description", + inference_params={"confidence": 0.2}, + class_to_schema_id={}, + ontology_id=ontology.uid) + + +@pytest.fixture() +def app(foundry_client, unsaved_app): + app = foundry_client._create_app(unsaved_app) + yield app 
+ foundry_client._delete_app(app.id) + + +def test_create_app(foundry_client, unsaved_app): + app = foundry_client._create_app(unsaved_app) + retrieved_dict = app.dict(exclude={'id', 'created_by'}) + expected_dict = app.dict(exclude={'id', 'created_by'}) + assert retrieved_dict == expected_dict + + +def test_get_app(foundry_client, app): + retrieved_app = foundry_client._get_app(app.id) + retrieved_dict = retrieved_app.dict(exclude={'created_by'}) + expected_dict = app.dict(exclude={'created_by'}) + assert retrieved_dict == expected_dict + + +def test_get_app_with_invalid_id(foundry_client): + with pytest.raises(lb.exceptions.ResourceNotFoundError): + foundry_client._get_app("invalid-id") + + +def test_run_foundry_app_with_data_row_id(foundry_client, data_row, app, + random_str): + data_rows = lb.DataRowIds([data_row.uid]) + task = foundry_client.run_app( + model_run_name=f"test-app-with-datarow-id-{random_str}", + data_rows=data_rows, + app_id=app.id) + task.wait_till_done() + assert task.status == 'COMPLETE' + + +def test_run_foundry_app_with_global_key(foundry_client, data_row, app, + random_str): + data_rows = lb.GlobalKeys([data_row.global_key]) + task = foundry_client.run_app( + model_run_name=f"test-app-with-global-key-{random_str}", + data_rows=data_rows, + app_id=app.id) + task.wait_till_done() + assert task.status == 'COMPLETE' + + +def test_run_foundry_app_returns_model_run_id(foundry_client, data_row, app, random_str): + data_rows = lb.GlobalKeys([data_row.global_key]) + task = foundry_client.run_app( + model_run_name=f"test-app-with-global-key-{random_str}", + data_rows=data_rows, + app_id=app.id) + model_run_id = task.metadata['modelRunId'] + model_run = foundry_client.client.get_model_run(model_run_id) + assert model_run.uid == model_run_id + + +def test_run_foundry_app_with_non_existent_data_rows(foundry_client, data_row, + app, random_str): + data_rows = lb.GlobalKeys([data_row.global_key, "non-existent-global-key"]) + task = foundry_client.run_app( +
model_run_name=f"test-app-with-wrong-key-{random_str}", + data_rows=data_rows, + app_id=app.id) + task.wait_till_done() + # The incorrect data row is filtered out and the task still completes with the correct data row + assert task.status == 'COMPLETE' diff --git a/tests/integration/test_legacy_project.py b/tests/integration/test_legacy_project.py index af85e1988..6573beffd 100644 --- a/tests/integration/test_legacy_project.py +++ b/tests/integration/test_legacy_project.py @@ -15,11 +15,6 @@ def test_project_dataset(client, rand_gen): ) -def test_legacy_project_dataset_relationships(project, dataset): - assert [ds for ds in project.datasets()] == [] - assert [p for p in dataset.projects()] == [] - - def test_project_auto_audit_parameters(client, rand_gen): with pytest.raises( ValueError, diff --git a/tests/integration/test_send_to_annotate.py b/tests/integration/test_send_to_annotate.py new file mode 100644 index 000000000..af789568d --- /dev/null +++ b/tests/integration/test_send_to_annotate.py @@ -0,0 +1,56 @@ +from labelbox import UniqueIds, OntologyBuilder, LabelingFrontend +from labelbox.schema.conflict_resolution_strategy import ConflictResolutionStrategy + + +def test_send_to_annotate_include_annotations( + client, configured_batch_project_with_label, project_pack): + [source_project, _, data_row, _] = configured_batch_project_with_label + destination_project = project_pack[0] + + source_ontology_builder = OntologyBuilder.from_project(source_project) + editor = list( + client.get_labeling_frontends( + where=LabelingFrontend.name == "editor"))[0] + destination_project.setup(editor, source_ontology_builder.asdict()) + + # build an ontology mapping using the top level tools + feature_schema_ids = list( + tool.feature_schema_id for tool in source_ontology_builder.tools) + # create a dictionary of feature schema id to itself + ontology_mapping = dict(zip(feature_schema_ids, feature_schema_ids)) + + try: + queues = destination_project.task_queues() + 
initial_review_task = next( + q for q in queues if q.name == "Initial review task") + + # Send the data row to the new project + task = client.send_to_annotate_from_catalog( + destination_project_id=destination_project.uid, + task_queue_id=initial_review_task.uid, + batch_name="test-batch", + data_rows=UniqueIds([data_row.uid]), + params={ + "source_project_id": + source_project.uid, + "annotations_ontology_mapping": + ontology_mapping, + "override_existing_annotations_rule": + ConflictResolutionStrategy.OverrideWithAnnotations + }) + + task.wait_till_done() + + # Check that the data row was sent to the new project + destination_batches = list(destination_project.batches()) + assert len(destination_batches) == 1 + + destination_data_rows = list(destination_batches[0].export_data_rows()) + assert len(destination_data_rows) == 1 + assert destination_data_rows[0].uid == data_row.uid + + # Verify annotations were copied into the destination project + destination_project_labels = (list(destination_project.labels())) + assert len(destination_project_labels) == 1 + finally: + destination_project.delete() diff --git a/tests/integration/test_sorting.py b/tests/integration/test_sorting.py index 542289455..cdecd6f2d 100644 --- a/tests/integration/test_sorting.py +++ b/tests/integration/test_sorting.py @@ -1,44 +1,6 @@ import pytest from labelbox import Project -from labelbox.schema.media_type import MediaType -from labelbox.schema.queue_mode import QueueMode - - -@pytest.mark.xfail(reason="Relationship sorting not implemented correctly " - "on the server-side") -def test_relationship_sorting(client): - a = client.create_project(name="a", - description="b", - queue_mode=QueueMode.Batch, - media_type=MediaType.Image) - b = client.create_project(name="b", - description="c", - queue_mode=QueueMode.Batch, - media_type=MediaType.Image) - c = client.create_project(name="c", - description="a", - queue_mode=QueueMode.Batch, - media_type=MediaType.Image) - - dataset = 
client.create_dataset(name="Dataset") - a.datasets.connect(dataset) - b.datasets.connect(dataset) - c.datasets.connect(dataset) - - def get(order_by): - where = Project.created_at >= a.created_at - return list(dataset.projects(where=where, order_by=order_by)) - - assert get(Project.name.asc) == [a, b, c] - assert get(Project.name.desc) == [c, b, a] - assert get(Project.description.asc) == [c, a, b] - assert get(Project.description.desc) == [b, a, c] - - dataset.delete() - a.delete() - b.delete() - c.delete() @pytest.mark.xfail(reason="Sorting not supported on top-level fetches")