diff --git a/examples/annotation_import/conversational.ipynb b/examples/annotation_import/conversational.ipynb
index 04e107ecb..5fc7c0472 100644
--- a/examples/annotation_import/conversational.ipynb
+++ b/examples/annotation_import/conversational.ipynb
@@ -140,11 +140,11 @@
" )\n",
")\n",
"\n",
- "ner_annotation_ndjson = { \n",
+ "ner_annotation_ndjson = {\n",
" \"name\": \"ner\",\n",
- " \"location\": { \n",
- " \"start\": 0, \n",
- " \"end\": 8 \n",
+ " \"location\": {\n",
+ " \"start\": 0,\n",
+ " \"end\": 8\n",
" },\n",
" \"messageId\": \"4\"\n",
" }"
@@ -178,7 +178,7 @@
{
"metadata": {},
"source": [
- "##### Checklist Classification ####### \n",
+ "##### Checklist Classification #######\n",
"\n",
"checklist_annotation= lb_types.ClassificationAnnotation(\n",
" name=\"checklist_convo\", # must match your ontology feature\"s name\n",
@@ -186,7 +186,7 @@
" answer = [\n",
" lb_types.ClassificationAnswer(\n",
" name = \"first_checklist_answer\"\n",
- " ), \n",
+ " ),\n",
" lb_types.ClassificationAnswer(\n",
" name = \"second_checklist_answer\"\n",
" )\n",
@@ -215,7 +215,7 @@
"######## Radio Classification ######\n",
"\n",
"radio_annotation = lb_types.ClassificationAnnotation(\n",
- " name=\"radio_convo\", \n",
+ " name=\"radio_convo\",\n",
" value=lb_types.Radio(answer = lb_types.ClassificationAnswer(name = \"first_radio_answer\")),\n",
" message_id=\"0\"\n",
")\n",
@@ -264,12 +264,12 @@
" \"name\": \"first_checklist_answer\",\n",
" \"classifications\" : [\n",
" {\n",
- " \"name\": \"sub_checklist_question\", \n",
+ " \"name\": \"sub_checklist_question\",\n",
" \"answer\": {\n",
" \"name\": \"first_sub_checklist_answer\"\n",
" }\n",
- " } \n",
- " ] \n",
+ " }\n",
+ " ]\n",
" }]\n",
"}\n",
"# Global\n",
@@ -360,27 +360,27 @@
"metadata": {},
"source": [
"ontology_builder = lb.OntologyBuilder(\n",
- " tools=[ \n",
+ " tools=[\n",
" lb.Tool(tool=lb.Tool.Type.NER,name=\"ner\"),\n",
- " ], \n",
- " classifications=[ \n",
- " lb.Classification( \n",
+ " ],\n",
+ " classifications=[\n",
+ " lb.Classification(\n",
" class_type=lb.Classification.Type.TEXT,\n",
- " scope=lb.Classification.Scope.INDEX, \n",
- " name=\"text_convo\"), \n",
- " lb.Classification( \n",
- " class_type=lb.Classification.Type.CHECKLIST, \n",
- " scope=lb.Classification.Scope.INDEX, \n",
- " name=\"checklist_convo\", \n",
+ " scope=lb.Classification.Scope.INDEX,\n",
+ " name=\"text_convo\"),\n",
+ " lb.Classification(\n",
+ " class_type=lb.Classification.Type.CHECKLIST,\n",
+ " scope=lb.Classification.Scope.INDEX,\n",
+ " name=\"checklist_convo\",\n",
" options=[\n",
" lb.Option(value=\"first_checklist_answer\"),\n",
- " lb.Option(value=\"second_checklist_answer\") \n",
+ " lb.Option(value=\"second_checklist_answer\")\n",
" ]\n",
- " ), \n",
- " lb.Classification( \n",
- " class_type=lb.Classification.Type.RADIO, \n",
- " name=\"radio_convo\", \n",
- " scope=lb.Classification.Scope.INDEX, \n",
+ " ),\n",
+ " lb.Classification(\n",
+ " class_type=lb.Classification.Type.RADIO,\n",
+ " name=\"radio_convo\",\n",
+ " scope=lb.Classification.Scope.INDEX,\n",
" options=[\n",
" lb.Option(value=\"first_radio_answer\"),\n",
" lb.Option(value=\"second_radio_answer\")\n",
@@ -395,7 +395,7 @@
" options=[\n",
" lb.Classification(\n",
" class_type=lb.Classification.Type.CHECKLIST,\n",
- " name=\"sub_checklist_question\", \n",
+ " name=\"sub_checklist_question\",\n",
" options=[lb.Option(\"first_sub_checklist_answer\")]\n",
" )\n",
" ])\n",
@@ -438,10 +438,10 @@
"metadata": {},
"source": [
"# Create Labelbox project\n",
- "project = client.create_project(name=\"Conversational Text Annotation Import Demo\", \n",
+ "project = client.create_project(name=\"Conversational Text Annotation Import Demo\",\n",
" media_type=lb.MediaType.Conversational)\n",
"\n",
- "# Setup your ontology \n",
+ "# Setup your ontology\n",
"project.setup_editor(ontology) # Connect your ontology and editor to your project"
],
"cell_type": "code",
@@ -458,8 +458,6 @@
{
"metadata": {},
"source": [
- "# Setup Batches and Ontology\n",
- "\n",
"# Create a batch to send to your MAL project\n",
"batch = project.create_batch(\n",
" \"first-batch-convo-demo\", # Each batch in a project must have a unique name\n",
@@ -566,9 +564,9 @@
"source": [
"# Upload our label using Model-Assisted Labeling\n",
"upload_job = lb.MALPredictionImport.create_from_objects(\n",
- " client = client, \n",
- " project_id = project.uid, \n",
- " name=f\"mal_job-{str(uuid.uuid4())}\", \n",
+ " client = client,\n",
+ " project_id = project.uid,\n",
+ " name=f\"mal_job-{str(uuid.uuid4())}\",\n",
" predictions=label)\n",
"\n",
"upload_job.wait_until_done()\n",
@@ -589,11 +587,11 @@
{
"metadata": {},
"source": [
- "# Upload label for this data row in project \n",
+ "# Upload label for this data row in project\n",
"upload_job = lb.LabelImport.create_from_objects(\n",
- " client = client, \n",
- " project_id = project.uid, \n",
- " name=\"label_import_job\"+str(uuid.uuid4()), \n",
+ " client = client,\n",
+ " project_id = project.uid,\n",
+ " name=\"label_import_job\"+str(uuid.uuid4()),\n",
" labels=label)\n",
"\n",
"upload_job.wait_until_done()\n",
diff --git a/examples/annotation_import/conversational_LLM.ipynb b/examples/annotation_import/conversational_LLM.ipynb
new file mode 100644
index 000000000..bb811a1b8
--- /dev/null
+++ b/examples/annotation_import/conversational_LLM.ipynb
@@ -0,0 +1,649 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {},
+ "cells": [
+ {
+ "metadata": {},
+ "source": [
+ "
\n",
+ " \n",
+ " | \n",
+ "\n"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ " | \n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ " | "
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "# LLM pairwise comparison with Conversational text using MAL and Ground truth\n",
+ "This demo is meant to showcase how to upload conversational row data that contains model outputs for pairwise comparisons analysis.\n"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "!pip install -q \"labelbox[data]\""
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "# Set up"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "import labelbox as lb\n",
+ "import labelbox.types as lb_types\n",
+ "import uuid"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "# Replace with your API key"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "API_KEY = \"\"\n",
+ "client = lb.Client(api_key=API_KEY)"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "# Supported annotations for conversational text"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "### Entity "
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "ner_annotation = lb_types.ObjectAnnotation(\n",
+ " name=\"ner\",\n",
+ " value=lb_types.ConversationEntity(\n",
+ " start=0,\n",
+ " end=8,\n",
+ " message_id=\"message-1\"\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "ner_annotation_ndjson = {\n",
+ " \"name\": \"ner\",\n",
+ " \"location\": {\n",
+ " \"start\": 0,\n",
+ " \"end\": 8\n",
+ " },\n",
+ " \"messageId\": \"message-1\"\n",
+ " }"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "### Classification: Radio (single-choice)"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "radio_annotation = lb_types.ClassificationAnnotation(\n",
+ " name=\"Choose the best response\",\n",
+ " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n",
+ " name=\"Response B\")))\n",
+ "\n",
+ "\n",
+ "\n",
+ "radio_annotation_ndjson = {\n",
+ " \"name\": \"Choose the best response\",\n",
+ " \"answer\": {\n",
+ " \"name\": \"Response B\"\n",
+ " }\n",
+ "}\n"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "### Classification: Free-form text"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "text_annotation = lb_types.ClassificationAnnotation(\n",
+ " name=\"Provide a reason for your choice\",\n",
+ " value=lb_types.Text(answer=\"the answer to the text questions right here\")\n",
+ ")\n",
+ "\n",
+ "\n",
+ "text_annotation_ndjson = {\n",
+ " \"name\": \"Provide a reason for your choice\",\n",
+ " \"answer\": \"This is the more concise answer\"\n",
+ "\n",
+ "}"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "### Classification: Checklist (multi-choice)"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "checklist_annotation= lb_types.ClassificationAnnotation(\n",
+    "    name=\"checklist_convo\", # must match your ontology feature's name\n",
+ " value=lb_types.Checklist(\n",
+ " answer = [\n",
+ " lb_types.ClassificationAnswer(\n",
+ " name = \"first_checklist_answer\"\n",
+ " ),\n",
+ " lb_types.ClassificationAnswer(\n",
+ " name = \"second_checklist_answer\"\n",
+ " )\n",
+ " ]\n",
+ " ),\n",
+ " message_id=\"message-1\" # Message specific annotation\n",
+ " )\n",
+ "\n",
+ "\n",
+ "checklist_annotation_ndjson = {\n",
+ " \"name\": \"checklist_convo\",\n",
+ " \"answers\": [\n",
+ " {\"name\": \"first_checklist_answer\"},\n",
+ " {\"name\": \"second_checklist_answer\"}\n",
+ " ],\n",
+ " \"messageId\": \"message-1\"\n",
+ "}"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "### Classification: Nested radio and checklist"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "\n",
+ "# Message based\n",
+ "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n",
+ " name=\"nested_checklist_question\",\n",
+ " message_id=\"message-1\",\n",
+ " value=lb_types.Checklist(\n",
+ " answer=[lb_types.ClassificationAnswer(\n",
+ " name=\"first_checklist_answer\",\n",
+ " classifications=[\n",
+ " lb_types.ClassificationAnnotation(\n",
+ " name=\"sub_checklist_question\",\n",
+ " value=lb_types.Checklist(\n",
+ " answer=[lb_types.ClassificationAnswer(\n",
+ " name=\"first_sub_checklist_answer\"\n",
+ " )]\n",
+ " ))\n",
+ " ]\n",
+ " )]\n",
+ " )\n",
+ ")\n",
+ "# Message based\n",
+ "nested_checklist_annotation_ndjson = {\n",
+ " \"name\": \"nested_checklist_question\",\n",
+ " \"messageId\": \"message-1\",\n",
+ " \"answer\": [{\n",
+ " \"name\": \"first_checklist_answer\",\n",
+ " \"classifications\" : [\n",
+ " {\n",
+ " \"name\": \"sub_checklist_question\",\n",
+ " \"answer\": {\n",
+ " \"name\": \"first_sub_checklist_answer\",\n",
+ " }\n",
+ " }\n",
+ " ]\n",
+ " }]\n",
+ "}\n",
+ "# Global\n",
+ "nested_radio_annotation = lb_types.ClassificationAnnotation(\n",
+ " name=\"nested_radio_question\",\n",
+ " value=lb_types.Radio(\n",
+ " answer=lb_types.ClassificationAnswer(\n",
+ " name=\"first_radio_answer\",\n",
+ " classifications=[\n",
+ " lb_types.ClassificationAnnotation(\n",
+ " name=\"sub_radio_question\",\n",
+ " value=lb_types.Radio(\n",
+ " answer=lb_types.ClassificationAnswer(\n",
+ " name=\"first_sub_radio_answer\"\n",
+ " )\n",
+ " )\n",
+ " )\n",
+ " ]\n",
+ " )\n",
+ " )\n",
+ ")\n",
+ "#Global\n",
+ "nested_radio_annotation_ndjson = {\n",
+ " \"name\": \"nested_radio_question\",\n",
+ " \"answer\": {\n",
+ " \"name\": \"first_radio_answer\",\n",
+ " \"classifications\": [{\n",
+ " \"name\":\"sub_radio_question\",\n",
+ " \"answer\": { \"name\" : \"first_sub_radio_answer\"}\n",
+ " }]\n",
+ " }\n",
+ "}\n",
+ "\n"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## Step 1: Import data rows with \"modelOutputs\" into Catalog"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "In addition to your message based data, you will need to add a list of model outputs to your JSON file:\n",
+ "\n",
+ "```\n",
+ "\"modelOutputs\" : [\n",
+ " {\n",
+ " \"title\": \"Name of the response option\",\n",
+ " \"content\": \"Content of the response\",\n",
+ " \"modelConfigName\": \"Name of model configuration\"\n",
+ " }\n",
+ "]\n",
+ "```\n"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "#### Example of row_data with model outputs"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "pairwise_shopping_2 = \"\"\"\n",
+ " {\n",
+ " \"type\": \"application/vnd.labelbox.conversational\",\n",
+ " \"version\": 1,\n",
+ " \"messages\": [\n",
+ " {\n",
+ " \"messageId\": \"message-0\",\n",
+ " \"timestampUsec\": 1530718491,\n",
+ " \"content\": \"Hi! How can I help?\",\n",
+ " \"user\": {\n",
+ " \"userId\": \"Bot 002\",\n",
+ " \"name\": \"Bot\"\n",
+ " },\n",
+ " \"align\": \"left\",\n",
+ " \"canLabel\": false\n",
+ " },\n",
+ " {\n",
+ " \"messageId\": \"message-1\",\n",
+ " \"timestampUsec\": 1530718503,\n",
+ " \"content\": \"I just bought a vacuum cleaner from you guys like a week ago and it's already broken!!\",\n",
+ " \"user\": {\n",
+ " \"userId\": \"User 00686\",\n",
+ " \"name\": \"User\"\n",
+ " },\n",
+ " \"align\": \"right\",\n",
+ " \"canLabel\": true\n",
+ " }\n",
+ "\n",
+ " ],\n",
+ " \"modelOutputs\": [\n",
+ " {\n",
+ " \"title\": \"Response A\",\n",
+ " \"content\": \"I'm really sorry to hear that your vacuum cleaner is not working as expected. We certainly understand how frustrating this can be, especially with a recent purchase. I assure you, we're here to help!\\n\\n To proceed with resolving this issue, could you please provide some details about the problem you're experiencing with the vacuum cleaner? Additionally, if you could share your order number or receipt details, that would be very helpful. We will definitely look into the available options for you, which may include a replacement or a refund, depending on the specific circumstances and our return policy.\\n\\n Your satisfaction is our priority, and we'll do our best to resolve this matter promptly and to your satisfaction.\",\n",
+ " \"modelConfigName\": \"GPT-3.5 with temperature 0\"\n",
+ " },\n",
+ " {\n",
+ " \"title\": \"Response B\",\n",
+ " \"content\": \"I'm sorry about the issue with your vacuum cleaner. Please send us the order number or receipt details so we can quickly assist you with a replacement. Your satisfaction is important to us!\",\n",
+ " \"modelConfigName\": \"Fine Tuned GPT-3.5 with demo data\"\n",
+ " }\n",
+ " ]\n",
+ "}\n",
+ "\"\"\""
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+    "global_key = \"pairwise_shopping_asset\"\n",
+ "\n",
+ "# Upload data rows\n",
+ "convo_data = {\n",
+ " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/conversational-sample-data/pairwise_shopping_2.json\",\n",
+ " \"global_key\": global_key\n",
+ "}\n",
+ "\n",
+ "# Create a dataset\n",
+ "dataset = client.create_dataset(name=\"pairwise_annotation_demo\")\n",
+ "# Create a datarows\n",
+ "task = dataset.create_data_rows([convo_data])\n",
+ "task.wait_till_done()\n",
+ "print(\"Errors:\",task.errors)\n",
+ "print(\"Failed data rows:\", task.failed_data_rows)\n"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## Step 2: Create/select an Ontology"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "# Create an ontology with relevant classifications\n",
+ "\n",
+ "ontology_builder = lb.OntologyBuilder(\n",
+ " tools=[\n",
+ " lb.Tool(tool=lb.Tool.Type.NER,name=\"ner\"),\n",
+ " ],\n",
+ " classifications=[\n",
+ " lb.Classification(\n",
+ " class_type=lb.Classification.Type.RADIO,\n",
+ " scope=lb.Classification.Scope.GLOBAL,\n",
+ " name=\"Choose the best response\",\n",
+ " options=[lb.Option(value=\"Response A\"), lb.Option(value=\"Response B\"), lb.Option(value=\"Tie\")]\n",
+ " ),\n",
+ " lb.Classification(\n",
+ " class_type=lb.Classification.Type.TEXT,\n",
+ " name=\"Provide a reason for your choice\"\n",
+ " ),\n",
+ " lb.Classification(\n",
+ " class_type=lb.Classification.Type.CHECKLIST,\n",
+ " scope=lb.Classification.Scope.INDEX,\n",
+ " name=\"checklist_convo\",\n",
+ " options=[\n",
+ " lb.Option(value=\"first_checklist_answer\"),\n",
+ " lb.Option(value=\"second_checklist_answer\")\n",
+ " ]\n",
+ " ),\n",
+ " lb.Classification(\n",
+ " class_type=lb.Classification.Type.CHECKLIST,\n",
+ " name=\"nested_checklist_question\",\n",
+ " scope = lb.Classification.Scope.INDEX,\n",
+ " options=[\n",
+ " lb.Option(\"first_checklist_answer\",\n",
+ " options=[\n",
+ " lb.Classification(\n",
+ " class_type=lb.Classification.Type.CHECKLIST,\n",
+ " name=\"sub_checklist_question\",\n",
+ " options=[lb.Option(\"first_sub_checklist_answer\")]\n",
+ " )\n",
+ " ])\n",
+ " ]\n",
+ " ),\n",
+ " lb.Classification(\n",
+ " class_type=lb.Classification.Type.RADIO,\n",
+ " name=\"nested_radio_question\",\n",
+ " scope = lb.Classification.Scope.GLOBAL,\n",
+ " options=[\n",
+ " lb.Option(\"first_radio_answer\",\n",
+ " options=[\n",
+ " lb.Classification(\n",
+ " class_type=lb.Classification.Type.RADIO,\n",
+ " name=\"sub_radio_question\",\n",
+ " options=[lb.Option(\"first_sub_radio_answer\")]\n",
+ " )\n",
+ " ])\n",
+ " ]\n",
+ " )\n",
+ " ]\n",
+ ")\n",
+ "\n",
+ "ontology = client.create_ontology(\"Pairwise comparison ontology\", ontology_builder.asdict(), media_type=lb.MediaType.Conversational)\n",
+ "\n"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## Step 3: Create a labeling project"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "# Create Labelbox project\n",
+ "project = client.create_project(name=\"Conversational Text Annotation Import Demo (Pairwise comparison)\",\n",
+ " media_type=lb.MediaType.Conversational)\n",
+ "\n",
+ "# Setup your ontology\n",
+ "project.setup_editor(ontology) # Connect your ontology and editor to your project"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## Step 4: Send a batch of data rows to the project"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "# Create a batch to send to your project\n",
+ "batch = project.create_batch(\n",
+ " \"first-batch-convo-demo\", # Each batch in a project must have a unique name\n",
+ " global_keys=[global_key], # Paginated collection of data row objects, list of data row ids or global keys\n",
+ " priority=5 # priority between 1(Highest) - 5(lowest)\n",
+ ")\n",
+ "\n",
+ "print(\"Batch: \", batch)"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## Step 5: Create the annotations payload"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "Python annotation"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "label = []\n",
+ "label.append(\n",
+ " lb_types.Label(\n",
+ " data=lb_types.ConversationData(\n",
+ " global_key=global_key\n",
+ " ),\n",
+ " annotations=[\n",
+ " ner_annotation,\n",
+ " text_annotation,\n",
+ " checklist_annotation,\n",
+ " radio_annotation,\n",
+ " nested_radio_annotation,\n",
+ " nested_checklist_annotation\n",
+ " ]\n",
+ " )\n",
+ ")"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "NDJSON annotation"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "label_ndjson = []\n",
+ "for annotations in [\n",
+ " ner_annotation_ndjson,\n",
+ " text_annotation_ndjson,\n",
+ " checklist_annotation_ndjson,\n",
+ " radio_annotation_ndjson,\n",
+ " nested_checklist_annotation_ndjson,\n",
+ " nested_radio_annotation_ndjson\n",
+ " ]:\n",
+ " annotations.update({\n",
+ " \"dataRow\": {\n",
+ " \"globalKey\": global_key\n",
+ " }\n",
+ " })\n",
+ " label_ndjson.append(annotations)"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## Step 6: Upload annotations to a project as pre-labels or complete labels "
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "### Model Assisted Labeling (MAL)"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "upload_job = lb.MALPredictionImport.create_from_objects(\n",
+ " client = client,\n",
+ " project_id = project.uid,\n",
+ " name=f\"mal_job-{str(uuid.uuid4())}\",\n",
+ " predictions=label)\n",
+ "\n",
+ "upload_job.wait_until_done()\n",
+ "print(\"Errors:\", upload_job.errors)\n",
+ "print(\"Status of uploads: \", upload_job.statuses)"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## Label Import"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "upload_job = lb.LabelImport.create_from_objects(\n",
+ " client = client,\n",
+ " project_id = project.uid,\n",
+ " name=\"label_import_job\"+str(uuid.uuid4()),\n",
+ " labels=label)\n",
+ "\n",
+ "upload_job.wait_until_done();\n",
+ "print(\"Errors:\", upload_job.errors)\n",
+ "print(\"Status of uploads: \", upload_job.statuses)"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ }
+ ]
+}
\ No newline at end of file
diff --git a/examples/annotation_import/conversational_LLM_data_generation.ipynb b/examples/annotation_import/conversational_LLM_data_generation.ipynb
new file mode 100644
index 000000000..c07a7a0da
--- /dev/null
+++ b/examples/annotation_import/conversational_LLM_data_generation.ipynb
@@ -0,0 +1,453 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 2,
+ "metadata": {},
+ "cells": [
+ {
+ "metadata": {},
+ "source": [
+ "\n",
+ " \n",
+ " | \n",
+ "\n"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ " | \n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ " | "
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "# LLM Data Generation with MAL and Ground Truth\n",
+ "This demo is meant to showcase how to generate prompts and responses to fine-tune large language models (LLMs) using MAL and Ground truth"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "!pip install -q \"labelbox[data]\""
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## Set up "
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "import labelbox as lb\n",
+ "import uuid"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## Replace with your API key"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "API_KEY = \"\"\n",
+ "client = lb.Client(api_key=API_KEY)"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## Supported annotations for LLM data generation\n",
+    "Currently, we only support NDJSON format for prompts and responses"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## Prompt:"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "### Classification: Free-form text"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "prompt_annotation_ndjson = {\n",
+ " \"name\": \"Follow the prompt and select answers\",\n",
+ " \"answer\": \"This is an example of a prompt\"\n",
+ "}"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "# Responses:"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "### Classification: Radio (single-choice)"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "response_radio_annotation_ndjson= {\n",
+ " \"name\": \"response_radio\",\n",
+ " \"answer\": {\n",
+ " \"name\": \"response_a\"\n",
+ " }\n",
+ "}"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "### Classification: Free-form text"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "# Only NDJson is currently supported\n",
+ "response_text_annotation_ndjson = {\n",
+ " \"name\": \"Provide a reason for your choice\",\n",
+ " \"answer\": \"This is an example of a response text\"\n",
+ "}\n"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "### Classification: Checklist (multi-choice)"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "response_checklist_annotation_ndjson = {\n",
+ " \"name\": \"response_checklist\",\n",
+ " \"answer\": [\n",
+ " {\n",
+ " \"name\": \"response_a\"\n",
+ " },\n",
+ " {\n",
+ " \"name\": \"response_c\"\n",
+ " }\n",
+ " ]\n",
+ "}"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## Step 1: Create a project and data rows in Labelbox UI\n",
+ "\n",
+ "Currently we do not support this workflow through the SDK.\n",
+ "#### Workflow:\n",
+ "\n",
+ "1. Navigate to annotate and select ***New project***\n",
+ "\n",
+ "2. Select ***LLM data generation*** and then select ***Humans generate prompts and responses***\n",
+ "\n",
+ "3. Name your project, select ***create a new dataset*** and name your dataset. (data rows will be generated automatically in \n",
+ "this step)\n",
+ "\n",
+ "\n"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "# Enter the project id\n",
+ "project_id = \"\"\n",
+ "\n",
+ "# Select one of the global keys from the data rows generated\n",
+ "global_key = \"\""
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## Step 2 : Create/select an Ontology in Labelbox UI"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "Currently we do not support this workflow through the SDK\n",
+ "#### Workflow: \n",
+ "1. In your project, navigate to ***Settings*** and ***Label editor***\n",
+ "\n",
+ "2. Click on ***Edit***\n",
+ "\n",
+ "3. Create a new ontology and add the features used in this demo\n",
+ "\n"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "#### For this demo the following ontology was generated in the UI: "
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "ontology_json = \"\"\"\n",
+ "{\n",
+ " \"tools\": [],\n",
+ " \"relationships\": [],\n",
+ " \"classifications\": [\n",
+ " {\n",
+ " \"schemaNodeId\": \"clpvq9d0002yt07zy0khq42rp\",\n",
+ " \"featureSchemaId\": \"clpvq9d0002ys07zyf2eo9p14\",\n",
+ " \"type\": \"prompt\",\n",
+ " \"name\": \"Follow the prompt and select answers\",\n",
+ " \"archived\": false,\n",
+ " \"required\": true,\n",
+ " \"options\": [],\n",
+ " \"instructions\": \"Follow the prompt and select answers\",\n",
+ " \"minCharacters\": 5,\n",
+ " \"maxCharacters\": 100\n",
+ " },\n",
+ " {\n",
+ " \"schemaNodeId\": \"clpvq9d0002yz07zy0fjg28z7\",\n",
+ " \"featureSchemaId\": \"clpvq9d0002yu07zy28ik5w3i\",\n",
+ " \"type\": \"response-radio\",\n",
+ " \"name\": \"response_radio\",\n",
+ " \"instructions\": \"response_radio\",\n",
+ " \"scope\": \"global\",\n",
+ " \"required\": true,\n",
+ " \"archived\": false,\n",
+ " \"options\": [\n",
+ " {\n",
+ " \"schemaNodeId\": \"clpvq9d0002yw07zyci2q5adq\",\n",
+ " \"featureSchemaId\": \"clpvq9d0002yv07zyevmz1yoj\",\n",
+ " \"value\": \"response_a\",\n",
+ " \"label\": \"response_a\",\n",
+ " \"position\": 0,\n",
+ " \"options\": []\n",
+ " },\n",
+ " {\n",
+ " \"schemaNodeId\": \"clpvq9d0002yy07zy8pe48zdj\",\n",
+ " \"featureSchemaId\": \"clpvq9d0002yx07zy0jvmdxk8\",\n",
+ " \"value\": \"response_b\",\n",
+ " \"label\": \"response_b\",\n",
+ " \"position\": 1,\n",
+ " \"options\": []\n",
+ " }\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " \"schemaNodeId\": \"clpvq9d0002z107zygf8l62ys\",\n",
+ " \"featureSchemaId\": \"clpvq9d0002z007zyg26115f9\",\n",
+ " \"type\": \"response-text\",\n",
+ " \"name\": \"provide_a_reason_for_your_choice\",\n",
+ " \"instructions\": \"Provide a reason for your choice\",\n",
+ " \"scope\": \"global\",\n",
+ " \"required\": true,\n",
+ " \"archived\": false,\n",
+ " \"options\": [],\n",
+ " \"minCharacters\": 5,\n",
+ " \"maxCharacters\": 100\n",
+ " },\n",
+ " {\n",
+ " \"schemaNodeId\": \"clpvq9d0102z907zy8b10hjcj\",\n",
+ " \"featureSchemaId\": \"clpvq9d0002z207zy6xla7f82\",\n",
+ " \"type\": \"response-checklist\",\n",
+ " \"name\": \"response_checklist\",\n",
+ " \"instructions\": \"response_checklist\",\n",
+ " \"scope\": \"global\",\n",
+ " \"required\": true,\n",
+ " \"archived\": false,\n",
+ " \"options\": [\n",
+ " {\n",
+ " \"schemaNodeId\": \"clpvq9d0102z407zy0adq0rfr\",\n",
+ " \"featureSchemaId\": \"clpvq9d0002z307zy6dqb8xsw\",\n",
+ " \"value\": \"response_a\",\n",
+ " \"label\": \"response_a\",\n",
+ " \"position\": 0,\n",
+ " \"options\": []\n",
+ " },\n",
+ " {\n",
+ " \"schemaNodeId\": \"clpvq9d0102z607zych8b2z5d\",\n",
+ " \"featureSchemaId\": \"clpvq9d0102z507zyfwfgacrn\",\n",
+ " \"value\": \"response_c\",\n",
+ " \"label\": \"response_c\",\n",
+ " \"position\": 1,\n",
+ " \"options\": []\n",
+ " },\n",
+ " {\n",
+ " \"schemaNodeId\": \"clpvq9d0102z807zy03y7gysp\",\n",
+ " \"featureSchemaId\": \"clpvq9d0102z707zyh61y5o3u\",\n",
+ " \"value\": \"response_d\",\n",
+ " \"label\": \"response_d\",\n",
+ " \"position\": 2,\n",
+ " \"options\": []\n",
+ " }\n",
+ " ]\n",
+ " }\n",
+ " ],\n",
+ " \"realTime\": false\n",
+ "}\n",
+ "\n",
+ "\"\"\""
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## Step 3: Create the annotations payload"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "label_ndjson = []\n",
+ "for annotations in [\n",
+ " prompt_annotation_ndjson,\n",
+ " response_radio_annotation_ndjson,\n",
+ " response_text_annotation_ndjson,\n",
+ " response_checklist_annotation_ndjson\n",
+ " ]:\n",
+ " annotations.update({\n",
+ " \"dataRow\": {\n",
+ " \"globalKey\": global_key\n",
+ " }\n",
+ " })\n",
+ " label_ndjson.append(annotations)"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## Step 4: Upload annotations to a project as pre-labels or complete labels"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "project = client.get_project(project_id=project_id)"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "#### Model Assisted Labeling (MAL)"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "upload_job = lb.MALPredictionImport.create_from_objects(\n",
+ " client = client,\n",
+ " project_id = project.uid,\n",
+ " name=f\"mal_job-{str(uuid.uuid4())}\",\n",
+ " predictions=label_ndjson)\n",
+ "\n",
+ "upload_job.wait_until_done()\n",
+ "print(\"Errors:\", upload_job.errors)\n",
+ "print(\"Status of uploads: \", upload_job.statuses)"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "#### Label Import"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "upload_job = lb.LabelImport.create_from_objects(\n",
+ " client = client,\n",
+ " project_id = project.uid,\n",
+ " name=\"label_import_job\"+str(uuid.uuid4()),\n",
+ " labels=label_ndjson)\n",
+ "\n",
+ "upload_job.wait_until_done();\n",
+ "print(\"Errors:\", upload_job.errors)\n",
+ "print(\"Status of uploads: \", upload_job.statuses)"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ }
+ ]
+}
\ No newline at end of file
diff --git a/examples/llm_asset_import/conversational_MAL_GT.ipynb b/examples/llm_asset_import/conversational_MAL_GT.ipynb
deleted file mode 100644
index a01a7a2af..000000000
--- a/examples/llm_asset_import/conversational_MAL_GT.ipynb
+++ /dev/null
@@ -1,386 +0,0 @@
-{
- "nbformat": 4,
- "nbformat_minor": 0,
- "metadata": {},
- "cells": [
- {
- "metadata": {},
- "source": [
- "\n",
- " \n",
- " | \n",
- "\n"
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": [
- "\n",
- "\n",
- "\n",
- "\n",
- " | \n",
- "\n",
- "\n",
- "\n",
- "\n",
- " | "
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": [
- "# LLM pairwise comparison with Conversational text using MAL and Ground truth\n",
- "This demo is meant to showcase how to upload conversational row data that contains model outputs for pairwise comparisons analysis.\n"
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": [
- "!pip install -q \"labelbox[data]\""
- ],
- "cell_type": "code",
- "outputs": [],
- "execution_count": null
- },
- {
- "metadata": {},
- "source": [
- "# Setup"
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": [
- "import labelbox as lb\n",
- "import uuid"
- ],
- "cell_type": "code",
- "outputs": [],
- "execution_count": null
- },
- {
- "metadata": {},
- "source": [
- "# Replace with your API Key"
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": [
- "API_KEY = \"\"\n",
- "client = lb.Client(api_key=API_KEY)"
- ],
- "cell_type": "code",
- "outputs": [],
- "execution_count": null
- },
- {
- "metadata": {},
- "source": [
- "# Step 1: Create annotations"
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": [
- "# Create a gobal radio and text annotation\n",
- "radio_annotation_ndjson = {\n",
- " \"name\": \"Choose the best response\",\n",
- " \"answer\": {\n",
- " \"name\": \"Response B\"\n",
- " }\n",
- "}\n",
- "\n",
- "text_annotation_ndjson = {\n",
- " \"name\": \"Provide a reason for your choice\",\n",
- " \"answer\": \"This is the more concise answer\",\n",
- "\n",
- "}"
- ],
- "cell_type": "code",
- "outputs": [],
- "execution_count": null
- },
- {
- "metadata": {},
- "source": [
- "# Step 2: Setup a project"
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": [
- "## Import data rows with \"modelOutputs\" into Catalog\n",
- "In addition to your message based data, you will need to add a list of model outputs to your JSON file:\n",
- "\n",
- "```\n",
- "\"modelOutputs\" : [\n",
- " {\n",
- " \"title\": \"Name of the response option\",\n",
- " \"content\": \"Content of the response\",\n",
- " \"modelConfigName\": \"Name of model configuration\"\n",
- " }\n",
- "]\n",
- "```\n"
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": [
- "### Example of row_data with model outputs"
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": [
- "pairwise_shopping_2 = \"\"\"\n",
- " {\n",
- " \"type\": \"application/vnd.labelbox.conversational\",\n",
- " \"version\": 1,\n",
- " \"messages\": [\n",
- " {\n",
- " \"messageId\": \"message-0\",\n",
- " \"timestampUsec\": 1530718491,\n",
- " \"content\": \"Hi! How can I help?\",\n",
- " \"user\": {\n",
- " \"userId\": \"Bot 002\",\n",
- " \"name\": \"Bot\"\n",
- " },\n",
- " \"align\": \"left\",\n",
- " \"canLabel\": false\n",
- " },\n",
- " {\n",
- " \"messageId\": \"message-1\",\n",
- " \"timestampUsec\": 1530718503,\n",
- " \"content\": \"I just bought a vacuum cleaner from you guys like a week ago and it's already broken!!\",\n",
- " \"user\": {\n",
- " \"userId\": \"User 00686\",\n",
- " \"name\": \"User\"\n",
- " },\n",
- " \"align\": \"right\",\n",
- " \"canLabel\": true\n",
- " }\n",
- "\n",
- " ],\n",
- " \"modelOutputs\": [\n",
- " {\n",
- " \"title\": \"Response A\",\n",
- " \"content\": \"I'm really sorry to hear that your vacuum cleaner is not working as expected. We certainly understand how frustrating this can be, especially with a recent purchase. I assure you, we're here to help!\\n\\n To proceed with resolving this issue, could you please provide some details about the problem you're experiencing with the vacuum cleaner? Additionally, if you could share your order number or receipt details, that would be very helpful. We will definitely look into the available options for you, which may include a replacement or a refund, depending on the specific circumstances and our return policy.\\n\\n Your satisfaction is our priority, and we'll do our best to resolve this matter promptly and to your satisfaction.\",\n",
- " \"modelConfigName\": \"GPT-3.5 with temperature 0\"\n",
- " },\n",
- " {\n",
- " \"title\": \"Response B\",\n",
- " \"content\": \"I'm sorry about the issue with your vacuum cleaner. Please send us the order number or receipt details so we can quickly assist you with a replacement. Your satisfaction is important to us!\",\n",
- " \"modelConfigName\": \"Fine Tuned GPT-3.5 with demo data\"\n",
- " }\n",
- " ]\n",
- "}\n",
- "\"\"\""
- ],
- "cell_type": "code",
- "outputs": [],
- "execution_count": null
- },
- {
- "metadata": {},
- "source": [
- "\n",
- "### Create dataset and data rows using a cloud hosted JSON file with \"modelOutputs\""
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": [
- "# Generate dummy global keys\n",
- "global_key_1 = str(uuid.uuid4())\n",
- "global_key_2 = str(uuid.uuid4())\n",
- "global_key_3 = str(uuid.uuid4())\n",
- "\n",
- "# Create a dataset\n",
- "dataset = client.create_dataset(\n",
- " name=\"pairwise_demo_\"+str(uuid.uuid4()),\n",
- " iam_integration=None\n",
- ")\n",
- "# Upload data rows\n",
- "task = dataset.create_data_rows([\n",
- " {\n",
- " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/conversational-sample-data/pairwise_shopping_1.json\",\n",
- " \"global_key\": global_key_1\n",
- " },\n",
- " {\n",
- " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/conversational-sample-data/pairwise_shopping_2.json\",\n",
- " \"global_key\": global_key_2\n",
- " },\n",
- " {\n",
- " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/conversational-sample-data/pairwise_shopping_3.json\",\n",
- " \"global_key\": global_key_3\n",
- " }\n",
- " ])\n",
- "task.wait_till_done()\n",
- "print(\"Errors:\",task.errors)\n",
- "print(\"Failed data rows:\", task.failed_data_rows)\n"
- ],
- "cell_type": "code",
- "outputs": [],
- "execution_count": null
- },
- {
- "metadata": {},
- "source": [
- "### Create/select an ontology"
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": [
- "# Create an ontology with relevant classifications\n",
- "\n",
- "ontology_builder = lb.OntologyBuilder(\n",
- " classifications=[\n",
- " lb.Classification(\n",
- " class_type=lb.Classification.Type.RADIO,\n",
- " scope=lb.Classification.Scope.GLOBAL,\n",
- " name=\"Choose the best response\",\n",
- " options=[lb.Option(value=\"Response A\"), lb.Option(value=\"Response B\"), lb.Option(value=\"Tie\")]\n",
- " ),\n",
- " lb.Classification(\n",
- " class_type=lb.Classification.Type.TEXT,\n",
- " name=\"Provide a reason for your choice\"\n",
- " )\n",
- " ]\n",
- ")\n",
- "\n",
- "ontology = client.create_ontology(\"Pairwise comparison ontology\", ontology_builder.asdict(), media_type=lb.MediaType.Conversational)\n",
- "\n"
- ],
- "cell_type": "code",
- "outputs": [],
- "execution_count": null
- },
- {
- "metadata": {},
- "source": [
- "### Create a labeling project and send a batch of data rows to the project"
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": [
- "project = client.create_project(name=\"Pairwise Conversational Text Demo\",\n",
- " media_type=lb.MediaType.Conversational)\n",
- "project.setup_editor(ontology)"
- ],
- "cell_type": "code",
- "outputs": [],
- "execution_count": null
- },
- {
- "metadata": {},
- "source": [
- "project.create_batch(\n",
- " \"batch_conversational\",\n",
- " global_keys=[global_key_1, global_key_2, global_key_3],\n",
- " priority=5\n",
- ")"
- ],
- "cell_type": "code",
- "outputs": [],
- "execution_count": null
- },
- {
- "metadata": {},
- "source": [
- "# Step 3: Upload annotations to a project as pre-labels or complete labels"
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": [
- "Setup the payload with the annotations that were created in Step 1."
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": [
- "labels = []\n",
- "for key in [global_key_1, global_key_2, global_key_3]:\n",
- " for ann in [radio_annotation_ndjson, text_annotation_ndjson]:\n",
- " ann_copy = ann.copy()\n",
- " ann_copy.update({\n",
- " \"dataRow\": {\n",
- " \"globalKey\": key\n",
- " }\n",
- " })\n",
- " labels.append(ann_copy)"
- ],
- "cell_type": "code",
- "outputs": [],
- "execution_count": null
- },
- {
- "metadata": {},
- "source": [
- "### Model Assisted Labeling (MAL)"
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": [
- "upload_job = lb.MALPredictionImport.create_from_objects(\n",
- " client = client,\n",
- " project_id = project.uid,\n",
- " name=f\"mal_job-{str(uuid.uuid4())}\",\n",
- " predictions=labels)\n",
- "\n",
- "upload_job.wait_until_done()\n",
- "print(\"Errors:\", upload_job.errors)\n",
- "print(\"Status of uploads: \", upload_job.statuses)"
- ],
- "cell_type": "code",
- "outputs": [],
- "execution_count": null
- },
- {
- "metadata": {},
- "source": [
- "## Label Import"
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": [
- "upload_job = lb.LabelImport.create_from_objects(\n",
- " client = client,\n",
- " project_id = project.uid,\n",
- " name=\"label_import_job\"+str(uuid.uuid4()),\n",
- " labels=labels)\n",
- "\n",
- "upload_job.wait_until_done();\n",
- "print(\"Errors:\", upload_job.errors)\n",
- "print(\"Status of uploads: \", upload_job.statuses)"
- ],
- "cell_type": "code",
- "outputs": [],
- "execution_count": null
- }
- ]
-}
\ No newline at end of file
diff --git a/examples/llm_asset_import/conversational_MEA.ipynb b/examples/llm_asset_import/conversational_MEA.ipynb
deleted file mode 100644
index aa75e49ca..000000000
--- a/examples/llm_asset_import/conversational_MEA.ipynb
+++ /dev/null
@@ -1,366 +0,0 @@
-{
- "nbformat": 4,
- "nbformat_minor": 0,
- "metadata": {},
- "cells": [
- {
- "metadata": {},
- "source": [
- "\n",
- " \n",
- " | \n",
- "\n"
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": [
- "\n",
- "\n",
- " | \n",
- "\n",
- "\n",
- "\n",
- "\n",
- "\n",
- " | "
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": [
- "# LLM pairwise comparison with Conversational text using Model\n",
- "\n",
- "This demo is meant to showcase how to upload conversational row data that contains model outputs for pairwise comparisons analysis.\n"
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": [
- "!pip install \"labelbox[data]\" -q"
- ],
- "cell_type": "code",
- "outputs": [],
- "execution_count": null
- },
- {
- "metadata": {},
- "source": [
- "# Setup"
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": [
- "import labelbox as lb\n",
- "import uuid"
- ],
- "cell_type": "code",
- "outputs": [],
- "execution_count": null
- },
- {
- "metadata": {},
- "source": [
- "# Replace with your API Key"
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": [
- "API_KEY = \"\"\n",
- "client = lb.Client(api_key=API_KEY)"
- ],
- "cell_type": "code",
- "outputs": [],
- "execution_count": null
- },
- {
- "metadata": {},
- "source": [
- "# Step 1: Create annotations"
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": [
- "# Create a gobal radio and text annotation\n",
- "radio_annotation_ndjson = {\n",
- " \"name\": \"Choose the best response\",\n",
- " \"answer\": {\n",
- " \"name\": \"Response B\"\n",
- " }\n",
- "}\n",
- "\n",
- "text_annotation_ndjson = {\n",
- " \"name\": \"Provide a reason for your choice\",\n",
- " \"answer\": \"This is the more concise answer\",\n",
- "\n",
- "}"
- ],
- "cell_type": "code",
- "outputs": [],
- "execution_count": null
- },
- {
- "metadata": {},
- "source": [
- "# Step 2: Setup a model\n"
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": [
- "## Import data rows with \"modelOutputs\" into Catalog\n",
- "In addition to your message based data, you will need to add a list of model outputs to your JSON file:\n",
- "\n",
- "```\n",
- "\"modelOutputs\" : [\n",
- " {\n",
- " \"title\": \"Name of the response option\",\n",
- " \"content\": \"Content of the response\",\n",
- " \"modelConfigName\": \"Name of model configuration\"\n",
- " }\n",
- "]\n",
- "```\n"
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": [
- "### Example of row_data with model outputs"
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": [
- "pairwise_shopping_2 = \"\"\"\n",
- " {\n",
- " \"type\": \"application/vnd.labelbox.conversational\",\n",
- " \"version\": 1,\n",
- " \"messages\": [\n",
- " {\n",
- " \"messageId\": \"message-0\",\n",
- " \"timestampUsec\": 1530718491,\n",
- " \"content\": \"Hi! How can I help?\",\n",
- " \"user\": {\n",
- " \"userId\": \"Bot 002\",\n",
- " \"name\": \"Bot\"\n",
- " },\n",
- " \"align\": \"left\",\n",
- " \"canLabel\": false\n",
- " },\n",
- " {\n",
- " \"messageId\": \"message-1\",\n",
- " \"timestampUsec\": 1530718503,\n",
- " \"content\": \"I just bought a vacuum cleaner from you guys like a week ago and it's already broken!!\",\n",
- " \"user\": {\n",
- " \"userId\": \"User 00686\",\n",
- " \"name\": \"User\"\n",
- " },\n",
- " \"align\": \"right\",\n",
- " \"canLabel\": true\n",
- " }\n",
- "\n",
- " ],\n",
- " \"modelOutputs\": [\n",
- " {\n",
- " \"title\": \"Response A\",\n",
- " \"content\": \"I'm really sorry to hear that your vacuum cleaner is not working as expected. We certainly understand how frustrating this can be, especially with a recent purchase. I assure you, we're here to help!\\n\\n To proceed with resolving this issue, could you please provide some details about the problem you're experiencing with the vacuum cleaner? Additionally, if you could share your order number or receipt details, that would be very helpful. We will definitely look into the available options for you, which may include a replacement or a refund, depending on the specific circumstances and our return policy.\\n\\n Your satisfaction is our priority, and we'll do our best to resolve this matter promptly and to your satisfaction.\",\n",
- " \"modelConfigName\": \"GPT-3.5 with temperature 0\"\n",
- " },\n",
- " {\n",
- " \"title\": \"Response B\",\n",
- " \"content\": \"I'm sorry about the issue with your vacuum cleaner. Please send us the order number or receipt details so we can quickly assist you with a replacement. Your satisfaction is important to us!\",\n",
- " \"modelConfigName\": \"Fine Tuned GPT-3.5 with demo data\"\n",
- " }\n",
- " ]\n",
- "}\n",
- "\"\"\""
- ],
- "cell_type": "code",
- "outputs": [],
- "execution_count": null
- },
- {
- "metadata": {},
- "source": [
- "\n",
- "### Create dataset and data rows using a cloud hosted JSON file with \"modelOutputs\""
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": [
- "# Generate dummy global keys\n",
- "global_key_1 = str(uuid.uuid4())\n",
- "global_key_2 = str(uuid.uuid4())\n",
- "global_key_3 = str(uuid.uuid4())\n",
- "\n",
- "# Create a dataset\n",
- "dataset = client.create_dataset(\n",
- " name=\"pairwise_demo_\"+str(uuid.uuid4()),\n",
- " iam_integration=None\n",
- ")\n",
- "# Upload data rows\n",
- "task = dataset.create_data_rows([\n",
- " {\n",
- " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/conversational-sample-data/pairwise_shopping_1.json\",\n",
- " \"global_key\": global_key_1\n",
- " },\n",
- " {\n",
- " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/conversational-sample-data/pairwise_shopping_2.json\",\n",
- " \"global_key\": global_key_2\n",
- " },\n",
- " {\n",
- " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/conversational-sample-data/pairwise_shopping_3.json\",\n",
- " \"global_key\": global_key_3\n",
- " }\n",
- " ])\n",
- "task.wait_till_done()\n",
- "print(\"Errors:\",task.errors)\n",
- "print(\"Failed data rows:\", task.failed_data_rows)\n"
- ],
- "cell_type": "code",
- "outputs": [],
- "execution_count": null
- },
- {
- "metadata": {},
- "source": [
- "### Create/select an ontology"
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": [
- "# Create an ontology with relevant classifications\n",
- "\n",
- "ontology_builder = lb.OntologyBuilder(\n",
- " classifications=[\n",
- " lb.Classification(\n",
- " class_type=lb.Classification.Type.RADIO,\n",
- " scope=lb.Classification.Scope.GLOBAL,\n",
- " name=\"Choose the best response\",\n",
- " options=[lb.Option(value=\"Response A\"), lb.Option(value=\"Response B\"), lb.Option(value=\"Tie\")]\n",
- " ),\n",
- " lb.Classification(\n",
- " class_type=lb.Classification.Type.TEXT,\n",
- " name=\"Provide a reason for your choice\"\n",
- " )\n",
- " ]\n",
- ")\n",
- "\n",
- "ontology = client.create_ontology(\"Pairwise comparison ontology\", ontology_builder.asdict(), media_type=lb.MediaType.Conversational)\n",
- "\n"
- ],
- "cell_type": "code",
- "outputs": [],
- "execution_count": null
- },
- {
- "metadata": {},
- "source": [
- "### Create a model and model run"
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": [
- "# create model\n",
- "model = client.create_model(name=\"Comparison_model_run_\"+ str(uuid.uuid4()),\n",
- " ontology_id=ontology.uid)\n",
- "# create model run\n",
- "model_run = model.create_model_run(\"iteration 1\")"
- ],
- "cell_type": "code",
- "outputs": [],
- "execution_count": null
- },
- {
- "metadata": {},
- "source": [
- "Upsert data rows"
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": [
- "model_run.upsert_data_rows(global_keys=[global_key_1, global_key_2, global_key_3])"
- ],
- "cell_type": "code",
- "outputs": [],
- "execution_count": null
- },
- {
- "metadata": {},
- "source": [
- "# Step 3: Upload annotations to a project as pre-labels or complete labels"
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": [
- "Setup the payload with the annotations that were created in Step 1."
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": [
- "predictions = []\n",
- "for key in [global_key_1, global_key_2, global_key_3]:\n",
- " for ann in [radio_annotation_ndjson, text_annotation_ndjson]:\n",
- " ann_copy = ann.copy()\n",
- " ann_copy.update({\n",
- " \"dataRow\": {\n",
- " \"globalKey\": key\n",
- " }\n",
- " })\n",
- " predictions.append(ann_copy)"
- ],
- "cell_type": "code",
- "outputs": [],
- "execution_count": null
- },
- {
- "metadata": {},
- "source": [
- "### Upload predictions to model run"
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": [
- "# Upload the prediction label to the Model Run\n",
- "upload_job_prediction = model_run.add_predictions(\n",
- " name=\"prediction_upload_job\"+str(uuid.uuid4()),\n",
- " predictions=predictions)\n",
- "\n",
- "# Errors will appear for annotation uploads that failed.\n",
- "print(\"Errors:\", upload_job_prediction.errors)\n",
- "print(\"Status of uploads: \", upload_job_prediction.statuses)"
- ],
- "cell_type": "code",
- "outputs": [],
- "execution_count": null
- }
- ]
-}
\ No newline at end of file
diff --git a/examples/prediction_upload/conversational_LLM_predictions.ipynb b/examples/prediction_upload/conversational_LLM_predictions.ipynb
new file mode 100644
index 000000000..0056d525c
--- /dev/null
+++ b/examples/prediction_upload/conversational_LLM_predictions.ipynb
@@ -0,0 +1,830 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {},
+ "cells": [
+ {
+ "metadata": {},
+ "source": [
+ "\n",
+ " \n",
+ " | \n",
+ "\n"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "\n",
+ "\n",
+ " | \n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ " | "
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "# LLM pairwise comparison with Conversational text using Model\n",
+ "\n",
+    "This demo is meant to showcase how to upload conversational row data that contains model outputs for pairwise comparison analysis in the model product.\n"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "!pip install \"labelbox[data]\" -q"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "# Set up"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "import labelbox as lb\n",
+ "import labelbox.types as lb_types\n",
+ "import uuid"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "# Replace with your API Key"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "API_KEY = \"\"\n",
+ "client = lb.Client(api_key=API_KEY)"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "# Supported annotations for conversational text"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "### Entity"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "ner_prediction = lb_types.ObjectAnnotation(\n",
+ " name=\"ner\",\n",
+ " confidence=0.5,\n",
+ " value=lb_types.ConversationEntity(\n",
+ " start=0,\n",
+ " end=8,\n",
+ " message_id=\"message-1\"\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "ner_prediction_ndjson = {\n",
+ " \"name\": \"ner\",\n",
+ " \"confidence\": 0.5,\n",
+ " \"location\": {\n",
+ " \"start\": 0,\n",
+ " \"end\": 8\n",
+ " },\n",
+ " \"messageId\": \"message-1\"\n",
+ " }"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "### Classification: Radio (single-choice)"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "radio_prediction = lb_types.ClassificationAnnotation(\n",
+ " name=\"Choose the best response\",\n",
+ " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n",
+ " name=\"Response B\", confidence=0.5)))\n",
+ "\n",
+ "\n",
+ "\n",
+ "radio_prediction_ndjson = {\n",
+ " \"name\": \"Choose the best response\",\n",
+ " \"answer\": {\n",
+ " \"name\": \"Response B\",\n",
+ " \"confidence\": 0.5\n",
+ " }\n",
+ "}\n"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "### Classification: Free-form text"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "text_prediction = lb_types.ClassificationAnnotation(\n",
+ " name=\"Provide a reason for your choice\",\n",
+ " value=lb_types.Text(answer=\"the answer to the text questions right here\", confidence=0.5)\n",
+ ")\n",
+ "\n",
+ "\n",
+ "text_prediction_ndjson = {\n",
+ " \"name\": \"Provide a reason for your choice\",\n",
+ " \"answer\": \"This is the more concise answer\",\n",
+ " \"confidence\": 0.5\n",
+ "\n",
+ "}"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "### Classification: Checklist (multi-choice)"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "checklist_prediction= lb_types.ClassificationAnnotation(\n",
+    " name=\"checklist_convo\", # must match your ontology feature's name\n",
+ " value=lb_types.Checklist(\n",
+ " answer = [\n",
+ " lb_types.ClassificationAnswer(\n",
+ " name = \"first_checklist_answer\",\n",
+ " confidence=0.5\n",
+ " ),\n",
+ " lb_types.ClassificationAnswer(\n",
+ " name = \"second_checklist_answer\",\n",
+ " confidence=0.5\n",
+ " )\n",
+ " ]\n",
+ " ),\n",
+ " message_id=\"message-1\" # Message specific annotation\n",
+ " )\n",
+ "\n",
+ "\n",
+ "checklist_prediction_ndjson = {\n",
+ " \"name\": \"checklist_convo\",\n",
+ " \"answers\": [\n",
+ " {\"name\": \"first_checklist_answer\",\"confidence\":0.5},\n",
+ " {\"name\": \"second_checklist_answer\",\"confidence\":0.5}\n",
+ " ],\n",
+ " \"messageId\": \"message-1\"\n",
+ "}"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "### Classification: Nested radio and checklist"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "\n",
+ "# Message based\n",
+ "nested_checklist_prediction = lb_types.ClassificationAnnotation(\n",
+ " name=\"nested_checklist_question\",\n",
+ " message_id=\"message-1\",\n",
+ " value=lb_types.Checklist(\n",
+ " answer=[lb_types.ClassificationAnswer(\n",
+ " name=\"first_checklist_answer\",\n",
+ " confidence=0.5, # Confidence scores should be added to the answer\n",
+ " classifications=[\n",
+ " lb_types.ClassificationAnnotation(\n",
+ " name=\"sub_checklist_question\",\n",
+ " value=lb_types.Checklist(\n",
+ " answer=[lb_types.ClassificationAnswer(\n",
+ " name=\"first_sub_checklist_answer\",\n",
+ " confidence=0.5 # Confidence scores should be added to the answer\n",
+ " )]\n",
+ " ))\n",
+ " ]\n",
+ " )]\n",
+ " )\n",
+ ")\n",
+ "# Message based\n",
+ "nested_checklist_prediction_ndjson = {\n",
+ " \"name\": \"nested_checklist_question\",\n",
+ " \"messageId\": \"message-1\",\n",
+ " \"answer\": [{\n",
+ " \"name\": \"first_checklist_answer\",\n",
+ " \"confidence\": 0.5, # Confidence scores should be added to the answer\n",
+ " \"classifications\" : [\n",
+ " {\n",
+ " \"name\": \"sub_checklist_question\",\n",
+ " \"answer\": {\n",
+ " \"name\": \"first_sub_checklist_answer\",\n",
+ " \"confidence\": 0.5, # Confidence scores should be added to the answer\n",
+ " }\n",
+ " }\n",
+ " ]\n",
+ " }]\n",
+ "}\n",
+ "# Global\n",
+ "nested_radio_prediction = lb_types.ClassificationAnnotation(\n",
+ " name=\"nested_radio_question\",\n",
+ " value=lb_types.Radio(\n",
+ " answer=lb_types.ClassificationAnswer(\n",
+ " name=\"first_radio_answer\",\n",
+ " confidence=0.5, # Confidence scores should be added to the answer\n",
+ " classifications=[\n",
+ " lb_types.ClassificationAnnotation(\n",
+ " name=\"sub_radio_question\",\n",
+ " value=lb_types.Radio(\n",
+ " answer=lb_types.ClassificationAnswer(\n",
+ " name=\"first_sub_radio_answer\",\n",
+ " confidence=0.5 # Confidence scores should be added to the answer\n",
+ " )\n",
+ " )\n",
+ " )\n",
+ " ]\n",
+ " )\n",
+ " )\n",
+ ")\n",
+ "#Global\n",
+ "nested_radio_prediction_ndjson = {\n",
+ " \"name\": \"nested_radio_question\",\n",
+ " \"answer\": {\n",
+ " \"name\": \"first_radio_answer\",\n",
+ " \"confidence\": 0.5,\n",
+ " \"classifications\": [{\n",
+ " \"name\":\"sub_radio_question\",\n",
+ " \"answer\": { \"name\" : \"first_sub_radio_answer\",\n",
+ " \"confidence\": 0.5}\n",
+ " }]\n",
+ " }\n",
+ "}\n",
+ "\n"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## Step 1: Import data rows with \"modelOutputs\" into Catalog\n"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "In addition to your message based data, you will need to add a list of model outputs to your JSON file:\n",
+ "\n",
+ "```\n",
+ "\"modelOutputs\" : [\n",
+ " {\n",
+ " \"title\": \"Name of the response option\",\n",
+ " \"content\": \"Content of the response\",\n",
+ " \"modelConfigName\": \"Name of model configuration\"\n",
+ " }\n",
+ "]\n",
+ "```\n"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "#### Example of row_data with model outputs"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "pairwise_shopping_2 = \"\"\"\n",
+ " {\n",
+ " \"type\": \"application/vnd.labelbox.conversational\",\n",
+ " \"version\": 1,\n",
+ " \"messages\": [\n",
+ " {\n",
+ " \"messageId\": \"message-0\",\n",
+ " \"timestampUsec\": 1530718491,\n",
+ " \"content\": \"Hi! How can I help?\",\n",
+ " \"user\": {\n",
+ " \"userId\": \"Bot 002\",\n",
+ " \"name\": \"Bot\"\n",
+ " },\n",
+ " \"align\": \"left\",\n",
+ " \"canLabel\": false\n",
+ " },\n",
+ " {\n",
+ " \"messageId\": \"message-1\",\n",
+ " \"timestampUsec\": 1530718503,\n",
+ " \"content\": \"I just bought a vacuum cleaner from you guys like a week ago and it's already broken!!\",\n",
+ " \"user\": {\n",
+ " \"userId\": \"User 00686\",\n",
+ " \"name\": \"User\"\n",
+ " },\n",
+ " \"align\": \"right\",\n",
+ " \"canLabel\": true\n",
+ " }\n",
+ "\n",
+ " ],\n",
+ " \"modelOutputs\": [\n",
+ " {\n",
+ " \"title\": \"Response A\",\n",
+ " \"content\": \"I'm really sorry to hear that your vacuum cleaner is not working as expected. We certainly understand how frustrating this can be, especially with a recent purchase. I assure you, we're here to help!\\n\\n To proceed with resolving this issue, could you please provide some details about the problem you're experiencing with the vacuum cleaner? Additionally, if you could share your order number or receipt details, that would be very helpful. We will definitely look into the available options for you, which may include a replacement or a refund, depending on the specific circumstances and our return policy.\\n\\n Your satisfaction is our priority, and we'll do our best to resolve this matter promptly and to your satisfaction.\",\n",
+ " \"modelConfigName\": \"GPT-3.5 with temperature 0\"\n",
+ " },\n",
+ " {\n",
+ " \"title\": \"Response B\",\n",
+ " \"content\": \"I'm sorry about the issue with your vacuum cleaner. Please send us the order number or receipt details so we can quickly assist you with a replacement. Your satisfaction is important to us!\",\n",
+ " \"modelConfigName\": \"Fine Tuned GPT-3.5 with demo data\"\n",
+ " }\n",
+ " ]\n",
+ "}\n",
+ "\"\"\""
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "global_key = \"pairwise_shooping_asset\"\n",
+ "convo_data = {\n",
+ " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/conversational-sample-data/pairwise_shopping_2.json\",\n",
+ " \"global_key\": global_key\n",
+ "}\n",
+ "# Create a dataset\n",
+ "dataset = client.create_dataset(name=\"pairwise_prediction_demo\")\n",
+    "# Create data rows\n",
+ "task = dataset.create_data_rows([convo_data])\n",
+ "print(\"Errors:\",task.errors)\n",
+ "print(\"Failed data rows:\", task.failed_data_rows)"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## Step 2: Create/select an Ontology for your model predictions"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "# Create an ontology with relevant classifications\n",
+ "\n",
+ "ontology_builder = lb.OntologyBuilder(\n",
+ " tools=[\n",
+ " lb.Tool(tool=lb.Tool.Type.NER,name=\"ner\"),\n",
+ " ],\n",
+ " classifications=[\n",
+ " lb.Classification(\n",
+ " class_type=lb.Classification.Type.RADIO,\n",
+ " scope=lb.Classification.Scope.GLOBAL,\n",
+ " name=\"Choose the best response\",\n",
+ " options=[lb.Option(value=\"Response A\"), lb.Option(value=\"Response B\"), lb.Option(value=\"Tie\")]\n",
+ " ),\n",
+ " lb.Classification(\n",
+ " class_type=lb.Classification.Type.TEXT,\n",
+ " name=\"Provide a reason for your choice\"\n",
+ " ),\n",
+ " lb.Classification(\n",
+ " class_type=lb.Classification.Type.CHECKLIST,\n",
+ " scope=lb.Classification.Scope.INDEX,\n",
+ " name=\"checklist_convo\",\n",
+ " options=[\n",
+ " lb.Option(value=\"first_checklist_answer\"),\n",
+ " lb.Option(value=\"second_checklist_answer\")\n",
+ " ]\n",
+ " ),\n",
+ " lb.Classification(\n",
+ " class_type=lb.Classification.Type.CHECKLIST,\n",
+ " name=\"nested_checklist_question\",\n",
+ " scope = lb.Classification.Scope.INDEX,\n",
+ " options=[\n",
+ " lb.Option(\"first_checklist_answer\",\n",
+ " options=[\n",
+ " lb.Classification(\n",
+ " class_type=lb.Classification.Type.CHECKLIST,\n",
+ " name=\"sub_checklist_question\",\n",
+ " options=[lb.Option(\"first_sub_checklist_answer\")]\n",
+ " )\n",
+ " ])\n",
+ " ]\n",
+ " ),\n",
+ " lb.Classification(\n",
+ " class_type=lb.Classification.Type.RADIO,\n",
+ " name=\"nested_radio_question\",\n",
+ " scope = lb.Classification.Scope.GLOBAL,\n",
+ " options=[\n",
+ " lb.Option(\"first_radio_answer\",\n",
+ " options=[\n",
+ " lb.Classification(\n",
+ " class_type=lb.Classification.Type.RADIO,\n",
+ " name=\"sub_radio_question\",\n",
+ " options=[lb.Option(\"first_sub_radio_answer\")]\n",
+ " )\n",
+ " ])\n",
+ " ]\n",
+ " )\n",
+ " ]\n",
+ ")\n",
+ "\n",
+ "ontology = client.create_ontology(\"Pairwise comparison ontology\", ontology_builder.asdict(), media_type=lb.MediaType.Conversational)\n",
+ "\n"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## Step 3: Create a Model and Model Run"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "# create model\n",
+ "model = client.create_model(name=\"Comparison_model_run_\"+ str(uuid.uuid4()),\n",
+ " ontology_id=ontology.uid)\n",
+ "# create model run\n",
+ "model_run = model.create_model_run(\"iteration 1\")"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## Step 4: Send data rows to the Model run"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "model_run.upsert_data_rows(global_keys=[global_key])"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## Step 5: Create the predictions payload"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "label_prediction = []\n",
+ "label_prediction.append(lb_types.Label(\n",
+ " data=lb_types.ConversationData(\n",
+ " global_key=global_key\n",
+ " ),\n",
+ " annotations= [\n",
+ " ner_prediction,\n",
+ " text_prediction,\n",
+ " checklist_prediction,\n",
+ " radio_prediction,\n",
+ " nested_radio_prediction,\n",
+ " nested_checklist_prediction\n",
+ " ]\n",
+ "))"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "Set up the payload with the annotations that were created in Step 1."
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "label_ndjson = []\n",
+ "for annotations in [\n",
+ " ner_prediction_ndjson,\n",
+ " text_prediction_ndjson,\n",
+ " checklist_prediction_ndjson,\n",
+ " radio_prediction_ndjson,\n",
+ " nested_checklist_prediction_ndjson,\n",
+ " nested_radio_prediction_ndjson\n",
+ " ]:\n",
+ " annotations.update({\n",
+ " \"dataRow\": {\n",
+ " \"globalKey\": global_key\n",
+ " }\n",
+ " })\n",
+ " label_ndjson.append(annotations)"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## Step 6: Upload the predictions payload to the Model Run"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "# Upload the prediction label to the Model Run\n",
+ "upload_job_prediction = model_run.add_predictions(\n",
+ " name=\"prediction_upload_job\"+str(uuid.uuid4()),\n",
+ " predictions=label_prediction)\n",
+ "\n",
+ "# Errors will appear for annotation uploads that failed.\n",
+ "print(\"Errors:\", upload_job_prediction.errors)\n",
+ "print(\"Status of uploads: \", upload_job_prediction.statuses)"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## Step 7: Send annotations to the Model Run"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "7.1 Create a labelbox project"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "project = client.create_project(name=\"Conversational Human Evaluation Demo\",\n",
+ " media_type=lb.MediaType.Conversational)\n",
+ "project.setup_editor(ontology)"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "7.2 Create a batch to send to the project"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "project.create_batch(\n",
+ " \"batch_convo_prediction_demo\", # Each batch in a project must have a unique name\n",
+ " global_keys=[global_key], # Paginated collection of data row objects, list of data row ids or global keys\n",
+ " priority=5 # priority between 1(Highest) - 5(lowest)\n",
+ ")"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "7.3 Create the annotations payload"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "ner_annotation = lb_types.ObjectAnnotation(\n",
+ " name=\"ner\",\n",
+ " value=lb_types.ConversationEntity(\n",
+ " start=0,\n",
+ " end=8,\n",
+ " message_id=\"message-1\"\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "radio_annotation = lb_types.ClassificationAnnotation(\n",
+ " name=\"Choose the best response\",\n",
+ " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n",
+ " name=\"Response B\"))\n",
+ ")\n",
+ "\n",
+ "text_annotation = lb_types.ClassificationAnnotation(\n",
+ " name=\"Provide a reason for your choice\",\n",
+ " value=lb_types.Text(answer=\"the answer to the text questions right here\")\n",
+ ")\n",
+ "\n",
+ "checklist_annotation= lb_types.ClassificationAnnotation(\n",
+ "    name=\"checklist_convo\", # must match your ontology feature's name\n",
+ " value=lb_types.Checklist(\n",
+ " answer = [\n",
+ " lb_types.ClassificationAnswer(\n",
+ " name = \"first_checklist_answer\"\n",
+ " ),\n",
+ " lb_types.ClassificationAnswer(\n",
+ " name = \"second_checklist_answer\"\n",
+ " )\n",
+ " ]\n",
+ " ),\n",
+ " message_id=\"message-1\" # Message specific annotation\n",
+ " )\n",
+ "\n",
+ "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n",
+ " name=\"nested_checklist_question\",\n",
+ " message_id=\"message-1\",\n",
+ " value=lb_types.Checklist(\n",
+ " answer=[lb_types.ClassificationAnswer(\n",
+ " name=\"first_checklist_answer\",\n",
+ " classifications=[\n",
+ " lb_types.ClassificationAnnotation(\n",
+ " name=\"sub_checklist_question\",\n",
+ " value=lb_types.Checklist(\n",
+ " answer=[lb_types.ClassificationAnswer(\n",
+ " name=\"first_sub_checklist_answer\"\n",
+ " )]\n",
+ " ))\n",
+ " ]\n",
+ " )]\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "nested_radio_annotation = lb_types.ClassificationAnnotation(\n",
+ " name=\"nested_radio_question\",\n",
+ " value=lb_types.Radio(\n",
+ " answer=lb_types.ClassificationAnswer(\n",
+ " name=\"first_radio_answer\",\n",
+ " classifications=[\n",
+ " lb_types.ClassificationAnnotation(\n",
+ " name=\"sub_radio_question\",\n",
+ " value=lb_types.Radio(\n",
+ " answer=lb_types.ClassificationAnswer(\n",
+ " name=\"first_sub_radio_answer\"\n",
+ " )\n",
+ " )\n",
+ " )\n",
+ " ]\n",
+ " )\n",
+ " )\n",
+ ")"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "7.4 Create the label object"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "label_annotation = []\n",
+ "label_annotation.append(lb_types.Label(\n",
+ " data=lb_types.ConversationData(\n",
+ " global_key=global_key\n",
+ " ),\n",
+ " annotations= [\n",
+ " ner_annotation,\n",
+ " text_annotation,\n",
+ " checklist_annotation,\n",
+ " radio_annotation,\n",
+ " nested_radio_annotation,\n",
+ " nested_checklist_annotation\n",
+ " ]\n",
+ "))"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "7.5 Upload annotations to the project using Label Import"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "upload_job_annotation = lb.LabelImport.create_from_objects(\n",
+ " client = client,\n",
+ " project_id = project.uid,\n",
+ " name=\"label_import_job\"+ str(uuid.uuid4()),\n",
+ " labels=label_annotation)\n",
+ "\n",
+ "upload_job_annotation.wait_until_done()\n",
+ "# Errors will appear for annotation uploads that failed.\n",
+ "print(\"Errors:\", upload_job_annotation.errors)\n",
+ "print(\"Status of uploads: \", upload_job_annotation.statuses)"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "7.6 Send the annotations to the Model Run"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "# Get the label IDs from the project\n",
+ "model_run.upsert_labels(project_id=project.uid)"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## Optional deletions for cleanup"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "# project.delete()\n",
+ "# dataset.delete()"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ }
+ ]
+}
\ No newline at end of file