Skip to content

Commit e79b71c

Browse files
authored
Merge pull request #130 from Labelbox/ms/bulk-export-status
Easier fetch bulk import ndjson
2 parents 206c5b0 + 26ee709 commit e79b71c

File tree

4 files changed

+104
-7
lines changed

4 files changed

+104
-7
lines changed

labelbox/client.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -253,13 +253,15 @@ def upload_file(self, path: str) -> str:
253253
def upload_data(self,
254254
content: bytes,
255255
filename: str = None,
256-
content_type: str = None) -> str:
256+
content_type: str = None,
257+
sign: bool = False) -> str:
257258
""" Uploads the given data (bytes) to Labelbox.
258259
259260
Args:
260261
content: bytestring to upload
261262
filename: name of the upload
262263
content_type: content type of data uploaded
264+
sign: whether or not to sign the url
263265
264266
Returns:
265267
str, the URL of uploaded data.
@@ -274,7 +276,7 @@ def upload_data(self,
274276
"variables": {
275277
"file": None,
276278
"contentLength": len(content),
277-
"sign": False
279+
"sign": sign
278280
},
279281
"query":
280282
"""mutation UploadFile($file: Upload!, $contentLength: Int!,

labelbox/schema/bulk_import_request.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import json
22
import time
33
from uuid import UUID, uuid4
4+
import functools
45

56
import logging
67
from pathlib import Path
@@ -9,6 +10,7 @@
910
import ndjson
1011
import requests
1112
from pydantic import BaseModel, validator
13+
from requests.api import request
1214
from typing_extensions import Literal
1315
from typing import (Any, List, Optional, BinaryIO, Dict, Iterable, Tuple, Union,
1416
Type, Set)
@@ -113,6 +115,76 @@ class BulkImportRequest(DbObject):
113115
project = Relationship.ToOne("Project")
114116
created_by = Relationship.ToOne("User", False, "created_by")
115117

118+
@property
def inputs(self) -> List[Dict[str, Any]]:
    """Inputs for each individual annotation uploaded.

    This should match the ndjson annotations that you have uploaded.

    Returns:
        Uploaded ndjson.

    * This information will expire after 24 hours.
    """
    source_url = self.input_file_url
    return self._fetch_remote_ndjson(source_url)
130+
131+
@property
def errors(self) -> List[Dict[str, Any]]:
    """Errors for each individual annotation uploaded.

    This is a subset of statuses; see `BulkImportRequest.statuses`
    for more details. Blocks until the import job has stopped running.

    Returns:
        List of dicts containing error messages. An empty list means
        there were no errors.

    * This information will expire after 24 hours.
    """
    # The error file is only meaningful once the job has finished.
    self.wait_until_done()
    source_url = self.error_file_url
    return self._fetch_remote_ndjson(source_url)
144+
145+
@property
def statuses(self) -> List[Dict[str, Any]]:
    """Status for each individual annotation uploaded.

    Blocks until the import job has stopped running.

    Returns:
        A status for each annotation if the upload is done running.
        See below table for more details.

    .. list-table::
       :widths: 15 150
       :header-rows: 1

       * - Field
         - Description
       * - uuid
         - Specifies the annotation for the status row.
       * - dataRow
         - JSON object containing the Labelbox data row ID for the annotation.
       * - status
         - Indicates SUCCESS or FAILURE.
       * - errors
         - An array of error messages included when status is FAILURE. Each error has a name, message and optional (key might not exist) additional_info.

    * This information will expire after 24 hours.
    """
    # Statuses are only final once the job has finished.
    self.wait_until_done()
    source_url = self.status_file_url
    return self._fetch_remote_ndjson(source_url)
173+
174+
def _fetch_remote_ndjson(self, url: str) -> List[Dict[str, Any]]:
    """
    Fetches the remote ndjson file and caches the results per instance.

    NOTE(review): this replaces ``@functools.lru_cache()`` on the method.
    ``lru_cache`` on an instance method keys the cache on ``self`` and
    keeps every ``BulkImportRequest`` alive for the lifetime of the
    process (ruff B019); a small per-instance dict avoids that leak while
    preserving the "fetch each url at most once per object" behavior.

    Args:
        url (str): Can be any url pointing to an ndjson file.
    Returns:
        ndjson as a list of dicts.
    Raises:
        requests.HTTPError: if the download returned an error status.
    """
    if not hasattr(self, "_ndjson_cache"):
        # Lazily created so deserialization paths that bypass __init__
        # still work.
        self._ndjson_cache = {}
    if url not in self._ndjson_cache:
        response = requests.get(url)
        response.raise_for_status()
        self._ndjson_cache[url] = ndjson.loads(response.text)
    return self._ndjson_cache[url]
187+
116188
def refresh(self) -> None:
117189
"""Synchronizes values of all fields with the database.
118190
"""

labelbox/schema/enums.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,19 @@
33

44
class BulkImportRequestState(Enum):
55
""" State of the import job when importing annotations (RUNNING, FAILED, or FINISHED).
6+
7+
.. list-table::
8+
:widths: 15 150
9+
:header-rows: 1
10+
11+
* - State
12+
- Description
13+
* - RUNNING
14+
- Indicates that the import job is not done yet.
15+
* - FAILED
16+
- Indicates the import job failed. Check `BulkImportRequest.errors` for more information.
17+
* - FINISHED
18+
- Indicates the import job is no longer running. Check `BulkImportRequest.statuses` for more information.
619
"""
720
RUNNING = "RUNNING"
821
FAILED = "FAILED"

tests/integration/bulk_import/test_bulk_import_request.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -121,17 +121,27 @@ def test_validate_ndjson_uuid(tmp_path, configured_project, predictions):
121121

122122

123123
@pytest.mark.slow
def test_wait_till_done(rectangle_inference, configured_project):
    """End-to-end check that a finished import exposes inputs/errors/statuses."""
    import_name = str(uuid.uuid4())
    payload = ndjson.dumps([rectangle_inference])
    url = configured_project.client.upload_data(content=payload, sign=True)
    bulk_import_request = configured_project.upload_annotations(name=import_name,
                                                                annotations=url,
                                                                validate=False)

    assert len(bulk_import_request.inputs) == 1
    bulk_import_request.wait_until_done()
    assert bulk_import_request.state == BulkImportRequestState.FINISHED

    # Check that the status files are being returned as expected
    expected_uuid = rectangle_inference['uuid']
    assert len(bulk_import_request.errors) == 0
    assert len(bulk_import_request.inputs) == 1
    assert bulk_import_request.inputs[0]['uuid'] == expected_uuid
    assert len(bulk_import_request.statuses) == 1
    assert bulk_import_request.statuses[0]['status'] == 'SUCCESS'
    assert bulk_import_request.statuses[0]['uuid'] == expected_uuid
135145

136146

137147
def assert_file_content(url: str, predictions):

0 commit comments

Comments
 (0)