
Commit 5eff529

Merge pull request #551 from Labelbox/farkob/batch-features
[AL-2075] Batch list and export
2 parents 55b96ba + 386e163 commit 5eff529

8 files changed: +212 additions, -9 deletions

CHANGELOG.md

Lines changed: 15 additions & 0 deletions
@@ -1,5 +1,20 @@
 # Changelog
 
+# Version 3.20.0 (2022-04-27)
+## Added
+* Batches in a project can be retrieved with `project.batches()`
+* Added `Batch.remove_queued_data_rows()` to cancel remaining data rows in a batch
+* Added `Batch.export_data_rows()`, which returns `DataRow`s for a batch
+
+## Updated
+* NDJsonConverter now supports Video bounding box annotations.
+    * Note: Currently does not support nested classifications.
+    * Note: Converting an export into Labelbox annotation types and back to an export will result in only keyframe annotations. This is to support the correct import format.
+
+## Fix
+* `batch.project()` now works
+
 # Version 3.19.1 (2022-04-14)
 ## Fix
 * `create_data_rows` and `create_data_rows_sync` now uploads the file with a mimetype
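
Taken together, these entries add a small batch-management surface to the SDK. A minimal usage sketch, assuming a project already in Batch queue mode; the client setup and IDs here are placeholders, not from this commit:

    from labelbox import Client

    client = Client(api_key="<api-key>")           # placeholder credentials
    project = client.get_project("<project-id>")   # placeholder project id

    # List the project's batches (new in 3.20.0).
    for batch in project.batches():
        print(batch.name, batch.size)
        # The reverse relationship now works too (see the Fix entry).
        assert batch.project().uid == project.uid

    # Cancel whatever is still queued in one of the batches
    # (assumes at least one batch exists).
    some_batch = next(iter(project.batches()))
    some_batch.remove_queued_data_rows()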

labelbox/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 name = "labelbox"
-__version__ = "3.19.1"
+__version__ = "3.20.0"
 
 import sys
 import warnings

labelbox/orm/model.py

Lines changed: 1 addition & 0 deletions
@@ -347,6 +347,7 @@ class Entity(metaclass=EntityMeta):
     Invite: Type[labelbox.Invite]
     InviteLimit: Type[labelbox.InviteLimit]
     ProjectRole: Type[labelbox.ProjectRole]
+    Project: Type[labelbox.Project]
     Batch: Type[labelbox.Batch]
 
     @classmethod
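
This one-line registration is what the reworked `Batch.project()` below relies on: `batch.py` now resolves the project through `Entity.Project` rather than a `Relationship` field, so `Project` has to be listed on `Entity` like the other cached types.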

labelbox/pagination.py

Lines changed: 2 additions & 2 deletions
@@ -25,7 +25,7 @@ def __init__(self,
                  params: Dict[str, str],
                  dereferencing: Union[List[str], Dict[str, Any]],
                  obj_class: Union[Type["DbObject"], Callable[[Any, Any], Any]],
-                 cursor_path: Optional[Dict[str, Any]] = None,
+                 cursor_path: Optional[List[str]] = None,
                  experimental: bool = False):
         """ Creates a PaginatedCollection.
 
@@ -105,7 +105,7 @@ def get_next_page(self) -> Tuple[Dict[str, Any], bool]:
 
 class _CursorPagination(_Pagination):
 
-    def __init__(self, cursor_path: Dict[str, Any], *args, **kwargs):
+    def __init__(self, cursor_path: List[str], *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.cursor_path = cursor_path
         self.next_cursor: Optional[Any] = None
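
With this change, `cursor_path` is a plain list of keys walked into the GraphQL response to find the next cursor, matching the `['project', 'batches', 'pageInfo', 'endCursor']` path that `Project.batches()` passes below. A minimal sketch of that traversal (`get_cursor` is an illustrative helper, not part of the SDK):

    from functools import reduce
    from typing import Any, Dict, List

    def get_cursor(response: Dict[str, Any], cursor_path: List[str]) -> Any:
        # Descend one key at a time, e.g.
        # response['project']['batches']['pageInfo']['endCursor']
        return reduce(lambda node, key: node[key], cursor_path, response)

    resp = {"project": {"batches": {"pageInfo": {"endCursor": "abc123"}}}}
    assert get_cursor(resp, ["project", "batches", "pageInfo", "endCursor"]) == "abc123"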

labelbox/schema/batch.py

Lines changed: 98 additions & 3 deletions
@@ -1,5 +1,18 @@
-from labelbox.orm.db_object import DbObject
-from labelbox.orm.model import Field, Relationship
+from typing import Generator, TYPE_CHECKING
+from labelbox.orm.db_object import DbObject, experimental
+from labelbox.orm import query
+from labelbox.orm.model import Entity, Field, Relationship
+from labelbox.exceptions import LabelboxError, ResourceNotFoundError
+from io import StringIO
+import ndjson
+import requests
+import logging
+import time
+
+if TYPE_CHECKING:
+    from labelbox import Project
+
+logger = logging.getLogger(__name__)
 
 
 class Batch(DbObject):
@@ -21,5 +34,87 @@ class Batch(DbObject):
     size = Field.Int("size")
 
     # Relationships
-    project = Relationship.ToOne("Project")
     created_by = Relationship.ToOne("User")
+
+    def __init__(self, client, project_id, *args, **kwargs):
+        super().__init__(client, *args, **kwargs)
+        self.project_id = project_id
+
+    def project(self) -> 'Project':  # type: ignore
+        """ Returns the Project which this Batch belongs to.
+
+        Raises:
+            LabelboxError: if the project is not found
+        """
+        query_str = """query getProjectPyApi($projectId: ID!) {
+            project(
+                where: {id: $projectId}){
+                %s
+            }}""" % query.results_query_part(Entity.Project)
+        params = {"projectId": self.project_id}
+        response = self.client.execute(query_str, params)
+
+        if response is None:
+            raise ResourceNotFoundError(Entity.Project, params)
+
+        return Entity.Project(self.client, response["project"])
+
+    def remove_queued_data_rows(self) -> None:
+        """ Removes remaining queued data rows from the batch and labeling queue. """
+        project_id_param = "projectId"
+        batch_id_param = "batchId"
+        self.client.execute(
+            """mutation ArchiveBatchPyApi($%s: ID!, $%s: ID!) {
+            project(where: {id: $%s}) { archiveBatch(batchId: $%s) { id archivedAt } }
+            }""" % (project_id_param, batch_id_param, project_id_param,
+                    batch_id_param), {
+                        project_id_param: self.project_id,
+                        batch_id_param: self.uid
+                    },
+            experimental=True)
+
+    def export_data_rows(self, timeout_seconds=120) -> Generator:
+        """ Returns a generator that produces all data rows that are currently
+        in this batch.
+
+        Note: For efficiency, the data are cached for 30 minutes. Newly created data rows will not appear
+        until the end of the cache period.
+
+        Args:
+            timeout_seconds (float): Max waiting time, in seconds.
+        Returns:
+            Generator that yields DataRow objects belonging to this batch.
+        Raises:
+            LabelboxError: if the export fails or is unable to download within the specified time.
+        """
+        id_param = "batchId"
+        query_str = """mutation GetBatchDataRowsExportUrlPyApi($%s: ID!)
+            {exportBatchDataRows(data:{batchId: $%s }) {downloadUrl createdAt status}}
+        """ % (id_param, id_param)
+        sleep_time = 2
+        while True:
+            res = self.client.execute(query_str, {id_param: self.uid})
+            res = res["exportBatchDataRows"]
+            if res["status"] == "COMPLETE":
+                download_url = res["downloadUrl"]
+                response = requests.get(download_url)
+                response.raise_for_status()
+                reader = ndjson.reader(StringIO(response.text))
+                return (
+                    Entity.DataRow(self.client, result) for result in reader)
+            elif res["status"] == "FAILED":
+                raise LabelboxError("Data row export failed.")
+
+            timeout_seconds -= sleep_time
+            if timeout_seconds <= 0:
+                raise LabelboxError(
+                    f"Unable to export data rows within {timeout_seconds} seconds."
+                )
+
+            logger.debug("Batch '%s' data row export, waiting for server...",
+                         self.uid)
+            time.sleep(sleep_time)
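
`export_data_rows()` polls the export mutation every two seconds until the server reports COMPLETE, then downloads the NDJSON payload and yields each row as a `DataRow`. A hedged caller-side sketch, assuming `batch` came from `create_batch()` or `project.batches()` and an arbitrary timeout:

    from labelbox.exceptions import LabelboxError

    try:
        # Generator of DataRow objects; per the docstring, results are
        # cached server-side for ~30 minutes, so very recent rows may
        # not appear yet.
        for data_row in batch.export_data_rows(timeout_seconds=180):
            print(data_row.uid, data_row.external_id)
    except LabelboxError as err:
        # Raised on export failure or when the poll loop times out.
        print(f"export failed: {err}")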

labelbox/schema/project.py

Lines changed: 20 additions & 2 deletions
@@ -298,7 +298,7 @@ def export_labels(self,
 
         def _string_from_dict(dictionary: dict, value_with_quotes=False) -> str:
             """Returns a concatenated string of the dictionary's keys and values
-
+
             The string will be formatted as {key}: 'value' for each key. Value will be inclusive of
             quotations while key will not. This can be toggled with `value_with_quotes`"""
 
@@ -609,7 +609,7 @@ def create_batch(self, name: str, data_rows: List[str], priority: int = 5):
             experimental=True)["project"][method]
 
         res['size'] = len(dr_ids)
-        return Entity.Batch(self.client, res)
+        return Entity.Batch(self.client, self.uid, res)
 
     def _update_queue_mode(self,
                            mode: "Project.QueueMode") -> "Project.QueueMode":
@@ -840,6 +840,24 @@ def bulk_import_requests(self) -> PaginatedCollection:
             ["bulkImportRequests"],
             Entity.BulkImportRequest)
 
+    def batches(self) -> PaginatedCollection:
+        """ Fetch all batches that belong to this project.
+
+        Returns:
+            A `PaginatedCollection` of `Batch`es
+        """
+        id_param = "projectId"
+        query_str = """query GetProjectBatchesPyApi($from: String, $first: PageSize, $%s: ID!) {
+            project(where: {id: $%s}) {id
+            batches(after: $from, first: $first) { nodes { %s } pageInfo { endCursor }}}}
+        """ % (id_param, id_param, query.results_query_part(Entity.Batch))
+        return PaginatedCollection(
+            self.client,
+            query_str, {id_param: self.uid}, ['project', 'batches', 'nodes'],
+            lambda client, res: Entity.Batch(client, self.uid, res),
+            cursor_path=['project', 'batches', 'pageInfo', 'endCursor'],
+            experimental=True)
+
     def upload_annotations(
             self,
             name: str,
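
Because `batches()` returns a `PaginatedCollection`, iteration is lazy: each page is fetched only as the caller advances the iterator, with the previous page's `endCursor` feeding the `$from` variable. A small sketch of that, reusing the placeholder `project` from the changelog sketch and assuming `PaginatedCollection`'s usual iterator behavior:

    import itertools

    batches = project.batches()                      # no full fetch here
    first_two = list(itertools.islice(batches, 2))   # pulls only the pages needed
    for batch in first_two:
        print(batch.uid, batch.name, batch.size)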

tests/integration/test_batch.py

Lines changed: 55 additions & 0 deletions
@@ -18,10 +18,65 @@ def big_dataset(dataset: Dataset):
     yield dataset
 
 
+@pytest.fixture
+def small_dataset(dataset: Dataset):
+    task = dataset.create_data_rows([
+        {
+            "row_data": IMAGE_URL,
+            "external_id": "my-image"
+        },
+    ] * 3)
+    task.wait_till_done()
+
+    yield dataset
+
+
 def test_create_batch(configured_project: Project, big_dataset: Dataset):
     configured_project.update(queue_mode=Project.QueueMode.Batch)
 
     data_rows = [dr.uid for dr in list(big_dataset.export_data_rows())]
     batch = configured_project.create_batch("test-batch", data_rows, 3)
     assert batch.name == 'test-batch'
     assert batch.size == len(data_rows)
+
+
+def test_archive_batch(configured_project: Project, small_dataset: Dataset):
+    data_rows = [dr.uid for dr in list(small_dataset.export_data_rows())]
+    configured_project.update(queue_mode=Project.QueueMode.Batch)
+    batch = configured_project.create_batch("batch to archive", data_rows)
+    batch.remove_queued_data_rows()
+    exported_data_rows = list(batch.export_data_rows())
+
+    assert len(exported_data_rows) == 0
+
+
+def test_batch_project(configured_project: Project, small_dataset: Dataset):
+    data_rows = [dr.uid for dr in list(small_dataset.export_data_rows())]
+    configured_project.update(queue_mode=Project.QueueMode.Batch)
+    batch = configured_project.create_batch(
+        "batch to test project relationship", data_rows)
+    project_from_batch = batch.project()
+
+    assert project_from_batch.uid == configured_project.uid
+    assert project_from_batch.name == configured_project.name
+
+
+def test_export_data_rows(configured_project: Project, dataset: Dataset):
+    n_data_rows = 5
+    task = dataset.create_data_rows([
+        {
+            "row_data": IMAGE_URL,
+            "external_id": "my-image"
+        },
+    ] * n_data_rows)
+    task.wait_till_done()
+
+    data_rows = [dr.uid for dr in list(dataset.export_data_rows())]
+    configured_project.update(queue_mode=Project.QueueMode.Batch)
+    batch = configured_project.create_batch("batch test", data_rows)
+
+    result = list(batch.export_data_rows())
+    exported_data_rows = [dr.uid for dr in result]
+
+    assert len(result) == n_data_rows
+    assert set(data_rows) == set(exported_data_rows)

tests/integration/test_project.py

Lines changed: 20 additions & 1 deletion
@@ -4,7 +4,7 @@
 import pytest
 import requests
 
-from labelbox import Project, LabelingFrontend
+from labelbox import Project, LabelingFrontend, Dataset
 from labelbox.exceptions import InvalidQueryError
 
 
@@ -201,3 +201,22 @@ def test_queue_mode(configured_project: Project):
     ) == configured_project.QueueMode.Dataset
     configured_project.update(queue_mode=configured_project.QueueMode.Batch)
     assert configured_project.queue_mode() == configured_project.QueueMode.Batch
+
+
+def test_batches(configured_project: Project, dataset: Dataset, image_url):
+    task = dataset.create_data_rows([
+        {
+            "row_data": image_url,
+            "external_id": "my-image"
+        },
+    ] * 2)
+    task.wait_till_done()
+    configured_project.update(queue_mode=configured_project.QueueMode.Batch)
+    data_rows = [dr.uid for dr in list(dataset.export_data_rows())]
+    batch_one = 'batch one'
+    batch_two = 'batch two'
+    configured_project.create_batch(batch_one, [data_rows[0]])
+    configured_project.create_batch(batch_two, [data_rows[1]])
+
+    names = set([batch.name for batch in list(configured_project.batches())])
+    assert names == set([batch_one, batch_two])
