Skip to content

Commit 52b7b20

Browse files
Merge pull request #584 from Labelbox/develop
Release 3.22.1
2 parents 99b764b + 94d2d91 commit 52b7b20

File tree

6 files changed

+122
-38
lines changed

6 files changed

+122
-38
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Changelog
22

3+
# Version 3.22.1 (2022-05-23)
4+
## Updated
5+
* Renamed `custom_metadata` to `metadata_fields` in DataRow
6+
37
# Version 3.22.0 (2022-05-20)
48
## Added
59
* `Dataset.create_data_row()` and `Dataset.create_data_rows()` now upload metadata to the data row

labelbox/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
name = "labelbox"
2-
__version__ = "3.22.0"
2+
__version__ = "3.22.1"
33

44
import sys
55
import warnings

labelbox/schema/batch.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,9 +104,9 @@ def export_data_rows(self, timeout_seconds=120) -> Generator:
104104
response = requests.get(download_url)
105105
response.raise_for_status()
106106
reader = ndjson.reader(StringIO(response.text))
107-
# TODO: Update result to parse customMetadata when resolver returns
107+
# TODO: Update result to parse metadataFields when resolver returns
108108
return (Entity.DataRow(self.client, {
109-
**result, 'customMetadata': []
109+
**result, 'metadataFields': []
110110
}) for result in reader)
111111
elif res["status"] == "FAILED":
112112
raise LabelboxError("Data row export failed.")

labelbox/schema/data_row.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ class DataRow(DbObject, Updateable, BulkDeletable):
2222
updated_at (datetime)
2323
created_at (datetime)
2424
media_attributes (dict): generated media attributes for the datarow
25-
custom_metadata (list): metadata associated with the datarow
25+
metadata_fields (list): metadata associated with the datarow
2626
2727
dataset (Relationship): `ToOne` relationship to Dataset
2828
created_by (Relationship): `ToOne` relationship to User
@@ -35,11 +35,11 @@ class DataRow(DbObject, Updateable, BulkDeletable):
3535
updated_at = Field.DateTime("updated_at")
3636
created_at = Field.DateTime("created_at")
3737
media_attributes = Field.Json("media_attributes")
38-
custom_metadata = Field.List(
38+
metadata_fields = Field.List(
3939
DataRowMetadataField,
4040
graphql_type="DataRowCustomMetadataUpsertInput!",
41-
name="custom_metadata",
42-
result_subquery="customMetadata { value schemaId }")
41+
name="metadata_fields",
42+
result_subquery="metadataFields { schemaId name value kind }")
4343

4444
# Relationships
4545
dataset = Relationship.ToOne("Dataset")

labelbox/schema/dataset.py

Lines changed: 31 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -52,40 +52,57 @@ class Dataset(DbObject, Updateable, Deletable):
5252
iam_integration = Relationship.ToOne("IAMIntegration", False,
5353
"iam_integration", "signer")
5454

55-
def create_data_row(self, **kwargs) -> "DataRow":
55+
def create_data_row(self, items=None, **kwargs) -> "DataRow":
5656
""" Creates a single DataRow belonging to this dataset.
5757
5858
>>> dataset.create_data_row(row_data="http://my_site.com/photos/img_01.jpg")
5959
6060
Args:
61+
items: Dictionary containing new `DataRow` data. At a minimum,
62+
must contain `row_data` or `DataRow.row_data`.
6163
**kwargs: Key-value arguments containing new `DataRow` data. At a minimum,
6264
must contain `row_data`.
6365
6466
Raises:
67+
InvalidQueryError: If both dictionary and `kwargs` are provided as inputs
6568
InvalidQueryError: If `DataRow.row_data` field value is not provided
6669
in `kwargs`.
6770
InvalidAttributeError: in case the DB object type does not contain
6871
any of the field names given in `kwargs`.
6972
7073
"""
74+
invalid_argument_error = "Argument to create_data_row() must be either a dictionary, or kwargs containing `row_data` at minimum"
75+
76+
def convert_field_keys(items):
77+
if not isinstance(items, dict):
78+
raise InvalidQueryError(invalid_argument_error)
79+
return {
80+
key.name if isinstance(key, Field) else key: value
81+
for key, value in items.items()
82+
}
83+
84+
if items is not None and len(kwargs) > 0:
85+
raise InvalidQueryError(invalid_argument_error)
86+
7187
DataRow = Entity.DataRow
72-
if DataRow.row_data.name not in kwargs:
88+
args = convert_field_keys(items) if items is not None else kwargs
89+
90+
if DataRow.row_data.name not in args:
7391
raise InvalidQueryError(
7492
"DataRow.row_data missing when creating DataRow.")
7593

7694
# If row data is a local file path, upload it to server.
77-
row_data = kwargs[DataRow.row_data.name]
95+
row_data = args[DataRow.row_data.name]
7896
if os.path.exists(row_data):
79-
kwargs[DataRow.row_data.name] = self.client.upload_file(row_data)
80-
kwargs[DataRow.dataset.name] = self
97+
args[DataRow.row_data.name] = self.client.upload_file(row_data)
98+
args[DataRow.dataset.name] = self
8199

82100
# Parse metadata fields, if they are provided
83-
if DataRow.custom_metadata.name in kwargs:
101+
if DataRow.metadata_fields.name in args:
84102
mdo = self.client.get_data_row_metadata_ontology()
85-
kwargs[DataRow.custom_metadata.name] = mdo.parse_upsert_metadata(
86-
kwargs[DataRow.custom_metadata.name])
87-
88-
return self.client._create(DataRow, kwargs)
103+
args[DataRow.metadata_fields.name] = mdo.parse_upsert_metadata(
104+
args[DataRow.metadata_fields.name])
105+
return self.client._create(DataRow, args)
89106

90107
def create_data_rows_sync(self, items) -> None:
91108
""" Synchronously bulk upload data rows.
@@ -264,10 +281,10 @@ def validate_attachments(item):
264281
return attachments
265282

266283
def parse_metadata_fields(item):
267-
metadata_fields = item.get('custom_metadata')
284+
metadata_fields = item.get('metadata_fields')
268285
if metadata_fields:
269286
mdo = self.client.get_data_row_metadata_ontology()
270-
item['custom_metadata'] = mdo.parse_upsert_metadata(
287+
item['metadata_fields'] = mdo.parse_upsert_metadata(
271288
metadata_fields)
272289

273290
def format_row(item):
@@ -413,9 +430,9 @@ def export_data_rows(self, timeout_seconds=120) -> Generator:
413430
response = requests.get(download_url)
414431
response.raise_for_status()
415432
reader = ndjson.reader(StringIO(response.text))
416-
# TODO: Update result to parse customMetadata when resolver returns
433+
# TODO: Update result to parse metadataFields when resolver returns
417434
return (Entity.DataRow(self.client, {
418-
**result, 'customMetadata': []
435+
**result, 'metadataFields': []
419436
}) for result in reader)
420437
elif res["status"] == "FAILED":
421438
raise LabelboxError("Data row export failed.")

tests/integration/test_data_rows.py

Lines changed: 80 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import imghdr
12
from tempfile import NamedTemporaryFile
23
import uuid
34
import time
@@ -56,6 +57,12 @@ def make_metadata_fields_dict():
5657
return fields
5758

5859

60+
def filter_precomputed_embeddings(metadata_fields):
61+
return list(
62+
filter(lambda md: md["name"] != "precomputedImageEmbedding",
63+
metadata_fields))
64+
65+
5966
def test_get_data_row(datarow, client):
6067
assert client.get_data_row(datarow.uid)
6168

@@ -177,12 +184,63 @@ def test_data_row_single_creation(dataset, rand_gen, image_url):
177184
assert requests.get(data_row_2.row_data).content == data
178185

179186

187+
def test_create_data_row_with_dict(dataset, image_url):
188+
client = dataset.client
189+
assert len(list(dataset.data_rows())) == 0
190+
dr = {"row_data": image_url}
191+
data_row = dataset.create_data_row(dr)
192+
assert len(list(dataset.data_rows())) == 1
193+
assert data_row.dataset() == dataset
194+
assert data_row.created_by() == client.get_user()
195+
assert data_row.organization() == client.get_organization()
196+
assert requests.get(image_url).content == \
197+
requests.get(data_row.row_data).content
198+
assert data_row.media_attributes is not None
199+
200+
201+
def test_create_data_row_with_dict_containing_field(dataset, image_url):
202+
client = dataset.client
203+
assert len(list(dataset.data_rows())) == 0
204+
dr = {DataRow.row_data: image_url}
205+
data_row = dataset.create_data_row(dr)
206+
assert len(list(dataset.data_rows())) == 1
207+
assert data_row.dataset() == dataset
208+
assert data_row.created_by() == client.get_user()
209+
assert data_row.organization() == client.get_organization()
210+
assert requests.get(image_url).content == \
211+
requests.get(data_row.row_data).content
212+
assert data_row.media_attributes is not None
213+
214+
215+
def test_create_data_row_with_dict_unpacked(dataset, image_url):
216+
client = dataset.client
217+
assert len(list(dataset.data_rows())) == 0
218+
dr = {"row_data": image_url}
219+
data_row = dataset.create_data_row(**dr)
220+
assert len(list(dataset.data_rows())) == 1
221+
assert data_row.dataset() == dataset
222+
assert data_row.created_by() == client.get_user()
223+
assert data_row.organization() == client.get_organization()
224+
assert requests.get(image_url).content == \
225+
requests.get(data_row.row_data).content
226+
assert data_row.media_attributes is not None
227+
228+
229+
def test_create_data_row_with_invalid_input(dataset, image_url):
230+
with pytest.raises(labelbox.exceptions.InvalidQueryError) as exc:
231+
dataset.create_data_row("asdf")
232+
233+
dr = {"row_data": image_url}
234+
with pytest.raises(labelbox.exceptions.InvalidQueryError) as exc:
235+
dataset.create_data_row(dr, row_data=image_url)
236+
237+
180238
def test_create_data_row_with_metadata(dataset, image_url):
181239
client = dataset.client
182240
assert len(list(dataset.data_rows())) == 0
183241

184242
data_row = dataset.create_data_row(row_data=image_url,
185-
custom_metadata=make_metadata_fields())
243+
metadata_fields=make_metadata_fields())
186244

187245
assert len(list(dataset.data_rows())) == 1
188246
assert data_row.dataset() == dataset
@@ -191,8 +249,9 @@ def test_create_data_row_with_metadata(dataset, image_url):
191249
assert requests.get(image_url).content == \
192250
requests.get(data_row.row_data).content
193251
assert data_row.media_attributes is not None
194-
assert len(data_row.custom_metadata) == 5
195-
assert [m["schemaId"] for m in data_row.custom_metadata
252+
filtered_md_fields = filter_precomputed_embeddings(data_row.metadata_fields)
253+
assert len(filtered_md_fields) == 4
254+
assert [m["schemaId"] for m in filtered_md_fields
196255
].sort() == EXPECTED_METADATA_SCHEMA_IDS
197256

198257

@@ -201,7 +260,7 @@ def test_create_data_row_with_metadata_dict(dataset, image_url):
201260
assert len(list(dataset.data_rows())) == 0
202261

203262
data_row = dataset.create_data_row(
204-
row_data=image_url, custom_metadata=make_metadata_fields_dict())
263+
row_data=image_url, metadata_fields=make_metadata_fields_dict())
205264

206265
assert len(list(dataset.data_rows())) == 1
207266
assert data_row.dataset() == dataset
@@ -210,8 +269,9 @@ def test_create_data_row_with_metadata_dict(dataset, image_url):
210269
assert requests.get(image_url).content == \
211270
requests.get(data_row.row_data).content
212271
assert data_row.media_attributes is not None
213-
assert len(data_row.custom_metadata) == 5
214-
assert [m["schemaId"] for m in data_row.custom_metadata
272+
filtered_md_fields = filter_precomputed_embeddings(data_row.metadata_fields)
273+
assert len(filtered_md_fields) == 4
274+
assert [m["schemaId"] for m in filtered_md_fields
215275
].sort() == EXPECTED_METADATA_SCHEMA_IDS
216276

217277

@@ -221,7 +281,7 @@ def test_create_data_row_with_invalid_metadata(dataset, image_url):
221281
DataRowMetadataField(schema_id=EMBEDDING_SCHEMA_ID, value=[0.0] * 128))
222282

223283
with pytest.raises(labelbox.exceptions.MalformedQueryException) as excinfo:
224-
dataset.create_data_row(row_data=image_url, custom_metadata=fields)
284+
dataset.create_data_row(row_data=image_url, metadata_fields=fields)
225285

226286

227287
def test_create_data_rows_with_metadata(dataset, image_url):
@@ -232,22 +292,22 @@ def test_create_data_rows_with_metadata(dataset, image_url):
232292
{
233293
DataRow.row_data: image_url,
234294
DataRow.external_id: "row1",
235-
DataRow.custom_metadata: make_metadata_fields()
295+
DataRow.metadata_fields: make_metadata_fields()
236296
},
237297
{
238298
DataRow.row_data: image_url,
239299
DataRow.external_id: "row2",
240-
"custom_metadata": make_metadata_fields()
300+
"metadata_fields": make_metadata_fields()
241301
},
242302
{
243303
DataRow.row_data: image_url,
244304
DataRow.external_id: "row3",
245-
DataRow.custom_metadata: make_metadata_fields_dict()
305+
DataRow.metadata_fields: make_metadata_fields_dict()
246306
},
247307
{
248308
DataRow.row_data: image_url,
249309
DataRow.external_id: "row4",
250-
"custom_metadata": make_metadata_fields_dict()
310+
"metadata_fields": make_metadata_fields_dict()
251311
},
252312
])
253313
task.wait_till_done()
@@ -261,8 +321,11 @@ def test_create_data_rows_with_metadata(dataset, image_url):
261321
assert requests.get(image_url).content == \
262322
requests.get(row.row_data).content
263323
assert row.media_attributes is not None
264-
assert len(row.custom_metadata) == 5
265-
assert [m["schemaId"] for m in row.custom_metadata
324+
325+
# Remove 'precomputedImageEmbedding' metadata if automatically added
326+
filtered_md_fields = filter_precomputed_embeddings(row.metadata_fields)
327+
assert len(filtered_md_fields) == 4
328+
assert [m["schemaId"] for m in filtered_md_fields
266329
].sort() == EXPECTED_METADATA_SCHEMA_IDS
267330

268331

@@ -273,7 +336,7 @@ def test_create_data_rows_with_invalid_metadata(dataset, image_url):
273336

274337
task = dataset.create_data_rows([{
275338
DataRow.row_data: image_url,
276-
DataRow.custom_metadata: fields
339+
DataRow.metadata_fields: fields
277340
}])
278341
task.wait_till_done()
279342
assert task.status == "FAILED"
@@ -288,7 +351,7 @@ def test_create_data_rows_with_metadata_missing_value(dataset, image_url):
288351
{
289352
DataRow.row_data: image_url,
290353
DataRow.external_id: "row1",
291-
DataRow.custom_metadata: fields
354+
DataRow.metadata_fields: fields
292355
},
293356
])
294357

@@ -302,7 +365,7 @@ def test_create_data_rows_with_metadata_missing_schema_id(dataset, image_url):
302365
{
303366
DataRow.row_data: image_url,
304367
DataRow.external_id: "row1",
305-
DataRow.custom_metadata: fields
368+
DataRow.metadata_fields: fields
306369
},
307370
])
308371

@@ -316,7 +379,7 @@ def test_create_data_rows_with_metadata_wrong_type(dataset, image_url):
316379
{
317380
DataRow.row_data: image_url,
318381
DataRow.external_id: "row1",
319-
DataRow.custom_metadata: fields
382+
DataRow.metadata_fields: fields
320383
},
321384
])
322385

0 commit comments

Comments
 (0)