From 3696f7fb0875ab2bd9cb425fa421ce808e853acb Mon Sep 17 00:00:00 2001 From: Matt Sokoloff Date: Wed, 24 Feb 2021 19:11:38 -0500 Subject: [PATCH 1/6] restore tests --- .github/workflows/python-package.yml | 2 +- Dockerfile | 5 +-- Makefile | 4 +-- labelbox/client.py | 24 ++++++++----- labelbox/schema/labeling_frontend.py | 1 - labelbox/schema/ontology.py | 45 +++++++++++++++++++++++++ labelbox/schema/project.py | 10 ++++++ tests/integration/test_client_errors.py | 25 ++++++++------ tests/integration/test_data_upload.py | 10 +++--- tests/integration/test_label.py | 25 ++++++++------ tests/integration/test_webhook.py | 2 -- 11 files changed, 110 insertions(+), 43 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 0f6a87e4c..c1ce19937 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -67,7 +67,7 @@ jobs: env: # make sure to tell tox to use these environs in tox.ini # - # randall@labelbox.com + # msokoloff+prod-python@labelbox.com LABELBOX_TEST_API_KEY_PROD: ${{ secrets.LABELBOX_API_KEY }} # randall+staging-python@labelbox.com diff --git a/Dockerfile b/Dockerfile index 91c97e336..b889c7c1d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,8 @@ -FROM python:3.6 +FROM python:3.7 + +RUN pip install pytest COPY . /usr/src/labelbox WORKDIR /usr/src/labelbox -RUN pip install pytest RUN python setup.py install diff --git a/Makefile b/Makefile index 60ceb6697..e2c41de8d 100644 --- a/Makefile +++ b/Makefile @@ -5,11 +5,11 @@ build: test-staging: build docker run -it -v ${PWD}:/usr/src -w /usr/src \ -e LABELBOX_TEST_ENVIRON="staging" \ - -e LABELBOX_TEST_API_KEY_STAGING="" \ + -e LABELBOX_TEST_API_KEY_STAGING=${LABELBOX_TEST_API_KEY_STAGING} \ local/labelbox-python:test pytest $(PATH_TO_TEST) -svvx test-prod: build docker run -it -v ${PWD}:/usr/src -w /usr/src \ -e LABELBOX_TEST_ENVIRON="prod" \ - -e LABELBOX_TEST_API_KEY_PROD="" \ + -e LABELBOX_TEST_API_KEY_PROD=${LABELBOX_TEST_API_KEY_PROD} \ local/labelbox-python:test pytest $(PATH_TO_TEST) -svvx diff --git a/labelbox/client.py b/labelbox/client.py index cd4bc97a2..5728bb674 100644 --- a/labelbox/client.py +++ b/labelbox/client.py @@ -1,5 +1,6 @@ from datetime import datetime, timezone import json +from labelbox.schema.ontology import Ontology import logging import mimetypes import os @@ -71,6 +72,7 @@ def __init__(self, 'X-User-Agent': f'python-sdk {SDK_VERSION}' } + #TODO: Add exponential backoff so we don'tt overwhelm the api @retry.Retry(predicate=retry.if_exception_type( labelbox.exceptions.InternalServerError)) def execute(self, query, params=None, timeout=10.0): @@ -126,25 +128,23 @@ def convert_value(value): logger.debug("Response: %s", response.text) except requests.exceptions.Timeout as e: raise labelbox.exceptions.TimeoutError(str(e)) - except requests.exceptions.RequestException as e: logger.error("Unknown error: %s", str(e)) raise labelbox.exceptions.NetworkError(e) - except Exception as e: raise labelbox.exceptions.LabelboxError( "Unknown error during Client.query(): " + str(e), e) - try: r_json = response.json() except: - error_502 = '502 Bad Gateway' - if error_502 in response.text: - raise labelbox.exceptions.InternalServerError(error_502) if "upstream connect error or disconnect/reset before headers" \ in response.text: raise labelbox.exceptions.InternalServerError( "Connection reset") + elif response.status_code == 502: + error_502 = '502 Bad Gateway' + raise labelbox.exceptions.InternalServerError(error_502) + raise labelbox.exceptions.LabelboxError( "Failed to parse response as JSON: %s" % response.text) @@ -189,6 +189,7 @@ def check_errors(keywords, *path): # Check if API limit was exceeded response_msg = r_json.get("message", "") + if response_msg.startswith("You have exceeded"): raise labelbox.exceptions.ApiLimitError(response_msg) @@ -292,7 +293,6 @@ def upload_data(self, "1": (filename, content, content_type) if (filename and content_type) else content }) - try: file_data = response.json().get("data", None) except ValueError as e: # response is not valid JSON @@ -415,6 +415,12 @@ def get_datasets(self, where=None): """ return self._get_all(Dataset, where) + def get_ontologies(self, where=None): + """ + #TODO + """ + return self._get_all(Ontology, where) + def get_labeling_frontends(self, where=None): """ Fetches all the labeling frontends. @@ -473,7 +479,9 @@ def create_dataset(self, **kwargs): """ return self._create(Dataset, kwargs) - def create_project(self, **kwargs): + def create_project(self, + ontology=None, + **kwargs): #<<<<< TODO: Do we want that signature """ Creates a Project object on the server. Attribute values are passed as keyword arguments. diff --git a/labelbox/schema/labeling_frontend.py b/labelbox/schema/labeling_frontend.py index a7fb7f943..7df678107 100644 --- a/labelbox/schema/labeling_frontend.py +++ b/labelbox/schema/labeling_frontend.py @@ -35,7 +35,6 @@ class LabelingFrontendOptions(DbObject): organization (Relationship): `ToOne` relationship to Organization """ customization_options = Field.String("customization_options") - project = Relationship.ToOne("Project") labeling_frontend = Relationship.ToOne("LabelingFrontend") organization = Relationship.ToOne("Organization") diff --git a/labelbox/schema/ontology.py b/labelbox/schema/ontology.py index 9b28f6d01..424ed29df 100644 --- a/labelbox/schema/ontology.py +++ b/labelbox/schema/ontology.py @@ -63,6 +63,51 @@ def from_json(cls, json_dict): return cls(**_dict) +""" +* The reason that an ontology is read only is because it is a second class citizen to labeling front end options. +** This is because it is a more specific implementation of this. + +- However, we want to support ontologies as if they were labeling front ends. +- With this special relationship we can override the default behavior to mock the appropriate changes to the labeling front end + + +###Note: The only problem is that you can't just create a stand alone ontology. right? +# - Since you need to create a project and query the project ontology before one exists. + +^^^^^^^ This is the worst. Even with hackery, you can't force a DB entry without create a new proj :( + However, labeling front-ends cannot be created without projects either! So maybe we just copy the use cases of that. + Use this as the simpler interface and make it clear that this is just a limited version + +""" + + +class OntologyRelationship(Relationship): + + def __get__(self, parent): + if not self.parent: + self.parent = parent + return self + + def __init__(self): + super(OntologyRelationship, self).__init__() + self.parent = None + + def __call__(self): + if self.parent.setup_complete is None: + #As it currently stands, it creates a new ontology with no new tools and the ontology cannot be edited. + return None + return super().__call__ + + def connect(self, other_ontology): + if not isinstance(other_ontology, OntologyRelationship): + raise Exception("only support ") + + def disconnect(self): + raise Exception( + "Disconnect is not supported for Onotlogy. Instead connect another ontology to replace the current one." + ) + + class Ontology(DbObject): """An ontology specifies which tools and classifications are available to a project. This is read only for now. diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py index b8afa1ec9..348f4b9c9 100644 --- a/labelbox/schema/project.py +++ b/labelbox/schema/project.py @@ -221,6 +221,16 @@ def review_metrics(self, net_score): res = self.client.execute(query_str, {id_param: self.uid}) return res["project"]["reviewMetrics"]["labelAggregate"]["count"] + def setup_from_ontology(self, ontology): + """ + * An ontology is a specific kind of a labeling frontend option that is compatible with the Editor. + * Most customers use this interface + """ + frontend = list( + client.get_labeling_frontends( + where=LabelingFrontend.name == "Editor"))[0] + return self.setup(frontend, ontology.normalized) + def setup(self, labeling_frontend, labeling_frontend_options): """ Finalizes the Project setup. diff --git a/tests/integration/test_client_errors.py b/tests/integration/test_client_errors.py index f1fbbc652..533743a34 100644 --- a/tests/integration/test_client_errors.py +++ b/tests/integration/test_client_errors.py @@ -1,10 +1,9 @@ from multiprocessing.dummy import Pool import os import time - import pytest -from labelbox import Project, Dataset, User +from labelbox import Project, Dataset, User, LabelingFrontend import labelbox.client import labelbox.exceptions @@ -111,22 +110,26 @@ def test_invalid_attribute_error(client, rand_gen): project.delete() -@pytest.mark.slow -# TODO improve consistency -@pytest.mark.skip(reason="Inconsistent test") -def test_api_limit_error(client, rand_gen): - project_id = client.create_project(name=rand_gen(str)).uid +@pytest.mark.skip("timeouts cause failure before rate limit") +def test_api_limit_error(client): + global limited + limited = False def get(arg): try: - return client.get_project(project_id) + return client.get_user() except labelbox.exceptions.ApiLimitError as e: return e - with Pool(300) as pool: - results = pool.map(get, list(range(2000))) + n = 1600 + with Pool(30) as pool: + start = time.time() + results = list(pool.imap(get, range(n)), total=n) + elapsed = time.time() - start + assert elapsed < 60, "Didn't finish fast enough" assert labelbox.exceptions.ApiLimitError in {type(r) for r in results} + del limited # Sleep at the end of this test to allow other tests to execute. - time.sleep(60) + #time.sleep(60) diff --git a/tests/integration/test_data_upload.py b/tests/integration/test_data_upload.py index 60ce78272..e10dac943 100644 --- a/tests/integration/test_data_upload.py +++ b/tests/integration/test_data_upload.py @@ -2,10 +2,8 @@ import requests -# TODO it seems that at some point Google Storage (gs prefix) started being -# returned, and we can't just download those with requests. Fix this -@pytest.mark.skip -def test_file_upload(client, rand_gen): +def test_file_upload(client, rand_gen, dataset): data = rand_gen(str) - url = client.upload_data(data.encode()) - assert requests.get(url).text == data + uri = client.upload_data(data.encode()) + data_row = dataset.create_data_row(row_data=uri) + assert requests.get(data_row.row_data).text == data diff --git a/tests/integration/test_label.py b/tests/integration/test_label.py index 4dc404f08..dec4fccec 100644 --- a/tests/integration/test_label.py +++ b/tests/integration/test_label.py @@ -1,3 +1,4 @@ +from labelbox.schema.labeling_frontend import LabelingFrontend import time import pytest @@ -30,18 +31,22 @@ def test_labels(label_pack): assert list(data_row.labels()) == [] -# TODO check if this is supported or not -@pytest.mark.skip -def test_label_export(label_pack): +def test_label_export(client, label_pack): project, dataset, data_row, label = label_pack - project.create_label(data_row=data_row, label="l2") - - exported_labels_url = project.export_labels(5) + #Old create_label works even with projects setup using the new editor. + #It will appear in the export, just not in the new editor + project.create_label(data_row=data_row, label="export_label") + #Project has to be setup for export to be possible + editor = list( + client.get_labeling_frontends( + where=LabelingFrontend.name == "editor"))[0] + empty_ontology = {"tools": [], "classifications": []} + project.setup(editor, empty_ontology) + exported_labels_url = project.export_labels() assert exported_labels_url is not None - if exported_labels_url is not None: - exported_labels = requests.get(exported_labels_url) - # TODO check content - assert False + exported_labels = requests.get(exported_labels_url) + labels = [example['Label'] for example in exported_labels.json()] + assert 'export_label' in labels def test_label_update(label_pack): diff --git a/tests/integration/test_webhook.py b/tests/integration/test_webhook.py index 66fb86312..ebe32341e 100644 --- a/tests/integration/test_webhook.py +++ b/tests/integration/test_webhook.py @@ -3,8 +3,6 @@ from labelbox import Webhook -# TODO investigate why this fails -@pytest.mark.skip def test_webhook_create_update(project, rand_gen): client = project.client url = "https:/" + rand_gen(str) From a989935363059dbea134a5f5c90c13f9bc682c98 Mon Sep 17 00:00:00 2001 From: Matt Sokoloff Date: Thu, 25 Feb 2021 09:07:10 -0500 Subject: [PATCH 2/6] cleaned up --- labelbox/client.py | 11 +------ labelbox/schema/labeling_frontend.py | 1 + labelbox/schema/ontology.py | 47 ---------------------------- labelbox/schema/project.py | 10 ------ 4 files changed, 2 insertions(+), 67 deletions(-) diff --git a/labelbox/client.py b/labelbox/client.py index 5728bb674..cd36ec3e8 100644 --- a/labelbox/client.py +++ b/labelbox/client.py @@ -1,6 +1,5 @@ from datetime import datetime, timezone import json -from labelbox.schema.ontology import Ontology import logging import mimetypes import os @@ -415,12 +414,6 @@ def get_datasets(self, where=None): """ return self._get_all(Dataset, where) - def get_ontologies(self, where=None): - """ - #TODO - """ - return self._get_all(Ontology, where) - def get_labeling_frontends(self, where=None): """ Fetches all the labeling frontends. @@ -479,9 +472,7 @@ def create_dataset(self, **kwargs): """ return self._create(Dataset, kwargs) - def create_project(self, - ontology=None, - **kwargs): #<<<<< TODO: Do we want that signature + def create_project(self, **kwargs): """ Creates a Project object on the server. Attribute values are passed as keyword arguments. diff --git a/labelbox/schema/labeling_frontend.py b/labelbox/schema/labeling_frontend.py index 7df678107..a7fb7f943 100644 --- a/labelbox/schema/labeling_frontend.py +++ b/labelbox/schema/labeling_frontend.py @@ -35,6 +35,7 @@ class LabelingFrontendOptions(DbObject): organization (Relationship): `ToOne` relationship to Organization """ customization_options = Field.String("customization_options") + project = Relationship.ToOne("Project") labeling_frontend = Relationship.ToOne("LabelingFrontend") organization = Relationship.ToOne("Organization") diff --git a/labelbox/schema/ontology.py b/labelbox/schema/ontology.py index 424ed29df..6fea2fa8d 100644 --- a/labelbox/schema/ontology.py +++ b/labelbox/schema/ontology.py @@ -63,55 +63,9 @@ def from_json(cls, json_dict): return cls(**_dict) -""" -* The reason that an ontology is read only is because it is a second class citizen to labeling front end options. -** This is because it is a more specific implementation of this. - -- However, we want to support ontologies as if they were labeling front ends. -- With this special relationship we can override the default behavior to mock the appropriate changes to the labeling front end - - -###Note: The only problem is that you can't just create a stand alone ontology. right? -# - Since you need to create a project and query the project ontology before one exists. - -^^^^^^^ This is the worst. Even with hackery, you can't force a DB entry without create a new proj :( - However, labeling front-ends cannot be created without projects either! So maybe we just copy the use cases of that. - Use this as the simpler interface and make it clear that this is just a limited version - -""" - - -class OntologyRelationship(Relationship): - - def __get__(self, parent): - if not self.parent: - self.parent = parent - return self - - def __init__(self): - super(OntologyRelationship, self).__init__() - self.parent = None - - def __call__(self): - if self.parent.setup_complete is None: - #As it currently stands, it creates a new ontology with no new tools and the ontology cannot be edited. - return None - return super().__call__ - - def connect(self, other_ontology): - if not isinstance(other_ontology, OntologyRelationship): - raise Exception("only support ") - - def disconnect(self): - raise Exception( - "Disconnect is not supported for Onotlogy. Instead connect another ontology to replace the current one." - ) - - class Ontology(DbObject): """An ontology specifies which tools and classifications are available to a project. This is read only for now. - Attributes: name (str) description (str) @@ -120,7 +74,6 @@ class Ontology(DbObject): normalized (json) object_schema_count (int) classification_schema_count (int) - projects (Relationship): `ToMany` relationship to Project created_by (Relationship): `ToOne` relationship to User """ diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py index 348f4b9c9..b8afa1ec9 100644 --- a/labelbox/schema/project.py +++ b/labelbox/schema/project.py @@ -221,16 +221,6 @@ def review_metrics(self, net_score): res = self.client.execute(query_str, {id_param: self.uid}) return res["project"]["reviewMetrics"]["labelAggregate"]["count"] - def setup_from_ontology(self, ontology): - """ - * An ontology is a specific kind of a labeling frontend option that is compatible with the Editor. - * Most customers use this interface - """ - frontend = list( - client.get_labeling_frontends( - where=LabelingFrontend.name == "Editor"))[0] - return self.setup(frontend, ontology.normalized) - def setup(self, labeling_frontend, labeling_frontend_options): """ Finalizes the Project setup. From 8e42e44c12b8c7ec540a0d0a3af8a3c86aa5949c Mon Sep 17 00:00:00 2001 From: Matt Sokoloff Date: Mon, 1 Mar 2021 06:24:57 -0500 Subject: [PATCH 3/6] cleanup --- tests/integration/test_client_errors.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_client_errors.py b/tests/integration/test_client_errors.py index 533743a34..319c86c9a 100644 --- a/tests/integration/test_client_errors.py +++ b/tests/integration/test_client_errors.py @@ -3,7 +3,7 @@ import time import pytest -from labelbox import Project, Dataset, User, LabelingFrontend +from labelbox import Project, Dataset, User import labelbox.client import labelbox.exceptions @@ -129,7 +129,7 @@ def get(arg): assert elapsed < 60, "Didn't finish fast enough" assert labelbox.exceptions.ApiLimitError in {type(r) for r in results} - del limited # Sleep at the end of this test to allow other tests to execute. - #time.sleep(60) + time.sleep(60) + From a1dc5def8b1f6999ceb49917d66e10a5365808bb Mon Sep 17 00:00:00 2001 From: Matt Sokoloff Date: Mon, 1 Mar 2021 06:32:35 -0500 Subject: [PATCH 4/6] yapf --- tests/integration/test_client_errors.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/test_client_errors.py b/tests/integration/test_client_errors.py index 319c86c9a..5f16b0f9d 100644 --- a/tests/integration/test_client_errors.py +++ b/tests/integration/test_client_errors.py @@ -132,4 +132,3 @@ def get(arg): # Sleep at the end of this test to allow other tests to execute. time.sleep(60) - From b007f9a25810db5e48954f9fbc51f33f698c3ccf Mon Sep 17 00:00:00 2001 From: Matt Sokoloff Date: Sun, 7 Mar 2021 10:12:19 -0500 Subject: [PATCH 5/6] increase timeout. Minor changes --- labelbox/client.py | 2 +- tests/integration/test_client_errors.py | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/labelbox/client.py b/labelbox/client.py index cd36ec3e8..a25ab8afb 100644 --- a/labelbox/client.py +++ b/labelbox/client.py @@ -74,7 +74,7 @@ def __init__(self, #TODO: Add exponential backoff so we don'tt overwhelm the api @retry.Retry(predicate=retry.if_exception_type( labelbox.exceptions.InternalServerError)) - def execute(self, query, params=None, timeout=10.0): + def execute(self, query, params=None, timeout=30.0): """ Sends a request to the server for the execution of the given query. diff --git a/tests/integration/test_client_errors.py b/tests/integration/test_client_errors.py index 533743a34..c8d8f4b5d 100644 --- a/tests/integration/test_client_errors.py +++ b/tests/integration/test_client_errors.py @@ -112,8 +112,6 @@ def test_invalid_attribute_error(client, rand_gen): @pytest.mark.skip("timeouts cause failure before rate limit") def test_api_limit_error(client): - global limited - limited = False def get(arg): try: @@ -121,7 +119,9 @@ def get(arg): except labelbox.exceptions.ApiLimitError as e: return e + #Rate limited at 1500 + buffer n = 1600 + #max of 30 concurrency before the service becomes unavailable with Pool(30) as pool: start = time.time() results = list(pool.imap(get, range(n)), total=n) @@ -129,7 +129,6 @@ def get(arg): assert elapsed < 60, "Didn't finish fast enough" assert labelbox.exceptions.ApiLimitError in {type(r) for r in results} - del limited # Sleep at the end of this test to allow other tests to execute. - #time.sleep(60) + time.sleep(60) From e33e2361eae3b38c1f99929dab6d47c44d61b5b9 Mon Sep 17 00:00:00 2001 From: Matt Sokoloff Date: Sun, 7 Mar 2021 10:38:50 -0500 Subject: [PATCH 6/6] update changelog for release --- CHANGELOG.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a1350462d..a7f564b7c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,12 @@ * Comparing a Labelbox object (e.g. Project) to None doesn't raise an exception * Adding `order_by` to `Project.labels` doesn't raise an exception -## Version 2.4.10 (2021-01-05) +## Version 2.4.11 (2021-03-07) +### Fix +* Increase query timeout +* Retry 502s + +## Version 2.4.10 (2021-02-05) ### Added * SDK version added to request headers