From 6a0457ae5b1e25bf0fd0bf547da791e575eb4656 Mon Sep 17 00:00:00 2001 From: Ludwig Schneider Date: Tue, 27 Feb 2024 08:25:29 -0600 Subject: [PATCH 01/10] Allow users to handle invalid nodes coming from a search against the API --- docs/faq.md | 53 ++++++++++++++++++++++++++++++++++++-- src/cript/api/api.py | 2 +- src/cript/api/paginator.py | 30 ++++++++++++++++----- tests/api/test_search.py | 17 +++++++++++- 4 files changed, 91 insertions(+), 11 deletions(-) diff --git a/docs/faq.md b/docs/faq.md index 8786dd986..867abc356 100644 --- a/docs/faq.md +++ b/docs/faq.md @@ -55,7 +55,7 @@ tab for developer documentation._ **Q:** Is there documentation detailing the internal workings of the code? -**A:** _Absolutely! For an in-depth look at the CRIPT Python SDK code, +**A:** _Absolutely! For an in-depth look at the CRIPT Python SDK code, consult the [GitHub repository wiki internal documentation](https://github.com/C-Accel-CRIPT/Python-SDK/wiki)._ --- @@ -84,7 +84,7 @@ A GitHub account is required._ **Q:** Where can I find the release notes for each SDK version? -**A:** _The release notes can be found on our +**A:** _The release notes can be found on our [CRIPT Python SDK repository releases section](https://github.com/C-Accel-CRIPT/Python-SDK/releases)_ --- @@ -97,6 +97,55 @@ the code is written to get a better grasp of it? There you will find documentation on everything from how our code is structure, how we aim to write our documentation, CI/CD, and more._ +--- + +**Q:** What can I do, when my `api.search(...)` fails with a `cript.nodes.exception.CRIPTJsonDeserializationError` or similar? + +**A:** _There is a solution for you. Sometimes CRIPT can contain nodes formatted in a way that the Python SDK does not understand. We can disable the automatic conversion from the API response into SDK nodes. Here is an example of how to achieve this: +```python +# Create API object in with statement, here it assumes host, token, and storage token are in your environment variables +with cript.API() as api: + # Find the paginator object, which is a python iterator over the search results. + materials_paginator = cript_api.search(node_type=cript.Material, search_mode=cript.SearchModes.NODE_TYPE) + # Usually you would do + # `materials_list = list(materials_paginator)` + # or + # for node in materials_paginator: + # #do node stuff + # But now we want more control over the iteration to ignore failing node decoding. + # And store the result in a list of valid nodes + materials_list = [] + # We use a while True loop to iterate over the results + while True: + # This first try catches, when we reach the end of the search results. + # The `next()` function raises a StopIteration exception in that case + try: + # First we try to convert the current response into a node directly + try: + material_node = next(materials_paginator) + # But if that fails, we catch the exception from CRIPT + except cript.CRIPTException as exc: + # In case of failure, we disable the auto_load_function temporarily + materials_paginator.auto_load_nodes = False + # And only obtain the unloaded node JSON instead + material_json = next(materials_paginator) + # Here you can inspect and manually handle the problem. + # In the example, we just print it and ignore it otherwise + print(exc, material_json) + else: + # After a valid node is loaded (try block didn't fail) + # we store the valid node in the list + materials_list += [material_node] + finally: + # No matter what happened, for the next iteration we want to try to obtain + # an auto loaded node again, so we reset the paginator state. + materials_paginator.auto_load_nodes = True + except StopIteration: + # If next() of the paginator indicates an end of the search results, break the loop + break +``` + + _We try to also have type hinting, comments, and docstrings for all the code that we work on so it is clear and easy for anyone reading it to easily understand._ _if all else fails, contact us on our [GitHub Repository](https://github.com/C-Accel-CRIPT/Python-SDK)._ diff --git a/src/cript/api/api.py b/src/cript/api/api.py index 980c08876..3081399f8 100644 --- a/src/cript/api/api.py +++ b/src/cript/api/api.py @@ -988,7 +988,7 @@ def _capsule_request(self, url_path: str, method: str, api_request: bool = True, response: requests.Response = requests.request(url=url, method=method, headers=headers, timeout=timeout, **kwargs) post_log_message: str = f"Request return with {response.status_code}" if self.extra_api_log_debug_info: - post_log_message += f" {response.raw}" + post_log_message += f" {response.text}" self.logger.debug(post_log_message) return response diff --git a/src/cript/api/paginator.py b/src/cript/api/paginator.py index 929954727..0625138a1 100644 --- a/src/cript/api/paginator.py +++ b/src/cript/api/paginator.py @@ -1,4 +1,4 @@ -from json import JSONDecodeError +import json from typing import Dict, Union from urllib.parse import quote @@ -33,6 +33,7 @@ class Paginator: _current_position: int _fetched_nodes: list _number_fetched_pages: int = 0 + auto_load_nodes: bool = True @beartype def __init__( @@ -119,8 +120,11 @@ def _fetch_next_page(self) -> None: # if converting API response to JSON gives an error # then there must have been an API error, so raise the requests error # this is to avoid bad indirect errors and make the errors more direct for users - except JSONDecodeError: - response.raise_for_status() + except json.JSONDecodeError as json_exc: + try: + response.raise_for_status() + except Exception as exc: + raise exc from json_exc # handling both cases in case there is result inside of data or just data try: @@ -137,8 +141,10 @@ def _fetch_next_page(self) -> None: if api_response["code"] != 200: raise APIError(api_error=str(response.json()), http_method="GET", api_url=temp_url_path) - node_list = load_nodes_from_json(current_page_results) - self._fetched_nodes += node_list + # Here we only load the JSON into the temporary results. + # This delays error checking, and allows users to disable auto node conversion + json_list = current_page_results + self._fetched_nodes += json_list def __next__(self): if self._current_position >= len(self._fetched_nodes): @@ -147,14 +153,24 @@ def __next__(self): raise StopIteration self._fetch_next_page() - self._current_position += 1 try: - return self._fetched_nodes[self._current_position - 1] + next_node_json = self._fetched_nodes[self._current_position - 1] except IndexError: # This is not a random access iteration. # So if fetching a next page wasn't enough to get the index inbound, # The iteration stops raise StopIteration + if self.auto_load_nodes: + return_data = load_nodes_from_json(next_node_json) + else: + return_data = next_node_json + + # Advance position last, so if an exception occurs, for example when + # node decoding fails, we do not advance, and users can try again without decoding + self._current_position += 1 + + return return_data + def __iter__(self): self._current_position = 0 return self diff --git a/tests/api/test_search.py b/tests/api/test_search.py index 860ab43ea..47d3de802 100644 --- a/tests/api/test_search.py +++ b/tests/api/test_search.py @@ -24,7 +24,22 @@ def test_api_search_node_type(cript_api: cript.API) -> None: # test search results assert isinstance(materials_paginator, Paginator) - materials_list = list(materials_paginator) + materials_list = [] + while True: + try: + try: + material_node = next(materials_paginator) + except cript.CRIPTException as exc: + materials_paginator.auto_load_nodes = False + material_json = next(materials_paginator) + print(exc, material_json) + else: + materials_list += [material_node] + finally: + materials_paginator.auto_load_nodes = True + except StopIteration: + break + # Assure that we paginated more then one page assert materials_paginator._current_page_number > 0 assert len(materials_list) > 5 From e7bfa35570de7b0555e96cd63cc526e08c0a2163 Mon Sep 17 00:00:00 2001 From: Ludwig Schneider Date: Tue, 27 Feb 2024 08:35:55 -0600 Subject: [PATCH 02/10] fix issues and shorten test --- tests/api/test_search.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/api/test_search.py b/tests/api/test_search.py index 47d3de802..7030e434a 100644 --- a/tests/api/test_search.py +++ b/tests/api/test_search.py @@ -39,11 +39,14 @@ def test_api_search_node_type(cript_api: cript.API) -> None: materials_paginator.auto_load_nodes = True except StopIteration: break + # We don't need to search for a million pages here. + if materials_paginator._number_fetched_pages > 6: + break # Assure that we paginated more then one page - assert materials_paginator._current_page_number > 0 + assert materials_paginator._number_fetched_pages > 0 assert len(materials_list) > 5 - first_page_first_result = materials_list[0]["name"] + first_page_first_result = materials_list[0].name # just checking that the word has a few characters in it assert len(first_page_first_result) > 3 From ef52db0f4e445afaaf3e66eeb99f9693b3aaa702 Mon Sep 17 00:00:00 2001 From: Ludwig Schneider Date: Tue, 27 Feb 2024 09:14:16 -0600 Subject: [PATCH 03/10] first steps --- src/cript/api/api.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/cript/api/api.py b/src/cript/api/api.py index 3081399f8..6bbd7ffd4 100644 --- a/src/cript/api/api.py +++ b/src/cript/api/api.py @@ -63,6 +63,7 @@ class API: _db_schema: Optional[DataSchema] = None _api_prefix: str = "api" _api_version: str = "v1" + _api_request_session: Union[None, requests.Session] = None # trunk-ignore-begin(cspell) # AWS S3 constants @@ -322,6 +323,12 @@ def connect(self): CRIPTConnectionError raised when the host does not give the expected response """ + + # Establish a requests session object + if self._api_request_session: + self.disconnect() + self._api_request_session = requests.Session() + # As a form to check our connection, we pull and establish the data schema try: self._db_schema = DataSchema(self) @@ -344,6 +351,10 @@ def disconnect(self): For manual connection: nested API object are discouraged. """ + # Disconnect request session + if self._api_request_session: + self._api_request_session.close() + # Restore the previously active global API (might be None) global _global_cached_api _global_cached_api = self._previous_global_cached_api @@ -946,7 +957,7 @@ def delete_node_by_uuid(self, node_type: str, node_uuid: str) -> None: self.logger.info(f"Deleted '{node_type.title()}' with UUID of '{node_uuid}' from CRIPT API.") - def _capsule_request(self, url_path: str, method: str, api_request: bool = True, headers: Optional[Dict] = None, timeout: int = _API_TIMEOUT, **kwargs) -> requests.Response: + def _capsule_request(self, url_path: str, method: str, api_request: bool = True, timeout: int = _API_TIMEOUT, **kwargs) -> requests.Response: """Helper function that capsules every request call we make against the backend. Please *always* use this methods instead of `requests` directly. From 0102829cd249eca811d24e8c905f9014ab14b0f5 Mon Sep 17 00:00:00 2001 From: Ludwig Schneider Date: Tue, 27 Feb 2024 16:31:27 -0600 Subject: [PATCH 04/10] trying somehting --- src/cript/api/api.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/cript/api/api.py b/src/cript/api/api.py index 6bbd7ffd4..c7191d63a 100644 --- a/src/cript/api/api.py +++ b/src/cript/api/api.py @@ -59,7 +59,6 @@ class API: _host: str = "" _api_token: str = "" _storage_token: str = "" - _http_headers: dict = {} _db_schema: Optional[DataSchema] = None _api_prefix: str = "api" _api_version: str = "v1" @@ -214,9 +213,6 @@ def __init__(self, host: Union[str, None] = None, api_token: Union[str, None] = self._api_token = api_token # type: ignore self._storage_token = storage_token # type: ignore - # add Bearer to token for HTTP requests - self._http_headers = {"Authorization": f"Bearer {self._api_token}", "Content-Type": "application/json"} - # set a logger instance to use for the class logs self._init_logger(default_log_level) @@ -328,6 +324,8 @@ def connect(self): if self._api_request_session: self.disconnect() self._api_request_session = requests.Session() + # add Bearer to token for HTTP requests + self._api_request_session.headers = {"Authorization": f"Bearer {self._api_token}", "Content-Type": "application/json"} # As a form to check our connection, we pull and establish the data schema try: @@ -981,10 +979,6 @@ def _capsule_request(self, url_path: str, method: str, api_request: bool = True, kwargs additional keyword arguments that are passed to `request.request` """ - - if headers is None: - headers = self._http_headers - url: str = self.host if api_request: url += f"/{self.api_prefix}/{self.api_version}" @@ -996,7 +990,7 @@ def _capsule_request(self, url_path: str, method: str, api_request: bool = True, pre_log_message += "..." self.logger.debug(pre_log_message) - response: requests.Response = requests.request(url=url, method=method, headers=headers, timeout=timeout, **kwargs) + response: requests.Response = self._api_request_session.request(url=url, method=method, timeout=timeout, **kwargs) post_log_message: str = f"Request return with {response.status_code}" if self.extra_api_log_debug_info: post_log_message += f" {response.text}" From ea8c833348f8dd93b5d4a595593de2af6f11636d Mon Sep 17 00:00:00 2001 From: Ludwig Schneider Date: Tue, 27 Feb 2024 19:01:42 -0600 Subject: [PATCH 05/10] add exception raising for uninitialized API usage --- src/cript/api/api.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/cript/api/api.py b/src/cript/api/api.py index c7191d63a..14f864bb9 100644 --- a/src/cript/api/api.py +++ b/src/cript/api/api.py @@ -979,6 +979,7 @@ def _capsule_request(self, url_path: str, method: str, api_request: bool = True, kwargs additional keyword arguments that are passed to `request.request` """ + url: str = self.host if api_request: url += f"/{self.api_prefix}/{self.api_version}" @@ -990,6 +991,8 @@ def _capsule_request(self, url_path: str, method: str, api_request: bool = True, pre_log_message += "..." self.logger.debug(pre_log_message) + if self._api_request_session is None: + raise CRIPTAPIRequiredError response: requests.Response = self._api_request_session.request(url=url, method=method, timeout=timeout, **kwargs) post_log_message: str = f"Request return with {response.status_code}" if self.extra_api_log_debug_info: From 7aa8b3013f0b9e4b768dbc578c3a1255d81b7303 Mon Sep 17 00:00:00 2001 From: Ludwig Schneider Date: Tue, 27 Feb 2024 19:04:40 -0600 Subject: [PATCH 06/10] add more documentation for user --- src/cript/api/exceptions.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/cript/api/exceptions.py b/src/cript/api/exceptions.py index b303903a9..2d1f326a0 100644 --- a/src/cript/api/exceptions.py +++ b/src/cript/api/exceptions.py @@ -68,6 +68,8 @@ class CRIPTAPIRequiredError(CRIPTException): """ ## Definition Exception to be raised when the API object is requested, but no cript.API object exists yet. + Also make sure to use it in a context manager `with cript.API as api:` or manually call + `connect` and `disconnect`. The CRIPT Python SDK relies on a cript.API object for creation, validation, and modification of nodes. The cript.API object may be explicitly called by the user to perform operations to the API, or @@ -83,6 +85,9 @@ class CRIPTAPIRequiredError(CRIPTException): my_token = "123456" # To use your token securely, please consider using environment variables my_api = cript.API(host=my_host, token=my_token) + my_api.connect() + # Your code + my_api.disconnect() ``` """ @@ -90,7 +95,11 @@ def __init__(self): pass def __str__(self) -> str: - error_message = "cript.API object is required for an operation, but it does not exist." "Please instantiate a cript.API object to continue." "See the documentation for more details." + error_message = ( + "cript.API object is required for an operation, but it does not exist." + "Please instantiate a cript.API object and connect it to API for example with a context manager `with cript.API() as api:` to continue." + "See the documentation for more details." + ) return error_message From b520b7d6c1b18bb5f8fc90b1a878c46fa036d697 Mon Sep 17 00:00:00 2001 From: Ludwig Schneider Date: Wed, 28 Feb 2024 15:48:31 -0600 Subject: [PATCH 07/10] add some advanced features to the paginator --- src/cript/api/paginator.py | 69 +++++++++++++++++++++++++++++++++++++- tests/api/test_search.py | 7 ++-- 2 files changed, 71 insertions(+), 5 deletions(-) diff --git a/src/cript/api/paginator.py b/src/cript/api/paginator.py index 0625138a1..f57b4eb34 100644 --- a/src/cript/api/paginator.py +++ b/src/cript/api/paginator.py @@ -33,6 +33,8 @@ class Paginator: _current_position: int _fetched_nodes: list _number_fetched_pages: int = 0 + _limit_page_fetches: Union[int, None] = None + _num_skip_pages: int = 0 auto_load_nodes: bool = True @beartype @@ -103,11 +105,15 @@ def _fetch_next_page(self) -> None: None """ + # Check if we are supposed to fetch more pages + if self._limit_page_fetches and self._number_fetched_pages >= self._limit_page_fetches: + raise StopIteration + # Composition of the query URL temp_url_path: str = self._url_path temp_url_path += f"/?q={self._query}" if self._initial_page_number is not None: - temp_url_path += f"&page={self._initial_page_number + self._number_fetched_pages}" + temp_url_path += f"&page={self.page_number}" self._number_fetched_pages += 1 response: requests.Response = self._api._capsule_request(url_path=temp_url_path, method="GET") @@ -174,3 +180,64 @@ def __next__(self): def __iter__(self): self._current_position = 0 return self + + @property + def page_number(self) -> Union[int, None]: + """Obtain the current page number the paginator is fetching next. + + Returns + ------- + int + positive number of the next page this paginator is fetching. + None + if no page number is associated with the pagination + """ + if self._initial_page_number is not None: + return self._num_skip_pages + self._initial_page_number + self._number_fetched_pages + + @beartype + def limit_page_fetches(self, max_num_pages: Union[int, None]) -> None: + """Limit pagination to a maximum number of pages. + + This can be used for very large searches with the paginator, so the search can be split into + smaller portions. + + Parameters + ---------- + max_num_pages: Union[int, None], + positive integer with maximum number of page fetches. + or None, indicating unlimited number of page fetches are permitted. + """ + self._limit_page_fetches = max_num_pages + + def skip_pages(self, skip_pages: int) -> int: + """Skip pages in the pagination. + + Warning this function is advanced usage and may not produce the results you expect. + In particular, every search is different, even if we search for the same values there is + no guarantee that the results are in the same order. (And results can change if data is + added or removed from CRIPT.) So if you break up your search with `limit_page_fetches` and + `skip_pages` there is no guarantee that it is the same as one continuous search. + If the paginator associated search does not accept pages, there is no effect. + + Parameters + ---------- + skip_pages:int + Number of pages that the paginator skips now before fetching the next page. + The parameter is added to the internal state, so repeated calls skip more pages. + + Returns + ------- + int + The number this paginator is skipping. Internal skip count. + + Raises + ------ + RuntimeError + If the total number of skipped pages is negative. + """ + num_skip_pages = self._num_skip_pages + skip_pages + if self._num_skip_pages < 0: + RuntimeError(f"Invalid number of skipped pages. The total number of pages skipped is negative {num_skip_pages}, requested to skip {skip_pages}.") + self._num_skip_pages = num_skip_pages + return self._num_skip_pages diff --git a/tests/api/test_search.py b/tests/api/test_search.py index 7030e434a..9381d8ee8 100644 --- a/tests/api/test_search.py +++ b/tests/api/test_search.py @@ -24,6 +24,8 @@ def test_api_search_node_type(cript_api: cript.API) -> None: # test search results assert isinstance(materials_paginator, Paginator) + materials_paginator.skip_pages(3) + materials_paginator.limit_page_fetches(3) materials_list = [] while True: try: @@ -39,12 +41,9 @@ def test_api_search_node_type(cript_api: cript.API) -> None: materials_paginator.auto_load_nodes = True except StopIteration: break - # We don't need to search for a million pages here. - if materials_paginator._number_fetched_pages > 6: - break # Assure that we paginated more then one page - assert materials_paginator._number_fetched_pages > 0 + assert materials_paginator.page_number == 6 assert len(materials_list) > 5 first_page_first_result = materials_list[0].name # just checking that the word has a few characters in it From f0e0a989f70eddbc3310f4b4c2685ff8d6fbc2ea Mon Sep 17 00:00:00 2001 From: Ludwig Schneider Date: Mon, 11 Mar 2024 11:26:16 -0500 Subject: [PATCH 08/10] fix oversight --- src/cript/api/paginator.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/cript/api/paginator.py b/src/cript/api/paginator.py index f57b4eb34..eb7ff299b 100644 --- a/src/cript/api/paginator.py +++ b/src/cript/api/paginator.py @@ -192,8 +192,10 @@ def page_number(self) -> Union[int, None]: None if no page number is associated with the pagination """ + page_number = self._num_skip_pages + self._number_fetched_pages if self._initial_page_number is not None: - return self._num_skip_pages + self._initial_page_number + self._number_fetched_pages + page_number += self._initial_page_number + return page_number @beartype def limit_page_fetches(self, max_num_pages: Union[int, None]) -> None: From 006b0472907d7a42631f21a73abcf569704ace7c Mon Sep 17 00:00:00 2001 From: Ludwig Schneider Date: Mon, 11 Mar 2024 12:45:54 -0500 Subject: [PATCH 09/10] fixing a problem where empty search raise an error --- src/cript/api/paginator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cript/api/paginator.py b/src/cript/api/paginator.py index eb7ff299b..23e419ed0 100644 --- a/src/cript/api/paginator.py +++ b/src/cript/api/paginator.py @@ -142,9 +142,9 @@ def _fetch_next_page(self) -> None: if api_response["code"] == 404 and api_response["error"] == "The requested URL was not found on the server. If you entered the URL manually please check your spelling and try again.": current_page_results = [] - + self._api.logger.debug(f"The paginator hit a 404 HTTP for requesting this {temp_url_path} with GET. We interpret it as no nodes present, but this is brittle at the moment.") # if API response is not 200 raise error for the user to debug - if api_response["code"] != 200: + elif api_response["code"] != 200: raise APIError(api_error=str(response.json()), http_method="GET", api_url=temp_url_path) # Here we only load the JSON into the temporary results. From 0419bad6a16100014e26e200094b4f1dba6ec7e9 Mon Sep 17 00:00:00 2001 From: Ludwig Schneider Date: Mon, 11 Mar 2024 12:46:07 -0500 Subject: [PATCH 10/10] add a test to hit empty searches --- tests/api/test_search.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/api/test_search.py b/tests/api/test_search.py index 9381d8ee8..d09c08bfb 100644 --- a/tests/api/test_search.py +++ b/tests/api/test_search.py @@ -103,6 +103,25 @@ def test_api_search_uuid(cript_api: cript.API, dynamic_material_data) -> None: assert str(uuid_list[0].uuid) == dynamic_material_data["uuid"] +@pytest.mark.skipif(not HAS_INTEGRATION_TESTS_ENABLED, reason="requires a real cript_api_token") +def test_empty_paginator(cript_api: cript.API) -> None: + non_existent_name = "This is an nonsensical name for a material and should never exist. %^&*()_" + exact_name_paginator = cript_api.search(node_type=cript.Material, search_mode=cript.SearchModes.EXACT_NAME, value_to_search=non_existent_name) + with pytest.raises(StopIteration): + next(exact_name_paginator) + exact_name_paginator.auto_load_nodes = False + with pytest.raises(StopIteration): + next(exact_name_paginator) + + # Special 0 UUID should not exist + uuid_paginator = cript_api.search(node_type=cript.Material, search_mode=cript.SearchModes.UUID, value_to_search="00000000-0000-0000-0000-000000000000") + with pytest.raises(StopIteration): + next(uuid_paginator) + exact_name_paginator.auto_load_nodes = False + with pytest.raises(StopIteration): + next(uuid_paginator) + + @pytest.mark.skipif(not HAS_INTEGRATION_TESTS_ENABLED, reason="requires a real cript_api_token") def test_api_search_bigsmiles(cript_api: cript.API) -> None: """