diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b9e719dda..0b1a152ef 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -90,7 +90,7 @@ jobs: run: | # ran only on certain files for now # add here when checked - poetry run black schematic --check + poetry run black schematic tests schematic_api --check #---------------------------------------------- # type checking/enforcement diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 623446ced..e4b90e42e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,4 +10,4 @@ repos: # pre-commit's default_language_version, see # https://pre-commit.com/#top_level-default_language_version language_version: python3.10 - files: schematic/ \ No newline at end of file + files: ^(tests|schematic|schematic_api)/ \ No newline at end of file diff --git a/README.md b/README.md index 228e0e779..3d0bf04ca 100644 --- a/README.md +++ b/README.md @@ -141,8 +141,6 @@ model: # This section is for using google sheets with Schematic google_sheets: - # The Synapse id of the Google service account credentials. - service_acct_creds_synapse_id: "syn25171627" # Path to the synapse config file, either absolute or relative to this file service_acct_creds: "schematic_service_account_creds.json" # When doing google sheet validation (regex match) with the validation rules. diff --git a/config_example.yml b/config_example.yml index 245b8fefe..9125cb6bb 100644 --- a/config_example.yml +++ b/config_example.yml @@ -35,8 +35,6 @@ model: # This section is for using google sheets with Schematic google_sheets: - # The Synapse id of the Google service account credentials. - service_acct_creds_synapse_id: "syn25171627" # Path to the synapse config file, either absolute or relative to this file service_acct_creds: "schematic_service_account_creds.json" # When doing google sheet validation (regex match) with the validation rules. diff --git a/schematic/configuration/configuration.py b/schematic/configuration/configuration.py index 1bd3f1c40..63bf55313 100644 --- a/schematic/configuration/configuration.py +++ b/schematic/configuration/configuration.py @@ -164,14 +164,6 @@ def model_location(self) -> str: """ return self._model_config.location - @property - def service_account_credentials_synapse_id(self) -> str: - """ - Returns: - str: The Synapse id of the Google service account credentials. - """ - return self._google_sheets_config.service_acct_creds_synapse_id - @property def service_account_credentials_path(self) -> str: """ diff --git a/schematic/configuration/dataclasses.py b/schematic/configuration/dataclasses.py index 7fbc7df57..4b3d1560f 100644 --- a/schematic/configuration/dataclasses.py +++ b/schematic/configuration/dataclasses.py @@ -124,12 +124,10 @@ class GoogleSheetsConfig: strict_validation: When doing google sheet validation (regex match) with the validation rules. True is alerting the user and not allowing entry of bad values. False is warning but allowing the entry on to the sheet. - service_acct_creds_synapse_id: The Synapse id of the Google service account credentials. 
service_acct_creds: Path to the Google service account credentials, either absolute or relative to this file """ - service_acct_creds_synapse_id: str = "syn25171627" service_acct_creds: str = "schematic_service_account_creds.json" strict_validation: bool = True @@ -151,7 +149,6 @@ def validate_string_is_not_empty(cls, value: str) -> str: raise ValueError(f"{value} is an empty string") return value - @validator("service_acct_creds_synapse_id") @classmethod def validate_synapse_id(cls, value: str) -> str: """Check if string is a valid synapse id diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index 84ebb0696..18586cc13 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -1333,7 +1333,10 @@ def upload_manifest_file( ).id synapseutils.copy_functions.changeFileMetaData( - syn=self.syn, entity=manifest_synapse_file_id, downloadAs=file_name_new + syn=self.syn, + entity=manifest_synapse_file_id, + downloadAs=file_name_new, + forceVersion=False, ) return manifest_synapse_file_id diff --git a/schematic/utils/google_api_utils.py b/schematic/utils/google_api_utils.py index 9a5d870ca..b705e0419 100644 --- a/schematic/utils/google_api_utils.py +++ b/schematic/utils/google_api_utils.py @@ -11,7 +11,6 @@ from googleapiclient.discovery import build, Resource # type: ignore from google.oauth2 import service_account # type: ignore from schematic.configuration.configuration import CONFIG -from schematic.store.synapse import SynapseStorage logger = logging.getLogger(__name__) @@ -67,39 +66,6 @@ def build_service_account_creds() -> GoogleServiceAcountCreds: return creds -def download_creds_file() -> None: - """Download google credentials file""" - syn = SynapseStorage.login() - - # if file path of service_account does not exist - # and if an environment variable related to service account is not found - # regenerate service_account credentials - if ( - not os.path.exists(CONFIG.service_account_credentials_path) - and "SERVICE_ACCOUNT_CREDS" not in os.environ - ): - # synapse ID of the 'schematic_service_account_creds.json' file - api_creds = CONFIG.service_account_credentials_synapse_id - - # Download in parent directory of SERVICE_ACCT_CREDS to - # ensure same file system for os.rename() - creds_dir = os.path.dirname(CONFIG.service_account_credentials_path) - - creds_file = syn.get(api_creds, downloadLocation=creds_dir) - os.rename(creds_file.path, CONFIG.service_account_credentials_path) - - logger.info( - "The credentials file has been downloaded " - f"to '{CONFIG.service_account_credentials_path}'" - ) - - elif "SERVICE_ACCOUNT_CREDS" in os.environ: - # remind users that "SERVICE_ACCOUNT_CREDS" as an environment variable is being used - logger.info( - "Using environment variable SERVICE_ACCOUNT_CREDS as the credential file." 
- ) - - @no_type_check def execute_google_api_requests(service, requests_body, **kwargs) -> Any: """ diff --git a/schematic_api/Dockerfile b/schematic_api/Dockerfile index 53f63533f..15e979dee 100644 --- a/schematic_api/Dockerfile +++ b/schematic_api/Dockerfile @@ -1,120 +1,126 @@ -FROM tiangolo/uwsgi-nginx-flask:python3.10 - -# add version tag as a build argument -ARG TAG - -# the environment variables defined here are the default -# and can be overwritten by docker run -e VARIABLE = XX -# or can be overwritten by .env when using docker compose -ENV PYTHONFAULTHANDLER=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONHASHSEED=random \ - PIP_NO_CACHE_DIR=off \ - PIP_DISABLE_PIP_VERSION_CHECK=on \ - PIP_DEFAULT_TIMEOUT=200 \ - POETRY_VERSION=1.3.0 \ - APP_PARENT_DIR=/app \ - NGINX_CONFIG=/etc/nginx/conf.d \ - APP_DIR=/app/app \ - ROOT=/ \ - UWSGI_INI=/app/uwsgi.ini \ - NGINX_WORKER_PROCESSES=1 \ - VERSION=$TAG - -# Note: -# The starting number of uWSGI processes is controlled by the variable UWSGI_CHEAPER, by default set to 2. -# The maximum number of uWSGI processes is controlled by the variable UWSGI_PROCESSES, by default set to 16 -# By default, the image starts with 2 uWSGI processes running. When the server is experiencing a high load, it creates up to 16 uWSGI processes to handle it on demand. -# NGINX_MAX_UPLOAD is set to 0 by default that allows unlimited upload file sizes -# NGINX_WORKER_CONNECTIONS is set to 1024 by default that allows a maximum limit of 1024 connections per worker. -# NGINX_WORKER_OPEN_FILES is set to 2048 by default that allows 2048 open files - -# run open ssl and generate certificate -RUN apt update && \ - apt-get install openssl && \ - openssl req -x509 -nodes -days 365 \ - -subj "/C=CA/ST=QC/O=Company" \ - -newkey rsa:2048 -keyout /etc/ssl/private/localhost.key \ - -out /etc/ssl/certs/localhost.crt; - -# add dhparam.pem -# this step takes quite some time -RUN openssl dhparam -out /etc/ssl/dhparam.pem 4096 - -# copy config files that handle encryption to docker -WORKDIR ${NGINX_CONFIG} -COPY ./self-signed.conf ./ssl-params.conf ./certificate.conf ./ - - -# use custom uwsgi-nginx-entrypoint.sh -# this uwsgi-nginx-entrypoint.sh file is derived from: https://github.com/tiangolo/uwsgi-nginx-flask-docker/blob/master/docker-images/entrypoint.sh -# we have to modify it so that we could generate a different /etc/nginx/conf.d/nginx.conf file -WORKDIR ${ROOT} -COPY ./uwsgi-nginx-entrypoint.sh ./entrypoint2.sh -COPY ./uwsgi-nginx-entrypoint.sh ./uwsgi-nginx-entrypoint2.sh -RUN chmod +x uwsgi-nginx-entrypoint2.sh -RUN chmod +x entrypoint2.sh -RUN chown -R nginx /uwsgi-nginx-entrypoint2.sh -RUN chown -R nginx /entrypoint2.sh - -# install poetry -RUN pip install --no-cache-dir "poetry==$POETRY_VERSION" - -# set work directory -WORKDIR ${APP_PARENT_DIR} -RUN chown www-data:www-data ${APP_PARENT_DIR} - -# remove the old uwsgi.ini and main.py from the original image -RUN rm -rf ${APP_PARENT_DIR}/main.py -RUN rm -rf ${APP_PARENT_DIR}/uwsgi.ini - -# copy to use custom uwsgi.ini -COPY ./uwsgi.ini ./ - -# create a separate folder called app -RUN mkdir app -WORKDIR ${APP_DIR} - -# copy other files to app/app -# Note: run_api.py is not needed - -COPY ./pyproject.toml ./poetry.lock ./main.py ./ -COPY ./config_example.yml ./config.yml -RUN poetry config virtualenvs.create false -RUN poetry install --no-interaction --all-extras --no-root - -# copy schematic_api folder -COPY schematic_api ./schematic_api - -# copy great_expectations folder -COPY great_expectations ./great_expectations - -# copy tests 
folder because some endpoints by default download to the tests folder -COPY tests ./tests - -# change permission -RUN chown -R www-data:www-data ${APP_DIR} - -# allow downloading to synapse cache -RUN chown -R www-data:www-data /root - -# copy schematic -COPY schematic ./schematic - -# change permission -WORKDIR /var/www/ -#The -R option: make the command recursive, so it will change the owner of all files and subdirectories within a given folder. -RUN chown -R www-data:www-data /var/www/ - -RUN chown -R www-data:www-data /var/tmp/ - -# change work directory back -WORKDIR ${APP_DIR} - -# specify entrypoint again to generate config -# have to respecify CMD too -ENTRYPOINT ["/entrypoint2.sh"] -CMD ["/start.sh"] - -# Expose ports -EXPOSE 443 +FROM tiangolo/uwsgi-nginx-flask:python3.10 + +# add version tag as a build argument +ARG TAG + +# the environment variables defined here are the default +# and can be overwritten by docker run -e VARIABLE = XX +# or can be overwritten by .env when using docker compose +ENV PYTHONFAULTHANDLER=1 \ + PYTHONUNBUFFERED=1 \ + PYTHONHASHSEED=random \ + PIP_NO_CACHE_DIR=off \ + PIP_DISABLE_PIP_VERSION_CHECK=on \ + PIP_DEFAULT_TIMEOUT=200 \ + POETRY_VERSION=1.3.0 \ + APP_PARENT_DIR=/app \ + NGINX_CONFIG=/etc/nginx/conf.d \ + APP_DIR=/app/app \ + ROOT=/ \ + UWSGI_INI=/app/uwsgi.ini \ + NGINX_WORKER_PROCESSES=1 \ + VERSION=$TAG + + + +LABEL maintainer="Lingling Peng Andrew Lamb Gianna Jordan " +LABEL version=$TAG + + +# Note: +# The starting number of uWSGI processes is controlled by the variable UWSGI_CHEAPER, by default set to 2. +# The maximum number of uWSGI processes is controlled by the variable UWSGI_PROCESSES, by default set to 16 +# By default, the image starts with 2 uWSGI processes running. When the server is experiencing a high load, it creates up to 16 uWSGI processes to handle it on demand. +# NGINX_MAX_UPLOAD is set to 0 by default that allows unlimited upload file sizes +# NGINX_WORKER_CONNECTIONS is set to 1024 by default that allows a maximum limit of 1024 connections per worker. 
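These tuning knobs are ordinary environment variables that the base image's start scripts substitute into the uWSGI and NGINX configuration, so any of them can be overridden per container. The default-with-override pattern they rely on, sketched in Python with the default values quoted in the notes above (treat the exact handling inside the base image as an assumption):

import os

# Defaults mirror the base-image notes; any value passed via `docker run -e` wins.
uwsgi_cheaper = int(os.environ.get("UWSGI_CHEAPER", "2"))       # starting worker count
uwsgi_processes = int(os.environ.get("UWSGI_PROCESSES", "16"))  # maximum worker count
nginx_max_upload = os.environ.get("NGINX_MAX_UPLOAD", "0")      # "0" means unlimited

# uWSGI itself enforces that the cheaper count stays below the process count.
assert uwsgi_cheaper < uwsgi_processes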
+# NGINX_WORKER_OPEN_FILES is set to 2048 by default that allows 2048 open files + +# run open ssl and generate certificate +RUN apt update && \ + apt-get install openssl && \ + openssl req -x509 -nodes -days 365 \ + -subj "/C=CA/ST=QC/O=Company" \ + -newkey rsa:2048 -keyout /etc/ssl/private/localhost.key \ + -out /etc/ssl/certs/localhost.crt; + +# add dhparam.pem +# this step takes quite some time +RUN openssl dhparam -out /etc/ssl/dhparam.pem 4096 + +# copy config files that handle encryption to docker +WORKDIR ${NGINX_CONFIG} +COPY ./self-signed.conf ./ssl-params.conf ./certificate.conf ./ + + +# use custom uwsgi-nginx-entrypoint.sh +# this uwsgi-nginx-entrypoint.sh file is derived from: https://github.com/tiangolo/uwsgi-nginx-flask-docker/blob/master/docker-images/entrypoint.sh +# we have to modify it so that we could generate a different /etc/nginx/conf.d/nginx.conf file +WORKDIR ${ROOT} +COPY ./uwsgi-nginx-entrypoint.sh ./entrypoint2.sh +COPY ./uwsgi-nginx-entrypoint.sh ./uwsgi-nginx-entrypoint2.sh +RUN chmod +x uwsgi-nginx-entrypoint2.sh +RUN chmod +x entrypoint2.sh +RUN chown -R nginx /uwsgi-nginx-entrypoint2.sh +RUN chown -R nginx /entrypoint2.sh + +# install poetry +RUN pip install --no-cache-dir "poetry==$POETRY_VERSION" + +# set work directory +WORKDIR ${APP_PARENT_DIR} +RUN chown www-data:www-data ${APP_PARENT_DIR} + +# remove the old uwsgi.ini and main.py from the original image +RUN rm -rf ${APP_PARENT_DIR}/main.py +RUN rm -rf ${APP_PARENT_DIR}/uwsgi.ini + +# copy to use custom uwsgi.ini +COPY ./uwsgi.ini ./ + +# create a separate folder called app +RUN mkdir app +WORKDIR ${APP_DIR} + +# copy other files to app/app +# Note: run_api.py is not needed + +COPY ./pyproject.toml ./poetry.lock ./main.py ./ +COPY ./config_example.yml ./config.yml +RUN poetry config virtualenvs.create false +RUN poetry install --no-interaction --all-extras --no-root + +# copy schematic_api folder +COPY schematic_api ./schematic_api + +# copy great_expectations folder +COPY great_expectations ./great_expectations + +# copy tests folder because some endpoints by default download to the tests folder +COPY tests ./tests + +# change permission +RUN chown -R www-data:www-data ${APP_DIR} + +# allow downloading to synapse cache +RUN chown -R www-data:www-data /root + +# copy schematic +COPY schematic ./schematic + +# change permission +WORKDIR /var/www/ +#The -R option: make the command recursive, so it will change the owner of all files and subdirectories within a given folder. 
+RUN chown -R www-data:www-data /var/www/ + +RUN chown -R www-data:www-data /var/tmp/ + +# change work directory back +WORKDIR ${APP_DIR} + +# specify entrypoint again to generate config +# have to respecify CMD too +ENTRYPOINT ["/entrypoint2.sh"] +CMD ["/start.sh"] + +# Expose ports +EXPOSE 443 diff --git a/schematic_api/api/__init__.py b/schematic_api/api/__init__.py index 82bad7e9b..342e33abc 100644 --- a/schematic_api/api/__init__.py +++ b/schematic_api/api/__init__.py @@ -1,13 +1,20 @@ import os import connexion +from typing import Tuple + +import traceback +from synapseclient.core.exceptions import ( + SynapseAuthenticationError, +) +from schematic.exceptions import AccessCredentialsError -from schematic import CONFIG def create_app(): connexionapp = connexion.FlaskApp(__name__, specification_dir="openapi/") - connexionapp.add_api("api.yaml", arguments={"title": "Schematic REST API"}, pythonic_params=True) - + connexionapp.add_api( + "api.yaml", arguments={"title": "Schematic REST API"}, pythonic_params=True + ) # get the underlying Flask app instance app = connexionapp.app @@ -20,22 +27,38 @@ def create_app(): app.config["SCHEMATIC_CONFIG"] = schematic_config app.config["SCHEMATIC_CONFIG_CONTENT"] = schematic_config_content - # Configure flask app - # app.config[] = schematic[] - # app.config[] = schematic[] - # app.config[] = schematic[] + # handle exceptions in schematic when an exception gets raised + @app.errorhandler(Exception) + def handle_exception(e: Exception) -> Tuple[str, int]: + """handle exceptions in schematic APIs""" + # Ensure the application context is available + with app.app_context(): + # Get the last line of error from the traceback + last_line = traceback.format_exc().strip().split("\n")[-1] + + # Log the full trace + app.logger.error(traceback.format_exc()) + + # Return a JSON response with the last line of the error + return last_line, 500 - # Initialize extension schematic - # import MyExtension - # myext = MyExtension() - # myext.init_app(app) + @app.errorhandler(SynapseAuthenticationError) + def handle_synapse_auth_error(e: Exception) -> Tuple[str, int]: + """handle synapse authentication error""" + return str(e), 401 + + @app.errorhandler(AccessCredentialsError) + def handle_synapse_access_error(e: Exception) -> Tuple[str, int]: + """handle synapse access error""" + return str(e), 403 return app + app = create_app() # def route_code(): # import flask_schematic as sc # sc.method1() -# \ No newline at end of file +# diff --git a/schematic_api/api/__main__.py b/schematic_api/api/__main__.py index 923cebaf5..afc24b44a 100644 --- a/schematic_api/api/__main__.py +++ b/schematic_api/api/__main__.py @@ -2,7 +2,7 @@ from schematic_api.api import app -def main(): +def main(): # Get app configuration host = os.environ.get("APP_HOST", "0.0.0.0") port = os.environ.get("APP_PORT", "3001") @@ -11,5 +11,6 @@ def main(): # Launch app app.run(host=host, port=port, debug=False) + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/schematic_api/api/routes.py b/schematic_api/api/routes.py index fbf36fbf5..9bc53ff5a 100644 --- a/schematic_api/api/routes.py +++ b/schematic_api/api/routes.py @@ -39,7 +39,10 @@ SynapseTimeoutError, ) from schematic.utils.general import entity_type_mapping -from schematic.utils.schema_utils import get_property_label_from_display_name, DisplayLabelType +from schematic.utils.schema_utils import ( + get_property_label_from_display_name, + DisplayLabelType, +) logger = logging.getLogger(__name__) 
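The create_app() hunk above registers Flask error handlers that translate exceptions into HTTP responses: SynapseAuthenticationError becomes a 401, AccessCredentialsError a 403, and any other Exception a 500 that carries only the last line of the traceback (the full trace goes to the app log). A self-contained sketch of the same pattern; the exception class here is a stand-in, not the real synapseclient one:

import traceback
from typing import Tuple

from flask import Flask


class AuthError(Exception):  # stand-in for SynapseAuthenticationError
    pass


app = Flask(__name__)


@app.errorhandler(AuthError)
def handle_auth_error(e: Exception) -> Tuple[str, int]:
    # more specific handlers take precedence over the generic Exception handler
    return str(e), 401


@app.errorhandler(Exception)
def handle_any_error(e: Exception) -> Tuple[str, int]:
    app.logger.error(traceback.format_exc())  # log the full trace
    last_line = traceback.format_exc().strip().split("\n")[-1]
    return last_line, 500  # only the last line is returned to the client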
logging.basicConfig(level=logging.DEBUG) @@ -210,6 +213,7 @@ def save_file(file_key="csv_file"): return temp_path + def initalize_metadata_model(schema_url, data_model_labels): # get path to temp data model file (csv or jsonld) as appropriate data_model = get_temp_model_path(schema_url) @@ -393,7 +397,7 @@ def submit_manifest_route( project_scope=None, table_column_names=None, annotation_keys=None, - file_annotations_upload:bool=True, + file_annotations_upload: bool = True, ): # call config_handler() config_handler(asset_view=asset_view) @@ -450,7 +454,7 @@ def submit_manifest_route( project_scope=project_scope, table_column_names=table_column_names, annotation_keys=annotation_keys, - file_annotations_upload=file_annotations_upload + file_annotations_upload=file_annotations_upload, ) return manifest_id @@ -729,6 +733,7 @@ def get_asset_view_table(asset_view, return_type): file_view_table_df.to_csv(export_path, index=False) return export_path + def get_project_manifests(project_id, asset_view): # Access token now stored in request header access_token = get_access_token() @@ -1022,4 +1027,4 @@ def get_schematic_version() -> str: raise NotImplementedError( "Using this endpoint to check the version of schematic is only supported when the API is running in a docker container." ) - return version \ No newline at end of file + return version diff --git a/schematic_api/api/security_controller_.py b/schematic_api/api/security_controller_.py index ee336dcb0..fbde596bb 100644 --- a/schematic_api/api/security_controller_.py +++ b/schematic_api/api/security_controller_.py @@ -11,4 +11,4 @@ def info_from_bearerAuth(token): :return: Decoded token information or None if token is invalid :rtype: dict | None """ - return {"uid": "user_id"} \ No newline at end of file + return {"uid": "user_id"} diff --git a/tests/conftest.py b/tests/conftest.py index 8d73650ef..62c2cb3e3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -128,19 +128,22 @@ def synapse_store(request): # These fixtures make copies of existing test manifests. 
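The copy-on-demand idea behind these fixtures is a standard pytest yield fixture: everything before the yield is setup, everything after it is teardown that runs even when the test fails. In miniature, with a hypothetical source file:

import shutil

import pytest


@pytest.fixture
def manifest_copy(tmp_path):
    src = "tests/data/mock_manifests/example.csv"  # hypothetical source manifest
    dst = tmp_path / "example_copy.csv"
    shutil.copyfile(src, dst)        # setup: make the disposable copy
    yield str(dst)                   # hand the copy's path to the test
    dst.unlink(missing_ok=True)      # teardown (tmp_path is also auto-removed)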
-# These copies can the be altered by a given test, and the copy will eb destroyed at the +# These copies can the be altered by a given test, and the copy will eb destroyed at the # end of the test + @pytest.fixture(scope="function") def temporary_file_copy(request, helpers: Helpers) -> Generator[str, None, None]: file_name = request.param # original file copy original_test_path = helpers.get_data_path(f"mock_manifests/{file_name}") # get filename without extension - file_name_no_extension=file_name.split(".")[0] + file_name_no_extension = file_name.split(".")[0] # Copy the original CSV file to a temporary directory - temp_csv_path = helpers.get_data_path(f"mock_manifests/{file_name_no_extension}_copy.csv") - + temp_csv_path = helpers.get_data_path( + f"mock_manifests/{file_name_no_extension}_copy.csv" + ) + shutil.copyfile(original_test_path, temp_csv_path) yield temp_csv_path # Teardown diff --git a/tests/data/test_configs/default_config.yml b/tests/data/test_configs/default_config.yml index 6775b569a..5a1785dc2 100644 --- a/tests/data/test_configs/default_config.yml +++ b/tests/data/test_configs/default_config.yml @@ -16,6 +16,5 @@ model: location: 'tests/data/example.model.jsonld' google_sheets: - service_acct_creds_synapse_id: 'syn25171627' service_acct_creds: "schematic_service_account_creds.json" strict_validation: true diff --git a/tests/data/test_configs/valid_config.yml b/tests/data/test_configs/valid_config.yml index 3e340721c..456c3ccd7 100644 --- a/tests/data/test_configs/valid_config.yml +++ b/tests/data/test_configs/valid_config.yml @@ -16,6 +16,5 @@ model: location: "model.jsonld" google_sheets: - service_acct_creds_synapse_id: "syn1" service_acct_creds: "creds.json" strict_validation: false diff --git a/tests/data/test_configs/valid_config2.yml b/tests/data/test_configs/valid_config2.yml index 78306ee18..e1c85ab4a 100644 --- a/tests/data/test_configs/valid_config2.yml +++ b/tests/data/test_configs/valid_config2.yml @@ -10,6 +10,5 @@ model: location: "model.jsonld" google_sheets: - service_acct_creds_synapse_id: "syn1" service_acct_creds: "creds.json" strict_validation: false diff --git a/tests/test_api.py b/tests/test_api.py index a070297ab..97183186f 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -78,7 +78,7 @@ def test_manifest_json(helpers): @pytest.fixture(scope="class") def data_model_jsonld(): - data_model_jsonld ="https://raw.githubusercontent.com/Sage-Bionetworks/schematic/develop/tests/data/example.model.jsonld" + data_model_jsonld = "https://raw.githubusercontent.com/Sage-Bionetworks/schematic/develop/tests/data/example.model.jsonld" yield data_model_jsonld @@ -132,8 +132,30 @@ def request_headers(syn_token): yield headers +@pytest.fixture +def request_invalid_headers(): + headers = {"Authorization": "Bearer invalid headers"} + yield headers + + @pytest.mark.schematic_api class TestSynapseStorage: + def test_invalid_authentication(self, client, request_invalid_headers): + response = client.get( + "http://localhost:3001/v1/storage/assets/tables", + query_string={"asset_view": "syn23643253", "return_type": "csv"}, + headers=request_invalid_headers, + ) + assert response.status_code == 401 + + def test_insufficent_auth(self, client, request_headers): + response = client.get( + "http://localhost:3001/v1/storage/assets/tables", + query_string={"asset_view": "syn23643252", "return_type": "csv"}, + headers=request_headers, + ) + assert response.status_code == 403 + @pytest.mark.synapse_credentials_needed @pytest.mark.parametrize("return_type", ["json", 
"csv"]) def test_get_storage_assets_tables(self, client, return_type, request_headers): @@ -348,8 +370,7 @@ def test_get_property_label_from_display_name(self, client, strict_camel_case): @pytest.mark.schematic_api class TestDataModelGraphExplorerOperation: def test_get_schema(self, client, data_model_jsonld): - params = {"schema_url": data_model_jsonld, - "data_model_labels": 'class_label'} + params = {"schema_url": data_model_jsonld, "data_model_labels": "class_label"} response = client.get( "http://localhost:3001/v1/schemas/get/schema", query_string=params ) @@ -363,7 +384,11 @@ def test_get_schema(self, client, data_model_jsonld): os.remove(response_dt) def test_if_node_required(test, client, data_model_jsonld): - params = {"schema_url": data_model_jsonld, "node_display_name": "FamilyHistory", "data_model_labels": "class_label"} + params = { + "schema_url": data_model_jsonld, + "node_display_name": "FamilyHistory", + "data_model_labels": "class_label", + } response = client.get( "http://localhost:3001/v1/schemas/is_node_required", query_string=params @@ -791,6 +816,19 @@ def test_generate_manifest_not_file_based_with_annotations( ] ) + def test_generate_manifest_data_type_not_found(self, client, data_model_jsonld): + params = { + "schema_url": data_model_jsonld, + "data_type": "wrong data type", + "use_annotations": False, + } + response = client.get( + "http://localhost:3001/v1/manifest/generate", query_string=params + ) + + assert response.status_code == 500 + assert "LookupError" in str(response.data) + def test_populate_manifest(self, client, data_model_jsonld, test_manifest_csv): # test manifest test_manifest_data = open(test_manifest_csv, "rb") @@ -1086,7 +1124,11 @@ def test_submit_manifest_file_only_replace( elif python_version == "3.9": dataset_id = "syn52656104" - specific_params = {"asset_view": "syn23643253", "dataset_id": dataset_id, "project_scope":["syn54126707"]} + specific_params = { + "asset_view": "syn23643253", + "dataset_id": dataset_id, + "project_scope": ["syn54126707"], + } params.update(specific_params) diff --git a/tests/test_configuration.py b/tests/test_configuration.py index 8845a9b48..7a27c7a34 100644 --- a/tests/test_configuration.py +++ b/tests/test_configuration.py @@ -80,7 +80,6 @@ def test_google_sheets_config(self) -> None: assert isinstance( GoogleSheetsConfig( service_acct_creds="file_name", - service_acct_creds_synapse_id="syn1", strict_validation=True, ), GoogleSheetsConfig, @@ -88,19 +87,11 @@ def test_google_sheets_config(self) -> None: with pytest.raises(ValidationError): GoogleSheetsConfig( service_acct_creds="file_name", - service_acct_creds_synapse_id="syn1", strict_validation="tru", ) with pytest.raises(ValidationError): GoogleSheetsConfig( service_acct_creds="", - service_acct_creds_synapse_id="syn1", - strict_validation=True, - ) - with pytest.raises(ValidationError): - GoogleSheetsConfig( - service_acct_creds="file_name", - service_acct_creds_synapse_id="syn", strict_validation=True, ) @@ -120,7 +111,6 @@ def test_init(self) -> None: assert config.manifest_title == "example" assert config.manifest_data_type == ["Biospecimen", "Patient"] assert config.model_location == "tests/data/example.model.jsonld" - assert config.service_account_credentials_synapse_id assert ( config.service_account_credentials_path != "schematic_service_account_creds.json" @@ -158,7 +148,6 @@ def test_load_config1(self) -> None: assert config.manifest_title == "example" assert config.manifest_data_type == ["Biospecimen", "Patient"] assert config.model_location == 
"tests/data/example.model.jsonld" - assert config.service_account_credentials_synapse_id assert ( config.service_account_credentials_path != "schematic_service_account_creds.json" @@ -188,7 +177,6 @@ def test_load_config2(self) -> None: assert config.manifest_title == "title" assert config.manifest_data_type == ["data_type"] assert config.model_location == "model.jsonld" - assert config.service_account_credentials_synapse_id assert os.path.basename(config.service_account_credentials_path) == "creds.json" assert config.google_sheets_master_template_id == ( "1LYS5qE4nV9jzcYw5sXwCza25slDfRA1CIg3cs-hCdpU" diff --git a/tests/test_manifest.py b/tests/test_manifest.py index 0525b6c6a..da88dda95 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -61,7 +61,9 @@ def manifest_generator(helpers, request): # Get graph data model graph_data_model = generate_graph_data_model( - helpers, path_to_data_model=path_to_data_model, data_model_labels='class_label', + helpers, + path_to_data_model=path_to_data_model, + data_model_labels="class_label", ) manifest_generator = ManifestGenerator( @@ -111,18 +113,22 @@ def manifest(dataset_id, manifest_generator, request): yield manifest, use_annotations, data_type, sheet_url + @pytest.fixture(scope="class") def app(): app = create_app() yield app + class TestManifestGenerator: def test_init(self, helpers): path_to_data_model = helpers.get_data_path("example.model.jsonld") # Get graph data model graph_data_model = generate_graph_data_model( - helpers, path_to_data_model=path_to_data_model, data_model_labels='class_label', + helpers, + path_to_data_model=path_to_data_model, + data_model_labels="class_label", ) generator = ManifestGenerator( @@ -157,7 +163,9 @@ def test_missing_root_error(self, helpers, data_type, exc, exc_message): # Get graph data model graph_data_model = generate_graph_data_model( - helpers, path_to_data_model=path_to_data_model, data_model_labels='class_label', + helpers, + path_to_data_model=path_to_data_model, + data_model_labels="class_label", ) # A LookupError should be raised and include message when the component cannot be found @@ -242,7 +250,9 @@ def test_get_manifest_excel(self, helpers, sheet_url, output_format, dataset_id) # Get graph data model graph_data_model = generate_graph_data_model( - helpers, path_to_data_model=path_to_data_model, data_model_labels='class_label', + helpers, + path_to_data_model=path_to_data_model, + data_model_labels="class_label", ) generator = ManifestGenerator( @@ -300,7 +310,9 @@ def test_get_manifest_no_annos(self, helpers, dataset_id): # Get graph data model graph_data_model = generate_graph_data_model( - helpers, path_to_data_model=path_to_data_model, data_model_labels='class_label', + helpers, + path_to_data_model=path_to_data_model, + data_model_labels="class_label", ) # Instantiate object with use_annotations set to True @@ -416,7 +428,9 @@ def test_add_root_to_component_without_additional_metadata( # Get graph data model graph_data_model = generate_graph_data_model( - helpers, path_to_data_model=path_to_data_model, data_model_labels='class_label', + helpers, + path_to_data_model=path_to_data_model, + data_model_labels="class_label", ) manifest_generator = ManifestGenerator( @@ -453,7 +467,9 @@ def test_add_root_to_component_with_additional_metadata( # Get graph data model graph_data_model = generate_graph_data_model( - helpers, path_to_data_model=path_to_data_model, data_model_labels='class_label', + helpers, + path_to_data_model=path_to_data_model, + data_model_labels="class_label", ) 
manifest_generator = ManifestGenerator( @@ -537,7 +553,9 @@ def test_update_dataframe_with_existing_df(self, helpers, existing_manifest): # Get graph data model graph_data_model = generate_graph_data_model( - helpers, path_to_data_model=path_to_data_model, data_model_labels='class_label', + helpers, + path_to_data_model=path_to_data_model, + data_model_labels="class_label", ) # Instantiate the Manifest Generator. @@ -661,34 +679,85 @@ def test_populate_existing_excel_spreadsheet( # remove file os.remove(dummy_output_path) - - @pytest.mark.parametrize("return_output", ["Mock excel file path", "Mock google sheet link"]) - def test_create_single_manifest(self, simple_manifest_generator, helpers, return_output): - with patch("schematic.manifest.generator.ManifestGenerator.get_manifest", return_value=return_output): + + @pytest.mark.parametrize( + "return_output", ["Mock excel file path", "Mock google sheet link"] + ) + def test_create_single_manifest( + self, simple_manifest_generator, helpers, return_output + ): + with patch( + "schematic.manifest.generator.ManifestGenerator.get_manifest", + return_value=return_output, + ): json_ld_path = helpers.get_data_path("example.model.jsonld") data_type = "Patient" - graph_data_model = generate_graph_data_model(helpers, path_to_data_model=json_ld_path, data_model_labels='class_label') + graph_data_model = generate_graph_data_model( + helpers, + path_to_data_model=json_ld_path, + data_model_labels="class_label", + ) - result = simple_manifest_generator.create_single_manifest(path_to_data_model=json_ld_path, graph_data_model=graph_data_model, data_type=data_type, output_format="google_sheet", use_annotations=False) + result = simple_manifest_generator.create_single_manifest( + path_to_data_model=json_ld_path, + graph_data_model=graph_data_model, + data_type=data_type, + output_format="google_sheet", + use_annotations=False, + ) assert result == return_output - - @pytest.mark.parametrize("test_data_types", [["Patient", "Biospecimen"], ["all manifests"]]) - def test_create_manifests_raise_errors(self, simple_manifest_generator, helpers, test_data_types): - with pytest.raises(ValueError) as exception_info: + + @pytest.mark.parametrize( + "test_data_types", [["Patient", "Biospecimen"], ["all manifests"]] + ) + def test_create_manifests_raise_errors( + self, simple_manifest_generator, helpers, test_data_types + ): + with pytest.raises(ValueError) as exception_info: json_ld_path = helpers.get_data_path("example.model.jsonld") data_types = test_data_types - dataset_ids=["syn123456"] - - simple_manifest_generator.create_manifests(path_to_data_model=json_ld_path, data_types=data_types, dataset_ids=dataset_ids, output_format="google_sheet", use_annotations=False, data_model_labels='class_label') - - @pytest.mark.parametrize("test_data_types, dataset_ids, expected_result", [ - (["Patient", "Biospecimen"], ["mock dataset id1", "mock dataset id2"], ["mock google sheet link", "mock google sheet link"]), - (["Patient"], ["mock dataset id1"], ["mock google sheet link"]), - ]) - def test_create_manifests(self, simple_manifest_generator, helpers, test_data_types, dataset_ids, expected_result): - with patch("schematic.manifest.generator.ManifestGenerator.create_single_manifest", return_value="mock google sheet link"): + dataset_ids = ["syn123456"] + + simple_manifest_generator.create_manifests( + path_to_data_model=json_ld_path, + data_types=data_types, + dataset_ids=dataset_ids, + output_format="google_sheet", + use_annotations=False, + data_model_labels="class_label", + 
) + + @pytest.mark.parametrize( + "test_data_types, dataset_ids, expected_result", + [ + ( + ["Patient", "Biospecimen"], + ["mock dataset id1", "mock dataset id2"], + ["mock google sheet link", "mock google sheet link"], + ), + (["Patient"], ["mock dataset id1"], ["mock google sheet link"]), + ], + ) + def test_create_manifests( + self, + simple_manifest_generator, + helpers, + test_data_types, + dataset_ids, + expected_result, + ): + with patch( + "schematic.manifest.generator.ManifestGenerator.create_single_manifest", + return_value="mock google sheet link", + ): json_ld_path = helpers.get_data_path("example.model.jsonld") - all_results = simple_manifest_generator.create_manifests(path_to_data_model=json_ld_path, data_types=test_data_types, dataset_ids=dataset_ids, output_format="google_sheet", use_annotations=False, data_model_labels='class_label') + all_results = simple_manifest_generator.create_manifests( + path_to_data_model=json_ld_path, + data_types=test_data_types, + dataset_ids=dataset_ids, + output_format="google_sheet", + use_annotations=False, + data_model_labels="class_label", + ) assert all_results == expected_result - diff --git a/tests/test_metadata.py b/tests/test_metadata.py index 8a2c2e965..bf0c4d97b 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -109,10 +109,12 @@ def test_populate_manifest(self, helpers, return_excel, data_model_labels): ids=["data_model_labels-display_label", "data_model_labels-class_label"], ) @pytest.mark.parametrize("validate_component", [None, "BulkRNA-seqAssay"]) - @pytest.mark.parametrize("temporary_file_copy", ["test_BulkRNAseq.csv"], indirect=True) + @pytest.mark.parametrize( + "temporary_file_copy", ["test_BulkRNAseq.csv"], indirect=True + ) def test_submit_metadata_manifest( self, - temporary_file_copy: Generator[str, None, None], + temporary_file_copy: Generator[str, None, None], helpers: Helpers, file_annotations_upload: bool, restrict_rules: bool, diff --git a/tests/test_schemas.py b/tests/test_schemas.py index 61479a3e8..f80449b18 100644 --- a/tests/test_schemas.py +++ b/tests/test_schemas.py @@ -20,7 +20,7 @@ convert_bool_to_str, parse_validation_rules, DisplayLabelType, - get_json_schema_log_file_path + get_json_schema_log_file_path, ) from schematic.utils.io_utils import load_json @@ -448,9 +448,12 @@ def test_generate_data_model_graph(self, helpers, data_model, data_model_labels) # Check that all relationships recorded between 'CheckList' and 'Ab' are present assert ( - "rangeValue" and "parentOf" in graph["CheckListEnum"][expected_valid_values[0]] + "rangeValue" + and "parentOf" in graph["CheckListEnum"][expected_valid_values[0]] + ) + assert ( + "requiresDependency" not in graph["CheckListEnum"][expected_valid_values[0]] ) - assert "requiresDependency" not in graph["CheckListEnum"][expected_valid_values[0]] # Check nodes: assert "Patient" in graph.nodes @@ -1325,8 +1328,8 @@ def test_get_json_validation_schema( data_model_path = helpers.get_data_path(path=data_model) json_schema_log_file_path = get_json_schema_log_file_path( - data_model_path=data_model_path, - source_node=source_node) + data_model_path=data_model_path, source_node=source_node + ) # Remove json schema log file if it already exists. 
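One caution on the CheckListEnum assertion above: "rangeValue" and "parentOf" in d parses as "rangeValue" and ("parentOf" in d), so the first key is never actually tested for membership; only the reformatting changed here, not the logic. A quick demonstration:

d = {"parentOf": 1}

# passes, but only because "rangeValue" is a truthy string
assert "rangeValue" and "parentOf" in d

# the explicit two-membership check exposes the missing key
assert not ("rangeValue" in d and "parentOf" in d)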
if os.path.exists(json_schema_log_file_path): diff --git a/tests/test_store.py b/tests/test_store.py index 98d11fd48..850a5471e 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -541,51 +541,57 @@ async def test_store_async_annotation(self, synapse_store: SynapseStorage) -> No assert result == expected_dict assert isinstance(result, Annotations) + async def test_process_store_annos_failure( + self, synapse_store: SynapseStorage + ) -> None: + """test _process_store_annos function when there's an error either getting or storing annotations""" - async def test_process_store_annos_failure(self, synapse_store: SynapseStorage) -> None: - """test _process_store_annos function when there's an error either getting or storing annotations - """ async def mock_failure_coro(): - raise ValueError("sample error") - + raise ValueError("sample error") + # create tasks that will fail tasks = set() tasks.add(asyncio.create_task(mock_failure_coro())) - + synapse_store._process_store_annos # make sure error message can be raised with pytest.raises(RuntimeError, match="failed with"): await synapse_store._process_store_annos(tasks) - async def test_process_store_annos_success_store(self, synapse_store: SynapseStorage) -> None: - """test _process_store_annos function and make sure that annotations can be stored after successfully getting annotations. - """ + async def test_process_store_annos_success_store( + self, synapse_store: SynapseStorage + ) -> None: + """test _process_store_annos function and make sure that annotations can be stored after successfully getting annotations.""" # mock annotation obtained after async_store stored_annos = Annotations( - annotations={ - "Id": ["mock_string"], - "EntityId": ["mock_syn_id"], - "SampleID": [""], - "Component": ["mock value"], - "FileFormat": ["mock_format"], - }, - etag="mock etag", - id="mock_syn_id") + annotations={ + "Id": ["mock_string"], + "EntityId": ["mock_syn_id"], + "SampleID": [""], + "Component": ["mock value"], + "FileFormat": ["mock_format"], + }, + etag="mock etag", + id="mock_syn_id", + ) async def mock_success_coro(): return stored_annos - - with patch("schematic.store.synapse.SynapseStorage.store_async_annotation",new_callable=AsyncMock) as mock_store_async1: + + with patch( + "schematic.store.synapse.SynapseStorage.store_async_annotation", + new_callable=AsyncMock, + ) as mock_store_async1: tasks = set() tasks.add(asyncio.create_task(mock_success_coro())) await synapse_store._process_store_annos(tasks) - # make sure that the if statement is working + # make sure that the if statement is working mock_store_async1.assert_not_called() - - async def test_process_store_annos_success_get(self, synapse_store: SynapseStorage) -> None: - """test _process_store_annos function and make sure that task of storing annotations can be triggered - """ + async def test_process_store_annos_success_get( + self, synapse_store: SynapseStorage + ) -> None: + """test _process_store_annos function and make sure that task of storing annotations can be triggered""" # mock annotation obtained after get_async mock_annos_dict = { "annotations": { @@ -605,23 +611,29 @@ async def test_process_store_annos_success_get(self, synapse_store: SynapseStora } mock_stored_annos = Annotations( - annotations={ - "Id": ["mock_string"], - "EntityId": ["mock_syn_id"], - }, - etag="mock etag", - id="mock_syn_id") - + annotations={ + "Id": ["mock_string"], + "EntityId": ["mock_syn_id"], + }, + etag="mock etag", + id="mock_syn_id", + ) + async def mock_success_coro(): return 
mock_annos_dict - # make sure that the else statement is working + # make sure that the else statement is working new_tasks = set() - with patch("schematic.store.synapse.SynapseStorage.store_async_annotation",new_callable=AsyncMock, return_value=mock_stored_annos) as mock_store_async2: + with patch( + "schematic.store.synapse.SynapseStorage.store_async_annotation", + new_callable=AsyncMock, + return_value=mock_stored_annos, + ) as mock_store_async2: new_tasks.add(asyncio.create_task(mock_success_coro())) await synapse_store._process_store_annos(new_tasks) mock_store_async2.assert_called_once() + class TestDatasetFileView: def test_init(self, dataset_id, dataset_fileview, synapse_store): assert dataset_fileview.datasetId == dataset_id @@ -1094,9 +1106,10 @@ async def test_add_annotations_to_entities_files( expected_filenames (list(str)): expected list of file names expected_entity_ids (list(str)): expected list of entity ids """ + async def mock_format_row_annos(): return - + async def mock_process_store_annos(requests): return @@ -1104,8 +1117,16 @@ async def mock_process_store_annos(requests): "schematic.store.synapse.SynapseStorage.getFilesInStorageDataset", return_value=files_in_dataset, ): - with patch('schematic.store.synapse.SynapseStorage.format_row_annotations', return_value=mock_format_row_annos, new_callable=AsyncMock) as mock_format_row: - with patch('schematic.store.synapse.SynapseStorage._process_store_annos', return_value=mock_process_store_annos, new_callable=AsyncMock) as mock_process_store: + with patch( + "schematic.store.synapse.SynapseStorage.format_row_annotations", + return_value=mock_format_row_annos, + new_callable=AsyncMock, + ) as mock_format_row: + with patch( + "schematic.store.synapse.SynapseStorage._process_store_annos", + return_value=mock_process_store_annos, + new_callable=AsyncMock, + ) as mock_process_store: manifest_df = pd.DataFrame(original_manifest) new_df = await synapse_store.add_annotations_to_entities_files( diff --git a/tests/test_utils.py b/tests/test_utils.py index 1ff72d673..5b37abe6e 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -11,6 +11,7 @@ from typing import Union, Generator from _pytest.fixtures import FixtureRequest + import numpy as np import pandas as pd import pytest @@ -196,7 +197,8 @@ (1073741825, 1073741824, 1181116006.4), ] -def get_metadataModel(helpers, model_name:str): + +def get_metadataModel(helpers, model_name: str): metadataModel = MetadataModel( inputMModelLocation=helpers.get_data_path(model_name), inputMModelLocationType="local", @@ -1025,26 +1027,27 @@ def test_get_label_from_display_name(self, test_dn: str, data_model_labels: str) return @pytest.mark.parametrize( - "data_model", - list(DATA_MODEL_DICT.keys()), - ids=list(DATA_MODEL_DICT.values()) + "data_model", list(DATA_MODEL_DICT.keys()), ids=list(DATA_MODEL_DICT.values()) ) @pytest.mark.parametrize( "source_node", ["Biospecimen", "Patient"], ids=["biospecimen_source", "patient_source"], ) - def test_get_json_schema_log_file_path(self, helpers, data_model:str, source_node: str): + def test_get_json_schema_log_file_path( + self, helpers, data_model: str, source_node: str + ): data_model_path = helpers.get_data_path(path=data_model) json_schema_log_file_path = get_json_schema_log_file_path( - data_model_path=data_model_path, - source_node=source_node) + data_model_path=data_model_path, source_node=source_node + ) # Check that model is not included in the json_schema_log_file_path - assert '.model' not in "data_model" + assert ".model" not in 
"data_model" # Check the file suffixs are what is expected. - assert ['schema', 'json'] == json_schema_log_file_path.split('.')[-2:] + assert ["schema", "json"] == json_schema_log_file_path.split(".")[-2:] + class TestValidateUtils: def test_validate_schema(self, helpers): @@ -1098,13 +1101,22 @@ def test_validate_property_schema(self, helpers): @pytest.mark.parametrize( ("manifest", "model", "root_node"), - [("mock_manifests/Patient_test_no_entry_for_cond_required_column.manifest.csv", - "example.model.csv", "Patient"), - ("mock_manifests/Valid_Test_Manifest_with_nones.csv", - "example_test_nones.model.csv", "MockComponent")] - ) + [ + ( + "mock_manifests/Patient_test_no_entry_for_cond_required_column.manifest.csv", + "example.model.csv", + "Patient", + ), + ( + "mock_manifests/Valid_Test_Manifest_with_nones.csv", + "example_test_nones.model.csv", + "MockComponent", + ), + ], + ) def test_convert_nan_entries_to_empty_strings( - self, helpers, manifest, model, root_node): + self, helpers, manifest, model, root_node + ): # Get manifest and data model path manifest_path = helpers.get_data_path(manifest) model_path = helpers.get_data_path(model) @@ -1128,37 +1140,37 @@ def test_convert_nan_entries_to_empty_strings( manifest_path, preserve_raw_input=False, allow_na_values=True, - **load_args,) + **load_args, + ) metadataModel = get_metadataModel(helpers, model) # Instantiate Validate manifest, and run manifest validation - # In this step the manifest is modified while running rule + # In this step the manifest is modified while running rule # validation so need to do this step to get the updated manfest. - vm = ValidateManifest( - errors, manifest, manifest_path, dmge, json_schema) + vm = ValidateManifest(errors, manifest, manifest_path, dmge, json_schema) manifest, vmr_errors, vmr_warnings = vm.validate_manifest_rules( - manifest, dmge, restrict_rules=False, project_scope=["syn54126707"], + manifest, + dmge, + restrict_rules=False, + project_scope=["syn54126707"], ) # Run convert nan function - output = validate_utils.convert_nan_entries_to_empty_strings( - manifest=manifest - ) + output = validate_utils.convert_nan_entries_to_empty_strings(manifest=manifest) # Compare post rule validation manifest with output manifest looking # for expected nan to empty string conversion - if root_node == 'Patient': - assert manifest['Family History'][0] == [''] - assert output['Family History'][0] == [''] - elif root_node == 'MockComponent': - assert manifest['Check List'][2] == [''] - assert manifest['Check List Like Enum'][2] == [] - assert type(manifest['Check NA'][2]) == type(pd.NA) - - assert output['Check List'][2] == [''] - assert output['Check List Like Enum'][2] == [] - + if root_node == "Patient": + assert manifest["Family History"][0] == [""] + assert output["Family History"][0] == [""] + elif root_node == "MockComponent": + assert manifest["Check List"][2] == [""] + assert manifest["Check List Like Enum"][2] == [] + assert type(manifest["Check NA"][2]) == type(pd.NA) + + assert output["Check List"][2] == [""] + assert output["Check List Like Enum"][2] == [] def test_get_list_robustness(self, helpers): return diff --git a/tests/test_validation.py b/tests/test_validation.py index b2b85851d..9ea47b973 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -27,7 +27,8 @@ def DMGE(helpers): dmge = helpers.get_data_model_graph_explorer(path="example.model.jsonld") yield dmge -def get_metadataModel(helpers, model_name:str): + +def get_metadataModel(helpers, model_name: str): 
metadataModel = MetadataModel( inputMModelLocation=helpers.get_data_path(model_name), inputMModelLocationType="local", @@ -55,20 +56,47 @@ class TestManifestValidation: @pytest.mark.parametrize( ("model_name", "manifest_name", "root_node"), [ - ("example.model.csv","mock_manifests/Valid_Test_Manifest.csv", "MockComponent"), - ("example.model.csv", "mock_manifests/Patient_test_no_entry_for_cond_required_column.manifest.csv", "Patient"), - ("example_test_nones.model.csv","mock_manifests/Valid_Test_Manifest_with_nones.csv", "MockComponent"), + ( + "example.model.csv", + "mock_manifests/Valid_Test_Manifest.csv", + "MockComponent", + ), + ( + "example.model.csv", + "mock_manifests/Patient_test_no_entry_for_cond_required_column.manifest.csv", + "Patient", + ), + ( + "example_test_nones.model.csv", + "mock_manifests/Valid_Test_Manifest_with_nones.csv", + "MockComponent", + ), + ], + ids=[ + "example_model", + "example_with_no_entry_for_cond_required_columns", + "example_with_nones", ], - ids=["example_model", "example_with_no_entry_for_cond_required_columns", "example_with_nones"], ) @pytest.mark.parametrize( "project_scope", ["syn54126707", "syn55250368", "syn55271234"], - ids=["project_scope_with_manifests", "project_scope_without_manifests", "project_scope_with_empty_manifest"], + ids=[ + "project_scope_with_manifests", + "project_scope_without_manifests", + "project_scope_with_empty_manifest", + ], ) - def test_valid_manifest(self, helpers, model_name:str, manifest_name:str, - root_node:str, project_scope:str, dmge:DataModelGraph): - """ Run the valid manifest in various situations, some of which will generate errors or warnings, + def test_valid_manifest( + self, + helpers, + model_name: str, + manifest_name: str, + root_node: str, + project_scope: str, + dmge: DataModelGraph, + ): + """Run the valid manifest in various situations, some of which will generate errors or warnings, if there are "issues" with target manifests on manifests. Since there are so many parameters, limit the combinations that are being run to the ones that are relevant. 
Args: @@ -90,16 +118,28 @@ def test_valid_manifest(self, helpers, model_name:str, manifest_name:str, manifest_path = helpers.get_data_path(manifest_name) warning_rule_sets_1 = [ - ('Check Match at Least', 'matchAtLeastOne Patient.PatientID set'), - ('Check Match at Least values', 'matchAtLeastOne MockComponent.checkMatchatLeastvalues value'), - ('Check Match Exactly', 'matchExactlyOne MockComponent.checkMatchExactly set'), - ('Check Match Exactly values', 'matchExactlyOne MockComponent.checkMatchExactlyvalues value'), - ] + ("Check Match at Least", "matchAtLeastOne Patient.PatientID set"), + ( + "Check Match at Least values", + "matchAtLeastOne MockComponent.checkMatchatLeastvalues value", + ), + ( + "Check Match Exactly", + "matchExactlyOne MockComponent.checkMatchExactly set", + ), + ( + "Check Match Exactly values", + "matchExactlyOne MockComponent.checkMatchExactlyvalues value", + ), + ] warning_rule_sets_2 = warning_rule_sets_1[1:] error_rule_sets = [ - ('Check Match None', 'matchNone MockComponent.checkMatchNone set error'), - ('Check Match None values', 'matchNone MockComponent.checkMatchNonevalues value error'), - ] + ("Check Match None", "matchNone MockComponent.checkMatchNone set error"), + ( + "Check Match None values", + "matchNone MockComponent.checkMatchNonevalues value error", + ), + ] # For the standard project scope, models and manifest should pass without warnings or errors if project_scope == "syn54126707": @@ -113,25 +153,34 @@ def test_valid_manifest(self, helpers, model_name:str, manifest_name:str, # When submitting the first manifest for cross manifest validation (MockComponent), check that proper warning # (to alert users that no validation will be run), is raised. The manifest is still valid to submit. - if (project_scope == "syn55250368" and root_node=="MockComponent" and - model_name in ["example.model.csv", "example_test_nones.model.csv"]): + if ( + project_scope == "syn55250368" + and root_node == "MockComponent" + and model_name in ["example.model.csv", "example_test_nones.model.csv"] + ): metadataModel = get_metadataModel(helpers, model_name) errors, warnings = metadataModel.validateModelManifest( manifestPath=manifest_path, rootNode=root_node, project_scope=[project_scope], ) - + for attribute_name, val_rule in warning_rule_sets_1: - assert GenerateError.generate_no_cross_warning( - dmge=dmge, - attribute_name=attribute_name, - val_rule=val_rule)[0] in warnings + assert ( + GenerateError.generate_no_cross_warning( + dmge=dmge, attribute_name=attribute_name, val_rule=val_rule + )[0] + in warnings + ) assert errors == [] - + # When submitting a manifest to a project that contains a manifest without data, ensure that the proper # warnings/errors are raised. 
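The ids= lists added in the parametrize decorators above exist purely for readability: pytest substitutes them for the raw parameter values in test report names. A minimal illustration using the same project-scope values:

import pytest


@pytest.mark.parametrize(
    "project_scope",
    ["syn54126707", "syn55250368", "syn55271234"],
    ids=[
        "project_scope_with_manifests",
        "project_scope_without_manifests",
        "project_scope_with_empty_manifest",
    ],
)
def test_scope_is_synapse_id(project_scope: str) -> None:
    # reported as e.g. test_scope_is_synapse_id[project_scope_with_manifests]
    assert project_scope.startswith("syn")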
- elif project_scope == "syn55271234" and root_node=="MockComponent" and model_name == "example.model.csv": + elif ( + project_scope == "syn55271234" + and root_node == "MockComponent" + and model_name == "example.model.csv" + ): metadataModel = get_metadataModel(helpers, model_name) errors, warnings = metadataModel.validateModelManifest( manifestPath=manifest_path, @@ -139,21 +188,24 @@ def test_valid_manifest(self, helpers, model_name:str, manifest_name:str, project_scope=[project_scope], ) for attribute_name, val_rule in warning_rule_sets_2: - assert GenerateError.generate_no_value_in_manifest_error( - dmge=dmge, - attribute_name=attribute_name, - val_rule=val_rule)[1][0] in warnings - - for attribute_name, val_rule in error_rule_sets: - assert GenerateError.generate_no_value_in_manifest_error( - dmge=dmge, - attribute_name=attribute_name, - val_rule=val_rule)[0][0] in errors + assert ( + GenerateError.generate_no_value_in_manifest_error( + dmge=dmge, attribute_name=attribute_name, val_rule=val_rule + )[1][0] + in warnings + ) + for attribute_name, val_rule in error_rule_sets: + assert ( + GenerateError.generate_no_value_in_manifest_error( + dmge=dmge, attribute_name=attribute_name, val_rule=val_rule + )[0][0] + in errors + ) def test_invalid_manifest(self, helpers, dmge): metadataModel = get_metadataModel(helpers, model_name="example.model.jsonld") - + manifestPath = helpers.get_data_path("mock_manifests/Invalid_Test_Manifest.csv") rootNode = "MockComponent" @@ -164,31 +216,41 @@ def test_invalid_manifest(self, helpers, dmge): ) # Check errors - assert GenerateError.generate_type_error( + assert ( + GenerateError.generate_type_error( val_rule="num", row_num="3", attribute_name="Check Num", invalid_entry="c", dmge=dmge, - )[0] in errors + )[0] + in errors + ) - assert GenerateError.generate_type_error( + assert ( + GenerateError.generate_type_error( val_rule="int", row_num="3", attribute_name="Check Int", invalid_entry="5.63", dmge=dmge, - )[0] in errors - - assert GenerateError.generate_type_error( + )[0] + in errors + ) + + assert ( + GenerateError.generate_type_error( val_rule="str", row_num="3", attribute_name="Check String", invalid_entry="94", dmge=dmge, - )[0] in errors + )[0] + in errors + ) - assert GenerateError.generate_list_error( + assert ( + GenerateError.generate_list_error( val_rule="list", list_string="9", row_num="3", @@ -196,9 +258,12 @@ def test_invalid_manifest(self, helpers, dmge): list_error="not_comma_delimited", invalid_entry="9", dmge=dmge, - )[0] in errors + )[0] + in errors + ) - assert GenerateError.generate_list_error( + assert ( + GenerateError.generate_list_error( val_rule="list", list_string="ab", row_num="4", @@ -206,9 +271,12 @@ def test_invalid_manifest(self, helpers, dmge): list_error="not_comma_delimited", invalid_entry="ab", dmge=dmge, - )[0] in errors + )[0] + in errors + ) - assert GenerateError.generate_list_error( + assert ( + GenerateError.generate_list_error( val_rule="list", list_string="a c f", row_num="3", @@ -216,9 +284,12 @@ def test_invalid_manifest(self, helpers, dmge): list_error="not_comma_delimited", invalid_entry="a c f", dmge=dmge, - )[0] in errors + )[0] + in errors + ) - assert GenerateError.generate_list_error( + assert ( + GenerateError.generate_list_error( val_rule="list", list_string="a", row_num="4", @@ -226,9 +297,12 @@ def test_invalid_manifest(self, helpers, dmge): list_error="not_comma_delimited", invalid_entry="a", dmge=dmge, - )[0] in errors + )[0] + in errors + ) - assert GenerateError.generate_list_error( + assert ( 
+ GenerateError.generate_list_error( val_rule="list", list_string="a", row_num="4", @@ -236,9 +310,12 @@ def test_invalid_manifest(self, helpers, dmge): list_error="not_comma_delimited", invalid_entry="a", dmge=dmge, - )[0] in errors + )[0] + in errors + ) - assert GenerateError.generate_regex_error( + assert ( + GenerateError.generate_regex_error( val_rule="regex", reg_expression="[a-f]", row_num="3", @@ -246,9 +323,12 @@ def test_invalid_manifest(self, helpers, dmge): module_to_call="match", invalid_entry="m", dmge=dmge, - )[0] in errors + )[0] + in errors + ) - assert GenerateError.generate_regex_error( + assert ( + GenerateError.generate_regex_error( val_rule="regex", reg_expression="[a-f]", row_num="3", @@ -256,9 +336,12 @@ def test_invalid_manifest(self, helpers, dmge): module_to_call="search", invalid_entry="q", dmge=dmge, - )[0] in errors + )[0] + in errors + ) - assert GenerateError.generate_regex_error( + assert ( + GenerateError.generate_regex_error( val_rule="regex", reg_expression="^\d+$", row_num="2", @@ -266,9 +349,12 @@ def test_invalid_manifest(self, helpers, dmge): module_to_call="search", invalid_entry="5.4", dmge=dmge, - )[0] in errors + )[0] + in errors + ) - assert GenerateError.generate_url_error( + assert ( + GenerateError.generate_url_error( val_rule="url", url="http://googlef.com/", url_error="invalid_url", @@ -277,7 +363,9 @@ def test_invalid_manifest(self, helpers, dmge): argument=None, invalid_entry="http://googlef.com/", dmge=dmge, - )[0] in errors + )[0] + in errors + ) date_err = GenerateError.generate_content_error( val_rule="date", @@ -289,21 +377,27 @@ def test_invalid_manifest(self, helpers, dmge): error_in_list = [date_err[2] in error for error in errors] assert any(error_in_list) - assert GenerateError.generate_content_error( + assert ( + GenerateError.generate_content_error( val_rule="unique error", attribute_name="Check Unique", dmge=dmge, row_num=["2", "3", "4"], invalid_entry=["str1"], - )[0] in errors + )[0] + in errors + ) - assert GenerateError.generate_content_error( + assert ( + GenerateError.generate_content_error( val_rule="inRange 50 100 error", attribute_name="Check Range", dmge=dmge, row_num=["3"], invalid_entry=["30"], - )[0] in errors + )[0] + in errors + ) assert ( GenerateError.generate_cross_warning( @@ -314,7 +408,7 @@ def test_invalid_manifest(self, helpers, dmge): invalid_entry=["123"], dmge=dmge, )[0] - in errors + in errors ) assert ( @@ -325,54 +419,69 @@ def test_invalid_manifest(self, helpers, dmge): invalid_entry=["123"], dmge=dmge, )[0] - in errors + in errors ) # check warnings - assert GenerateError.generate_content_error( + assert ( + GenerateError.generate_content_error( val_rule="recommended", attribute_name="Check Recommended", dmge=dmge, - )[1] in warnings + )[1] + in warnings + ) - assert GenerateError.generate_content_error( + assert ( + GenerateError.generate_content_error( val_rule="protectAges", attribute_name="Check Ages", dmge=dmge, row_num=["2", "3"], invalid_entry=["6549", "32851"], - )[1] in warnings + )[1] + in warnings + ) - assert GenerateError.generate_cross_warning( + assert ( + GenerateError.generate_cross_warning( val_rule="matchAtLeastOne", row_num=["3"], attribute_name="Check Match at Least", invalid_entry=["7163"], manifest_id=["syn54126997", "syn54127001"], dmge=dmge, - )[1] in warnings + )[1] + in warnings + ) - assert GenerateError.generate_cross_warning( + assert ( + GenerateError.generate_cross_warning( val_rule="matchAtLeastOne MockComponent.checkMatchatLeastvalues value", row_num=["3"], 
attribute_name="Check Match at Least values", invalid_entry=["51100"], dmge=dmge, - )[1] in warnings + )[1] + in warnings + ) - assert \ + assert ( GenerateError.generate_cross_warning( val_rule="matchExactlyOne", attribute_name="Check Match Exactly", matching_manifests=["syn54126950", "syn54127008"], dmge=dmge, - )[1] in warnings \ + )[1] + in warnings or GenerateError.generate_cross_warning( val_rule="matchExactlyOne", attribute_name="Check Match Exactly", matching_manifests=["syn54127702", "syn54127008"], dmge=dmge, - )[1] in warnings + )[1] + in warnings + ) cross_warning = GenerateError.generate_cross_warning( val_rule="matchExactlyOne MockComponent.checkMatchExactlyvalues MockComponent.checkMatchExactlyvalues value", @@ -385,7 +494,6 @@ def test_invalid_manifest(self, helpers, dmge): warning_in_list = [cross_warning[1] in warning for warning in warnings] assert any(warning_in_list) - def test_in_house_validation(self, helpers, dmge): metadataModel = get_metadataModel(helpers, model_name="example.model.jsonld") manifestPath = helpers.get_data_path("mock_manifests/Invalid_Test_Manifest.csv") @@ -399,39 +507,52 @@ def test_in_house_validation(self, helpers, dmge): ) # Check errors - assert GenerateError.generate_type_error( + assert ( + GenerateError.generate_type_error( val_rule="num", row_num="3", attribute_name="Check Num", invalid_entry="c", dmge=dmge, - )[0] in errors + )[0] + in errors + ) - assert GenerateError.generate_type_error( + assert ( + GenerateError.generate_type_error( val_rule="int", row_num="3", attribute_name="Check Int", invalid_entry="5.63", dmge=dmge, - )[0] in errors + )[0] + in errors + ) - assert GenerateError.generate_type_error( + assert ( + GenerateError.generate_type_error( val_rule="str", row_num="3", attribute_name="Check String", invalid_entry="94", dmge=dmge, - )[0] in errors + )[0] + in errors + ) - assert GenerateError.generate_type_error( + assert ( + GenerateError.generate_type_error( val_rule="int", row_num="3", attribute_name="Check NA", invalid_entry="9.5", dmge=dmge, - )[0] in errors + )[0] + in errors + ) - assert GenerateError.generate_list_error( + assert ( + GenerateError.generate_list_error( val_rule="list", list_string="9", row_num="3", @@ -439,9 +560,12 @@ def test_in_house_validation(self, helpers, dmge): list_error="not_comma_delimited", invalid_entry="9", dmge=dmge, - )[0] in errors + )[0] + in errors + ) - assert GenerateError.generate_list_error( + assert ( + GenerateError.generate_list_error( val_rule="list", list_string="ab", row_num="4", @@ -449,9 +573,12 @@ def test_in_house_validation(self, helpers, dmge): list_error="not_comma_delimited", invalid_entry="ab", dmge=dmge, - )[0] in errors + )[0] + in errors + ) - assert GenerateError.generate_regex_error( + assert ( + GenerateError.generate_regex_error( val_rule="regex", reg_expression="[a-f]", row_num="3", @@ -459,9 +586,12 @@ def test_in_house_validation(self, helpers, dmge): module_to_call="search", invalid_entry="q", dmge=dmge, - )[0] in errors + )[0] + in errors + ) - assert GenerateError.generate_regex_error( + assert ( + GenerateError.generate_regex_error( val_rule="regex", reg_expression="[a-f]", row_num="3", @@ -469,9 +599,12 @@ def test_in_house_validation(self, helpers, dmge): module_to_call="match", invalid_entry="m", dmge=dmge, - )[0] in errors + )[0] + in errors + ) - assert GenerateError.generate_url_error( + assert ( + GenerateError.generate_url_error( val_rule="url", url="http://googlef.com/", url_error="invalid_url", @@ -480,7 +613,9 @@ def 
                 argument=None,
                 invalid_entry="http://googlef.com/",
                 dmge=dmge,
-            )[0] in errors
+            )[0]
+            in errors
+        )

         assert (
             GenerateError.generate_cross_warning(
@@ -491,7 +626,7 @@ def test_in_house_validation(self, helpers, dmge):
                 invalid_entry=["123"],
                 dmge=dmge,
             )[0]
-        in errors
+            in errors
         )

         assert (
@@ -502,56 +637,66 @@ def test_in_house_validation(self, helpers, dmge):
                 invalid_entry=["123"],
                 dmge=dmge,
             )[0]
-        in errors
+            in errors
         )

         # Check Warnings
-        assert GenerateError.generate_cross_warning(
+        assert (
+            GenerateError.generate_cross_warning(
                 val_rule="matchAtLeastOne",
                 row_num=["3"],
                 attribute_name="Check Match at Least",
                 invalid_entry=["7163"],
                 manifest_id=["syn54126997", "syn54127001"],
                 dmge=dmge,
-            )[1] in warnings
+            )[1]
+            in warnings
+        )

-        assert GenerateError.generate_cross_warning(
+        assert (
+            GenerateError.generate_cross_warning(
                 val_rule="matchAtLeastOne MockComponent.checkMatchatLeastvalues value",
                 row_num=["3"],
                 attribute_name="Check Match at Least values",
                 invalid_entry=["51100"],
                 dmge=dmge,
-            )[1] in warnings
+            )[1]
+            in warnings
+        )

-        assert \
+        assert (
             GenerateError.generate_cross_warning(
                 val_rule="matchExactlyOne",
                 attribute_name="Check Match Exactly",
                 matching_manifests=["syn54126950", "syn54127008"],
                 dmge=dmge,
-            )[1] in warnings \
+            )[1]
+            in warnings
             or GenerateError.generate_cross_warning(
                 val_rule="matchExactlyOne",
                 attribute_name="Check Match Exactly",
                 matching_manifests=["syn54127702", "syn54127008"],
                 dmge=dmge,
-            )[1] in warnings
+            )[1]
+            in warnings
+        )

-        assert GenerateError.generate_cross_warning(
+        assert (
+            GenerateError.generate_cross_warning(
                 val_rule="matchExactlyOne MockComponent.checkMatchExactlyvalues MockComponent.checkMatchExactlyvalues value",
                 row_num=["2", "3", "4"],
                 attribute_name="Check Match Exactly values",
                 invalid_entry=["71738", "98085", "210065"],
                 dmge=dmge,
-            )[1] in warnings
-
+            )[1]
+            in warnings
+        )

-    def test_missing_column(self, helpers, dmge:DataModelGraph):
-        """ Test that a manifest missing a column returns the proper error.
- """ - model_name="example.model.csv" - manifest_name="mock_manifests/Invalid_Biospecimen_Missing_Column_Manifest.csv" - root_node="Biospecimen" + def test_missing_column(self, helpers, dmge: DataModelGraph): + """Test that a manifest missing a column returns the proper error.""" + model_name = "example.model.csv" + manifest_name = "mock_manifests/Invalid_Biospecimen_Missing_Column_Manifest.csv" + root_node = "Biospecimen" manifest_path = helpers.get_data_path(manifest_name) metadataModel = get_metadataModel(helpers, model_name) @@ -560,14 +705,16 @@ def test_missing_column(self, helpers, dmge:DataModelGraph): rootNode=root_node, ) - assert GenerateError.generate_schema_error( - row_num='2', + assert ( + GenerateError.generate_schema_error( + row_num="2", attribute_name="Wrong schema", error_message="'Tissue Status' is a required property", invalid_entry="Wrong schema", dmge=dmge, - )[0] in errors - + )[0] + in errors + ) @pytest.mark.parametrize( "model_name", @@ -577,19 +724,46 @@ def test_missing_column(self, helpers, dmge:DataModelGraph): ], ids=["example_model", "example_with_requirements_from_vr"], ) - @pytest.mark.parametrize( - ["manifest_name", "root_node",], [ - ("mock_manifests/Biospecimen_required_vr_test_fail.manifest.csv", "Biospecimen"), - ("mock_manifests/Biospecimen_required_vr_test_pass.manifest.csv", "Biospecimen"), + "manifest_name", + "root_node", + ], + [ + ( + "mock_manifests/Biospecimen_required_vr_test_fail.manifest.csv", + "Biospecimen", + ), + ( + "mock_manifests/Biospecimen_required_vr_test_pass.manifest.csv", + "Biospecimen", + ), ("mock_manifests/Patient_required_vr_test_pass.manifest.csv", "Patient"), - ("mock_manifests/Patient_test_no_entry_for_cond_required_column.manifest.csv", "Patient"), - ("mock_manifests/BulkRNAseq_component_based_required_rule_test.manifest.csv", "BulkRNA-seqAssay"), + ( + "mock_manifests/Patient_test_no_entry_for_cond_required_column.manifest.csv", + "Patient", + ), + ( + "mock_manifests/BulkRNAseq_component_based_required_rule_test.manifest.csv", + "BulkRNA-seqAssay", + ), + ], + ids=[ + "biospeciment_required_vr_empty", + "biospecimen_required_filled", + "patient_not_required_empty", + "patient_conditionally_required_not_filled", + "bulk_rna_seq_component_based_rule_test", ], - ids=["biospeciment_required_vr_empty", "biospecimen_required_filled", "patient_not_required_empty", "patient_conditionally_required_not_filled", "bulk_rna_seq_component_based_rule_test"], ) - def test_required_validation_rule(self, helpers, model_name:str, manifest_name:str, root_node:str, dmge:DataModelGraphExplorer) -> None: + def test_required_validation_rule( + self, + helpers, + model_name: str, + manifest_name: str, + root_node: str, + dmge: DataModelGraphExplorer, + ) -> None: """ Args: model_name, str: model to run test validation against @@ -630,7 +804,11 @@ def test_required_validation_rule(self, helpers, model_name:str, manifest_name:s rootNode=root_node, ) - error_and_warning_free_manifests = ["Biospecimen_required_vr_test_pass", "Patient_test_no_entry_for_cond_required_column", ""] + error_and_warning_free_manifests = [ + "Biospecimen_required_vr_test_pass", + "Patient_test_no_entry_for_cond_required_column", + "", + ] # For each model, these manifest should pass, bc either the value is being passed as requierd, or its not currently required for manifest in error_and_warning_free_manifests: @@ -638,70 +816,85 @@ def test_required_validation_rule(self, helpers, model_name:str, manifest_name:s assert errors == [] assert warnings == [] - messages 
= {"patient_id_empty_warning": { - "row_num":"2", - "attribute_name":"Patient ID", - "error_message":"'' should be non-empty", - "invalid_entry":""}, - "bulk_rnaseq_cbr_error_1":{ - "row_num":"3", - "attribute_name":"Genome FASTA", - "error_message":"'' should be non-empty", - "invalid_entry":""}, - "bulk_rnaseq_cbr_error_2":{ - "row_num":"4", - "attribute_name":"File Format", - "error_message":"'' is not one of ['CSV/TSV', 'CRAM', 'FASTQ', 'BAM']", - "invalid_entry":""}, - } + messages = { + "patient_id_empty_warning": { + "row_num": "2", + "attribute_name": "Patient ID", + "error_message": "'' should be non-empty", + "invalid_entry": "", + }, + "bulk_rnaseq_cbr_error_1": { + "row_num": "3", + "attribute_name": "Genome FASTA", + "error_message": "'' should be non-empty", + "invalid_entry": "", + }, + "bulk_rnaseq_cbr_error_2": { + "row_num": "4", + "attribute_name": "File Format", + "error_message": "'' is not one of ['CSV/TSV', 'CRAM', 'FASTQ', 'BAM']", + "invalid_entry": "", + }, + } # This manifest should fail in the example_model bc the manifest Required=False, and in the example_with_requirements_from_vr # bc the requirments are set to false in the validation rule - if (("Biospecimen_required_vr_test_fail" in manifest_name) or - ("Patient_required_vr_test_pass" in manifest_name and model_name == "example.model.csv") - ): + if ("Biospecimen_required_vr_test_fail" in manifest_name) or ( + "Patient_required_vr_test_pass" in manifest_name + and model_name == "example.model.csv" + ): message_key = "patient_id_empty_warning" - assert GenerateError.generate_schema_error( - row_num=messages[message_key]["row_num"], - attribute_name=messages[message_key]["attribute_name"], - error_message=messages[message_key]["error_message"], - invalid_entry=messages[message_key]["invalid_entry"], - dmge=dmge, - )[0] in errors + assert ( + GenerateError.generate_schema_error( + row_num=messages[message_key]["row_num"], + attribute_name=messages[message_key]["attribute_name"], + error_message=messages[message_key]["error_message"], + invalid_entry=messages[message_key]["invalid_entry"], + dmge=dmge, + )[0] + in errors + ) assert warnings == [] - if "Patient_required_vr_test_pass" in manifest_name and model_name == "example_required_vr_test.model.csv": + if ( + "Patient_required_vr_test_pass" in manifest_name + and model_name == "example_required_vr_test.model.csv" + ): assert errors == [] assert warnings == [] if "BulkRNAseq_component_based_required_rule_test" in manifest_name: message_key = "bulk_rnaseq_cbr_error_1" - assert GenerateError.generate_schema_error( + assert ( + GenerateError.generate_schema_error( row_num=messages[message_key]["row_num"], attribute_name=messages[message_key]["attribute_name"], error_message=messages[message_key]["error_message"], invalid_entry=messages[message_key]["invalid_entry"], dmge=dmge, - )[0] in errors + )[0] + in errors + ) message_key = "bulk_rnaseq_cbr_error_2" expected_error = GenerateError.generate_schema_error( - row_num=messages[message_key]["row_num"], - attribute_name=messages[message_key]["attribute_name"], - error_message=messages[message_key]["error_message"], - invalid_entry=messages[message_key]["invalid_entry"], - dmge=dmge, - )[0] + row_num=messages[message_key]["row_num"], + attribute_name=messages[message_key]["attribute_name"], + error_message=messages[message_key]["error_message"], + invalid_entry=messages[message_key]["invalid_entry"], + dmge=dmge, + )[0] # since the valid value order isnt set in error reporting, check a portion of the expected 
             # Check the error row is expected
             assert expected_error[1] in errors[1]

             # Check that one of the values for the expected valid values is present
             # Extract a valid value
-            valid_value = expected_error[2].split(',')[-1].split(']')[0].strip(' ').strip("\'")
-            assert valid_value in errors[1][2]
-            assert warnings==[]
-
+            valid_value = (
+                expected_error[2].split(",")[-1].split("]")[0].strip(" ").strip("'")
+            )
+            assert valid_value in errors[1][2]
+            assert warnings == []

     @pytest.mark.parametrize(
         "manifest_path",
         [
@@ -756,13 +949,16 @@ def test_component_validations(self, helpers, manifest_path, dmge):
             and vmr_warnings[0][-1] == ["123"]
         )

-
     @pytest.mark.rule_combos(
         reason="This introduces a great number of tests covering every possible rule combination that are only necessary on occasion."
     )
     @pytest.mark.parametrize("base_rule, second_rule", get_rule_combinations())
     def test_rule_combinations(
-        self, helpers, dmge, base_rule, second_rule,
+        self,
+        helpers,
+        dmge,
+        base_rule,
+        second_rule,
     ):
         """
         TODO: Describe what this test is doing.