Merge pull request #685 from biolink/validate_cli_tests
add an optional test to make sure full validate produce is run when l…
sierra-moxon authored Sep 6, 2024
2 parents 2232aea + 7ccd31e commit ac4ea1a
Showing 24 changed files with 3,379,021 additions and 4,833 deletions.
1 change: 1 addition & 0 deletions .github/workflows/make-tests.yaml
@@ -27,4 +27,5 @@ jobs:
pip install .
pip install pytest
- name: run make on rules
# reminder: please run `make travis_test_full` locally to include slow running tests not suitable for CI
run: make travis_test
40 changes: 34 additions & 6 deletions Makefile
@@ -8,22 +8,47 @@ PACKAGES = ontobio prefixcommons
subpackage_tests: $(patsubst %,test-%,$(PACKAGES))

test:
pytest tests/*.py tests/unit/
pytest -m "not slow" tests/*.py tests/unit/

debug_test:
pytest -s -vvvv tests/*.py
pytest -m "not slow" -s -vvvv tests/*.py

t-%:
pytest tests/test_$*.py
pytest -m "not slow" tests/test_$*.py

tv-%:
pytest -s tests/test_$*.py
pytest -m "not slow" -s tests/test_$*.py

foo:
which pytest

# only run local tests
travis_test:
@if [ -d ".venv" ] && [ -f "pyproject.toml" ]; then \
echo "Running tests in Poetry environment..."; \
poetry run pytest -m "not slow" tests/test_*local*.py tests/test_*parse*.py tests/test*writer*.py tests/test_qc.py \
tests/test_rdfgen.py tests/test_phenosim_engine.py tests/test_ontol.py \
tests/test_validation_rules.py tests/unit/test_annotation_scorer.py \
tests/test_goassociation_model.py tests/test_relations.py \
tests/unit/test_golr_search_query.py tests/unit/test_owlsim2_api.py \
tests/test_collections.py \
tests/test_gocamgen.py \
tests/test_gpi_isoform_replacement.py \
tests/test_validate_cli.py; \
else \
pytest -m "not slow" tests/test_*local*.py tests/test_*parse*.py tests/test*writer*.py tests/test_qc.py \
tests/test_rdfgen.py tests/test_phenosim_engine.py tests/test_ontol.py \
tests/test_validation_rules.py tests/unit/test_annotation_scorer.py \
tests/test_goassociation_model.py tests/test_relations.py \
tests/unit/test_golr_search_query.py tests/unit/test_owlsim2_api.py \
tests/test_collections.py \
tests/test_gocamgen.py \
tests/test_gpi_isoform_replacement.py \
tests/test_validate_cli.py; \
fi


travis_test_full:
@if [ -d ".venv" ] && [ -f "pyproject.toml" ]; then \
echo "Running tests in Poetry environment..."; \
poetry run pytest tests/test_*local*.py tests/test_*parse*.py tests/test*writer*.py tests/test_qc.py \
@@ -33,7 +58,8 @@ travis_test:
tests/unit/test_golr_search_query.py tests/unit/test_owlsim2_api.py \
tests/test_collections.py \
tests/test_gocamgen.py \
tests/test_gpi_isoform_replacement.py; \
tests/test_gpi_isoform_replacement.py \
tests/test_validate_cli.py; \
else \
pytest tests/test_*local*.py tests/test_*parse*.py tests/test*writer*.py tests/test_qc.py \
tests/test_rdfgen.py tests/test_phenosim_engine.py tests/test_ontol.py \
@@ -42,9 +68,11 @@ travis_test:
tests/unit/test_golr_search_query.py tests/unit/test_owlsim2_api.py \
tests/test_collections.py \
tests/test_gocamgen.py \
tests/test_gpi_isoform_replacement.py; \
tests/test_gpi_isoform_replacement.py \
tests/test_validate_cli.py; \
fi


cleandist:
rm dist/* || true

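The `-m "not slow"` flags added above lean on pytest's marker mechanism: tests tagged `slow` are deselected by `make travis_test` and the CI workflow, and only run under `make travis_test_full`. Below is a minimal sketch of how such a test is tagged; the marker registration shown in the comment and the test bodies are illustrative assumptions, not code from this repo:

```python
import time

import pytest

# Assumed marker registration in the repo's pytest config
# (pytest.ini / setup.cfg / pyproject.toml):
#   [pytest]
#   markers =
#       slow: marks tests as slow (deselect with '-m "not slow"')


@pytest.mark.slow
def test_full_validate_produce_roundtrip():
    # Stand-in for a long-running end-to-end check: `pytest -m "not slow"`
    # (make travis_test) deselects it; plain `pytest`
    # (make travis_test_full) runs it.
    time.sleep(5)
    assert True


def test_fast_unit_check():
    # Unmarked tests run under both targets.
    assert 1 + 1 == 2
```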
14 changes: 12 additions & 2 deletions bin/README.md
@@ -1,16 +1,26 @@
See [command line docs](http://ontobio.readthedocs.io/en/latest/commandline.html#commandline) on ReadTheDocs

To test the validate.py "validate" command, the command that produces the final GPADs in the pipeline via the "mega make"
(aka: "produces GAFs, GPADs, ttl" stage), on a particular source:
(aka: "produces GAFs, GPADs, ttl" stage), on a particular source, run:
```bash
make travis_test_full
```

This Makefile target runs the full validate.produce command using the goa_cow, mgi, zfin, and goa_chicken sources, producing GPAD and GAF files in the groups subdirectory, and then checks the content of these products. These tests are only run manually, not via CI, because they take minutes to run.

Alternatively, you can run the following commands to test the validate.produce command on a particular source locally:

Note: "snapshot" in the URLs below can be changed to any pipeline branch; it's listed here for ease of copy/paste.

```bash
poetry install
poetry run validate produce -m ../go-site/metadata --gpad -t . -o go-basic.json --base-download-url "http://skyhook.berkeleybop.org/snapshot/" --only-dataset mgi MGI --gpad-gpi-output-version 2.0
poetry run validate produce -m ../go-site/metadata --gpad -t . -o go-basic.json --base-download-url "http://skyhook.berkeleybop.org/snapshot/" --only-dataset goa_chicken goa --gpad-gpi-output-version 2.0
poetry run validate produce -m ../go-site/metadata --gpad -t . -o go-basic.json --base-download-url "http://skyhook.berkeleybop.org/snapshot/" --only-dataset zfin ZFIN --gpad-gpi-output-version 2.0
```


To test whether a GAF file is valid (passes all the GORules):
```bash
poetry install
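As an aside, a CLI-level test like the new tests/test_validate_cli.py can drive these commands in-process through click's test runner rather than a subprocess. The sketch below is hypothetical: the import path `bin.validate` and the group name `cli` are assumptions about the layout, not confirmed by this diff:

```python
from click.testing import CliRunner

from bin.validate import cli  # assumed import path and click group name


def test_produce_help_exits_cleanly():
    runner = CliRunner()
    # Invoking with --help exercises the option wiring for `produce`
    # without downloading any sources.
    result = runner.invoke(cli, ["produce", "--help"])
    assert result.exit_code == 0
    assert "--only-dataset" in result.output
```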
17 changes: 13 additions & 4 deletions bin/validate.py
@@ -116,14 +116,20 @@ def download_a_dataset_source(group, dataset_metadata, target_dir, source_url, b
return path


def download_source_gafs(group_metadata, target_dir, exclusions=[], base_download_url=None, replace_existing_files=True,
def download_source_gafs(group_metadata,
target_dir,
exclusions=[],
base_download_url=None,
replace_existing_files=True,
only_dataset=None):
"""
This looks at a group metadata dictionary and downloads each GAF source that is not in the exclusions list.
For each downloaded file, keep track of the path of the file. If the file is zipped, it will unzip it here.
This function returns a list of tuples of the dataset dictionary mapped to the downloaded source path.
"""
# Grab all datasets in a group, excluding non-gaf, datasets that are explicitely excluded from an option, and excluding datasets with the `exclude key` set to true
# Grab all datasets in a group, excluding non-gaf, datasets that are explicitly excluded
# from an option, and excluding datasets with the `exclude key` set to true

gaf_urls = []
if only_dataset is None:
gaf_urls = [(data, data["source"]) for data in group_metadata["datasets"] if
@@ -132,7 +138,7 @@ def download_source_gafs(group_metadata, target_dir, exclusions=[], base_downloa
gaf_urls = [(data, data["source"]) for data in group_metadata["datasets"] if data["dataset"] == only_dataset]
# List of dataset metadata to gaf download url

click.echo("Found {}".format(", ".join([kv[0]["dataset"] for kv in gaf_urls])))
logger.info("Found gaf_urls {}".format(", ".join([kv[0]["dataset"] for kv in gaf_urls])))
downloaded_paths = []
for dataset_metadata, gaf_url in gaf_urls:
dataset = dataset_metadata["dataset"]
@@ -148,7 +154,7 @@ def download_source_gafs(group_metadata, target_dir, exclusions=[], base_downloa
# otherwise file is coming in uncompressed. But we want to make sure
# to zip up the original source also
tools.zipup(path)

logger.info("Downloaded {}".format(path))
downloaded_paths.append((dataset_metadata, path))

return downloaded_paths
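To make the `only_dataset` branch concrete, here is a toy run of the same list comprehension; the metadata dict mimics the shape of a group metadata file (a "datasets" list whose entries carry "dataset" and "source" keys), but the values are invented:

```python
group_metadata = {
    "datasets": [
        {"dataset": "mgi", "type": "gaf",
         "source": "http://example.org/mgi.gaf.gz"},
        {"dataset": "mgi_isoform", "type": "gpi",
         "source": "http://example.org/mgi_isoform.gpi.gz"},
    ]
}

only_dataset = "mgi"
# Same filter as in download_source_gafs: keep only the named dataset.
gaf_urls = [(data, data["source"])
            for data in group_metadata["datasets"]
            if data["dataset"] == only_dataset]

assert [d["dataset"] for d, _ in gaf_urls] == ["mgi"]
```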
@@ -645,6 +651,8 @@ def produce(ctx, group, metadata_dir, gpad, gpad_gpi_output_version, ttl, target
click.echo("Products will go in {}".format(absolute_target))
absolute_metadata = os.path.abspath(metadata_dir)

print("group", group)
print("dataset", )
group_metadata = metadata.dataset_metadata_file(absolute_metadata, group)
click.echo("Loading ontology: {}...".format(ontology))
ontology_graph = OntologyFactory().create(ontology, ignore_cache=True)
@@ -654,6 +662,7 @@ def produce(ctx, group, metadata_dir, gpad, gpad_gpi_output_version, ttl, target
replace_existing_files=not skip_existing_files,
only_dataset=only_dataset)

click.echo("Downloaded GAF sources: {}".format(downloaded_gaf_sources))
# extract the titles for the go rules, this is a dictionary comprehension
rule_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "rules"))
goref_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "gorefs"))
