Skip to content

Commit

Permalink
some test on merged authors
Browse files Browse the repository at this point in the history
  • Loading branch information
Volodymyr Savchenko authored and Volodymyr Savchenko committed May 23, 2024
1 parent 2e5bff3 commit 533bf39
Show file tree
Hide file tree
Showing 4 changed files with 289 additions and 6 deletions.
18 changes: 13 additions & 5 deletions tests/test_absolutize.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def get_test_data_file(filename=""):

class TestAbsolutizePaths: # (unittest.TestCase):
# NOTE: some ids cannot be found (e.g. 634) or are forbidden (e.g. 678)
@pytest.mark.parametrize("workflow_id", [41, 552, 883])
@pytest.mark.parametrize("workflow_id", [41, 31, 552, 883])
def test_make_paths_absolute(self, workflow_id):
with patch_rdflib_urlopen(get_test_data_file(), write_cache=False):
with open(
Expand All @@ -42,11 +42,19 @@ def test_make_paths_absolute(self, workflow_id):
def test_merged(self):
G = merge_all_files(
get_test_data_file("[0-9]*ro-crate*.json"),
cache_base_dir=get_test_data_file(),
write_cache=False,
cache_kwargs=dict(
cache_base_dir=get_test_data_file(),
write_cache=False,
),
)

assert is_all_absolute(G)

for s, o in G.subject_objects(rdflib.URIRef("http://schema.org/author")):
print(s, o)
authors = set(
[o for s, o in G.subject_objects(rdflib.URIRef("http://schema.org/author"))]
)

# checking that we got some useful data about the authors
assert authors == set(
[rdflib.term.Literal("Arnaud Meng, Maxim Scheremetjew, Michael Crusoe")]
)
253 changes: 253 additions & 0 deletions tests/test_data/31_ro-crate-metadata.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,253 @@
{
"@context": "https://w3id.org/ro/crate/1.0/context",
"@graph": [
{
"@id": "ro-crate-metadata.jsonld",
"@type": "CreativeWork",
"about": {
"@id": "./"
}
},
{
"@id": "ro-crate-preview.html",
"@type": "CreativeWork",
"about": {
"@id": "./"
}
},
{
"@id": "./",
"@type": "Dataset",
"name": "TranscriptsAnnotation-i5only-wf",
"description": "Transcripts annotation workflow",
"license": "https://www.apache.org/licenses/LICENSE-2.0",
"author": "Arnaud Meng, Maxim Scheremetjew, Michael Crusoe",
"publisher": "EMBL - European Bioinformatics Institute, 2018",
"url": "https://github.com/EBI-Metagenomics/workflow-is-cwl/blob/master/workflows/TranscriptsAnnotation-i5only-wf.cwl",
"mainEntity": {
"@id": "workflows/TranscriptsAnnotation-i5only-wf.cwl"
},
"hasPart": [
{
"@id": "workflows/TranscriptsAnnotation-i5only-wf.cwl"
},
{
"@id": "diagram.svg"
},
{
"@id": "workflows/TransDecoder-v5-wf-2steps.cwl"
},
{
"@id": "tools/TransDecoder/TransDecoder.Predict-v5.cwl"
},
{
"@id": "tools/TransDecoder/TransDecoder-v5-genetic_codes.yaml"
},
{
"@id": "tools/TransDecoder/TransDecoder.LongOrfs-v5.cwl"
},
{
"@id": "workflows/InterProScan-v5-chunked-wf.cwl"
},
{
"@id": "tools/InterProScan/InterProScan-v5.cwl"
},
{
"@id": "tools/InterProScan/InterProScan-apps.yaml"
},
{
"@id": "tools/InterProScan/InterProScan-protein_formats.yaml"
},
{
"@id": "utils/fasta_chunker.cwl"
},
{
"@id": "utils/concatenate.cwl"
},
{
"@id": "utils/clean_fasta_headers.cwl"
},
{
"@id": "tools/Diamond/Diamon.blastx-v0.9.21.cwl"
},
{
"@id": "tools/Diamond/Diamond-strand_values.yaml"
},
{
"@id": "tools/Diamond/Diamond-output_formats.yaml"
},
{
"@id": "utils/esl-reformat.cwl"
},
{
"@id": "utils/esl-reformat-replace.yaml"
},
{
"@id": "utils/cut_fasta_headers.cwl"
},
{
"@id": "tools/BUSCO/BUSCO-assessment_modes.yaml"
},
{
"@id": "workflows/cmsearch-multimodel-wf.cwl"
},
{
"@id": "tools/cmsearch-deoverlap/cmsearch-deoverlap-v0.02.cwl"
},
{
"@id": "tools/Infernal/cmsearch/infernal-cmsearch-v1.1.2.cwl"
},
{
"@id": "tools/BUSCO/BUSCO-v3.cwl"
}
]
},
{
"@id": "workflows/TranscriptsAnnotation-i5only-wf.cwl",
"@type": [
"File",
"SoftwareSourceCode",
"Workflow"
],
"programmingLanguage": {
"@id": "#cwl"
},
"name": "TranscriptsAnnotation-i5only-wf",
"contentSize": 5490,
"image": {
"@id": "diagram.svg"
}
},
{
"@id": "diagram.svg",
"@type": [
"File",
"ImageObject",
"WorkflowSketch"
],
"contentSize": 34556
},
{
"@id": "workflows/TransDecoder-v5-wf-2steps.cwl",
"@type": "File",
"contentSize": 1547
},
{
"@id": "tools/TransDecoder/TransDecoder.Predict-v5.cwl",
"@type": "File",
"contentSize": 5722
},
{
"@id": "tools/TransDecoder/TransDecoder-v5-genetic_codes.yaml",
"@type": "File",
"contentSize": 415
},
{
"@id": "tools/TransDecoder/TransDecoder.LongOrfs-v5.cwl",
"@type": "File",
"contentSize": 3920
},
{
"@id": "workflows/InterProScan-v5-chunked-wf.cwl",
"@type": "File",
"contentSize": 2940
},
{
"@id": "tools/InterProScan/InterProScan-v5.cwl",
"@type": "File",
"contentSize": 4556
},
{
"@id": "tools/InterProScan/InterProScan-apps.yaml",
"@type": "File",
"contentSize": 283
},
{
"@id": "tools/InterProScan/InterProScan-protein_formats.yaml",
"@type": "File",
"contentSize": 78
},
{
"@id": "utils/fasta_chunker.cwl",
"@type": "File",
"contentSize": 2178
},
{
"@id": "utils/concatenate.cwl",
"@type": "File",
"contentSize": 1217
},
{
"@id": "utils/clean_fasta_headers.cwl",
"@type": "File",
"contentSize": 928
},
{
"@id": "tools/Diamond/Diamon.blastx-v0.9.21.cwl",
"@type": "File",
"contentSize": 4441
},
{
"@id": "tools/Diamond/Diamond-strand_values.yaml",
"@type": "File",
"contentSize": 54
},
{
"@id": "tools/Diamond/Diamond-output_formats.yaml",
"@type": "File",
"contentSize": 79
},
{
"@id": "utils/esl-reformat.cwl",
"@type": "File",
"contentSize": 1408
},
{
"@id": "utils/esl-reformat-replace.yaml",
"@type": "File",
"contentSize": 381
},
{
"@id": "utils/cut_fasta_headers.cwl",
"@type": "File",
"contentSize": 970
},
{
"@id": "tools/BUSCO/BUSCO-assessment_modes.yaml",
"@type": "File",
"contentSize": 63
},
{
"@id": "workflows/cmsearch-multimodel-wf.cwl",
"@type": "File",
"contentSize": 1892
},
{
"@id": "tools/cmsearch-deoverlap/cmsearch-deoverlap-v0.02.cwl",
"@type": "File",
"contentSize": 1925
},
{
"@id": "tools/Infernal/cmsearch/infernal-cmsearch-v1.1.2.cwl",
"@type": "File",
"contentSize": 4376
},
{
"@id": "tools/BUSCO/BUSCO-v3.cwl",
"@type": "File",
"contentSize": 9661
},
{
"@id": "#cwl",
"@type": "ComputerLanguage",
"name": "Common Workflow Language",
"alternateName": "CWL",
"identifier": {
"@id": "https://w3id.org/cwl/v1.0/"
},
"url": {
"@id": "https://www.commonwl.org/"
}
}
]
}
13 changes: 13 additions & 0 deletions workflowhub_graph/cachedurlopen.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@


def url_to_filename(url):
"""
Converts a URL to a filename by replacing non-alphanumeric characters with dashes.
:param url: The URL to convert.
:return: The filename.
"""

parsed = urlparse(url)
if parsed.scheme not in ["http", "https"]:
raise ValueError(f"Unsupported scheme {parsed.scheme}")
Expand All @@ -21,6 +27,13 @@ def patch_rdflib_urlopen(
write_cache=True,
allowed_urls_pattern=r"https://w3id.org/ro/crate/1\.[01]/context",
):
"""
Context manager to patch rdflib.parser.urlopen to cache and return the content of a URL.
:param cache_base_dir: The base directory to store the cached files.
:param write_cache: Whether to write the cache if the file is not found.
:param allowed_urls_pattern: A regex pattern to match the allowed URLs to cache.
"""

allowed_urls_re = re.compile(allowed_urls_pattern)
if cache_base_dir is None:
cache_base_dir = "cached_urlopen"
Expand Down
11 changes: 10 additions & 1 deletion workflowhub_graph/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import glob
import json
import os
from typing import Optional

import rdflib

Expand All @@ -11,8 +12,16 @@


def merge_all_files(
pattern="data/*.json", base_url=BASE_URL, **cache_kwargs
pattern="data/*.json", base_url: str = BASE_URL, cache_kwargs: Optional[dict] = None
) -> rdflib.Graph:
"""
Merges all JSON-LD files in the given pattern into a single RDF graph.
:param pattern: The pattern to match the files.
:param base_url: The base URL for the WorkflowHub.
:param cache_kwargs: Optional dict of keyword arguments for the URL cache (e.g. cache_base_dir, write_cache).
:return: The merged RDF graph.
"""

G = rdflib.Graph()

filenames = glob.glob(pattern)
Expand Down

0 comments on commit 533bf39

Please sign in to comment.