Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Paint metadata refactor #180

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 13 additions & 7 deletions bin/validate.py
Original file line number | Diff line number | Diff line change
@@ -92,14 +92,13 @@ def download_source_gafs(group_metadata, target_dir, exclusions=[]):
return downloaded_paths

def check_and_download_paint_source(paint_metadata, group_id, dataset, target_dir):
paint_dataset = find(paint_metadata["datasets"], lambda d: d["dataset"] == "paint_{}".format(dataset))
if paint_dataset is None:
if paint_metadata is None:
return None

path = os.path.join(target_dir, "groups", group_id, "{}-src.gaf.gz".format(paint_dataset["dataset"]))
path = os.path.join(target_dir, "groups", group_id, "{}-src.gaf.gz".format(paint_metadata["dataset"]))
click.echo("Downloading paint to {}".format(path))
urllib.request.urlretrieve(paint_dataset["source"], path)
unzipped = os.path.join(os.path.split(path)[0], "{}-src.gaf".format(paint_dataset["dataset"]))
urllib.request.urlretrieve(paint_metadata["source"], path)
unzipped = os.path.join(os.path.split(path)[0], "{}-src.gaf".format(paint_metadata["dataset"]))
unzip(path, unzipped)
return unzipped

@@ -329,20 +328,27 @@ def produce(group, metadata, gpad, ttl, target, ontology, exclude):
click.echo("Loading ontology: {}...".format(ontology))
ontology_graph = OntologyFactory().create(ontology)

# Dictionary from dataset name to path in the target directory to the source zip.
source_gaf_zips = download_source_gafs(group_metadata, absolute_target, exclusions=exclude)
source_gafs = {zip_path: os.path.join(os.path.split(zip_path)[0], "{}-src.gaf".format(dataset)) for dataset, zip_path in source_gaf_zips.items()}
for source_zip, source_gaf in source_gafs.items():
unzip(source_zip, source_gaf)

paint_metadata = metadata_file(absolute_metadata, "paint")
for metadataset in group_metadata["datasets"]:
dataset = metadataset["dataset"]
# Skip this dataset if it's not gaf, or if it's not something we've downloaded
if metadataset["type"] != "gaf" or dataset not in source_gaf_zips:
click.echo("Skipping {}".format(metadataset["id"]))
continue

for dataset in source_gaf_zips.keys():
gafzip = source_gaf_zips[dataset]
source_gaf = source_gafs[gafzip]
valid_gaf = produce_gaf(dataset, source_gaf, ontology_graph)[0]

gpi = produce_gpi(dataset, absolute_target, valid_gaf, ontology_graph)

paint_metadata = metadataset.get("paint", None)

paint_src_gaf = check_and_download_paint_source(paint_metadata, group_metadata["id"], dataset, absolute_target)

end_gaf = valid_gaf
Expand Down
2 changes: 1 addition & 1 deletion ontobio/__init__.py
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
from __future__ import absolute_import

__version__ = '1.1.0'
__version__ = '1.2.1'

from .ontol_factory import OntologyFactory
from .ontol import Ontology, Synonym, TextDefinition
Expand Down