Skip to content

Commit

Permalink
minor updates to metadata parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
pauldg committed Apr 14, 2022
1 parent 81b1e1e commit 299f6f4
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 9 deletions.
27 changes: 22 additions & 5 deletions rocrate/provenance_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

from prov.identifier import Identifier
from prov.model import PROV, PROV_LABEL, PROV_TYPE, PROV_VALUE, ProvDocument, ProvEntity
from tools.load_ga_export import load_ga_history_export, GalaxyJob
from tools.load_ga_export import load_ga_history_export, GalaxyJob, GalaxyDataset
from ast import literal_eval
import os

Expand Down Expand Up @@ -109,14 +109,30 @@ def __init__(
# move to separate function
metadata_export = load_ga_history_export(ga_export)
self.generate_prov_doc()
self.jobs = []

self.datasets = {}
# print(metadata_export["jobs_attrs"][0]["params"])
for job in metadata_export["jobs_attrs"]:
for i,dataset in enumerate(metadata_export["datasets_attrs"]):
datasets_attrs = GalaxyDataset()
datasets_attrs.parse_ga_dataset_attrs(dataset)
print(i)
print(datasets_attrs.attributes['encoded_id'])
self.datasets[datasets_attrs.attributes['encoded_id']] = datasets_attrs.attributes
# self.declare_process(ds_attrs.attributes)

self.jobs = {}
for i,job in enumerate(metadata_export["jobs_attrs"]):
job_attrs = GalaxyJob()
job_attrs.parse_ga_jobs_attrs(job)
self.jobs.append(job_attrs.attributes)
print(i)
print(job_attrs.attributes.keys())
# for k,v in job_attrs.attributes['parameters'].items():
# print(k, " : ",v)
self.jobs[job_attrs.attributes['encoded_id']] = job_attrs.attributes
self.declare_process(job_attrs.attributes)

# print(self.jobs[0]['inputs'])

def __str__(self) -> str:
"""Represent this Provenance profile as a string."""
return "ProvenanceProfile <{}>".format(
Expand Down Expand Up @@ -223,7 +239,8 @@ def declare_process(
# cmd = ga_export_jobs_attrs["command_line"]
process_name = ga_export_jobs_attrs["tool_id"]
# tool_version = ga_export_jobs_attrs["tool_version"]
prov_label = "Run of ga_export/jobs_attrs.txt#" + process_name
# TODO: insert workflow id
prov_label = "Run of workflow_id_placeholder" + process_name
start_time = ga_export_jobs_attrs["create_time"]
end_time = ga_export_jobs_attrs["update_time"]

Expand Down
8 changes: 4 additions & 4 deletions tools/load_ga_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,21 +48,21 @@ def parse_ga_jobs_attrs(self, job_attrs):

class GalaxyDataset(Dict):
    """Flattened view of one dataset entry from a Galaxy history export.

    The parsed result lives in ``self.attributes``: scalar fields are stored
    under their original key, and every non-empty dict field whose key
    contains ``"metadata"`` is merged into ``self.attributes["metadata"]``.
    """

    def __init__(self):
        """
        Initialize the GalaxyDataset object.
        """
        # "metadata" is pre-created so parse_ga_dataset_attrs can merge
        # into it without checking for its existence first.
        self.attributes = {"metadata": {}}

    def parse_ga_dataset_attrs(self, dataset_attrs):
        """Populate ``self.attributes`` from one dataset attribute mapping.

        Args:
            dataset_attrs: mapping of attribute name to value for a single
                dataset (presumably one element of the export's
                ``datasets_attrs`` list -- confirm against the caller).

        Non-dict values are copied verbatim under their own key. Dict values
        are merged into ``self.attributes["metadata"]`` only when they are
        non-empty and their key contains the substring ``"metadata"``; all
        other dict values are ignored.
        """
        for key, value in dataset_attrs.items():
            if not isinstance(value, dict):
                self.attributes[key] = value
            elif value and "metadata" in key:
                # Later metadata dicts overwrite earlier ones on key clashes.
                self.attributes["metadata"].update(value)

0 comments on commit 299f6f4

Please sign in to comment.