Add assay/study information validation
* Add study/assay uniqueness validation (#45)
* Check if investigation refers to studies (#17)
* Fix Reader usage in isatab2dot app
* Modify assay info and assay/study path usage
* Add validation of minimal assay/study information (#17)
* Rearrange isatab2isatab and isatab2validation apps
Mathias Kuhring authored and mkuhring committed May 15, 2019
1 parent 0738002 commit 665363d
Showing 19 changed files with 450 additions and 352 deletions.
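The validator changes that implement the new uniqueness and minimal-information checks (#45, #17) are among the files not rendered below. As a rough illustration only, this is a hypothetical sketch of a study/assay uniqueness check over file references, not the actual InvestigationValidator code; the attribute names (investigation.studies, study.info.path, assay.path) match the models used in the diffs below:

from collections import Counter
import warnings


def check_unique_study_assay_paths(investigation):
    """Hypothetical sketch of a study/assay uniqueness check; the real
    InvestigationValidator implementation is not shown in this commit view."""
    paths = [str(study.info.path) for study in investigation.studies if study.info.path]
    paths += [
        str(assay.path)
        for study in investigation.studies
        for assay in study.assays
        if assay.path
    ]
    for path, count in Counter(paths).items():
        if count > 1:
            warnings.warn("Duplicate study/assay file reference: {}".format(path))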
13 changes: 3 additions & 10 deletions altamisa/apps/isatab2dot.py
@@ -61,24 +61,17 @@ def run(args):

for s, study_info in enumerate(investigation.studies):
with open(os.path.join(path, study_info.info.path), "rt") as inputf:
study = StudyReader.from_stream(
investigation, study_info, "S{}".format(s + 1), inputf
).read()
study = StudyReader.from_stream("S{}".format(s + 1), inputf).read()
print(" /* study {} */".format(study_info.info.path), file=args.output_file)
print(" subgraph clusterStudy{} {{".format(s), file=args.output_file)
print(' label = "Study: {}"'.format(study_info.info.path), file=args.output_file)
print_dot(study, args.output_file)
print(" }", file=args.output_file)

for a, assay_info in enumerate(study_info.assays.values()):
for a, assay_info in enumerate(study_info.assays):
with open(os.path.join(path, assay_info.path), "rt") as inputf:
assay = AssayReader.from_stream(
investigation,
study_info,
assay_info,
"S{}".format(s + 1),
"A{}".format(a + 1),
inputf,
"S{}".format(s + 1), "A{}".format(a + 1), inputf
).read()
print(" /* assay {} */".format(assay_info.path), file=args.output_file)
print(" subgraph clusterAssayS{}A{} {{".format(s, a), file=args.output_file)
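Two changes are visible in this hunk: StudyReader.from_stream and AssayReader.from_stream no longer take the investigation/study/assay info objects, only a reader identifier and the input stream, and study_info.assays is iterated directly instead of via .values(). A minimal sketch of the new calling convention (file names are placeholders; the import path is assumed from the package layout):

from altamisa.isatab import AssayReader, StudyReader  # import path assumed

with open("s_study.txt", "rt") as inputf:  # placeholder study file
    study = StudyReader.from_stream("S1", inputf).read()

with open("a_assay.txt", "rt") as inputf:  # placeholder assay file
    assay = AssayReader.from_stream("S1", "A1", inputf).read()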
102 changes: 61 additions & 41 deletions altamisa/apps/isatab2isatab.py
@@ -24,70 +24,90 @@
def run(args):
# Collect warnings
with warnings.catch_warnings(record=True) as records:
run_warnings_caught(args)

# Check if input and output directory are different
path_in = os.path.normpath(os.path.dirname(args.input_investigation_file.name))
path_out = os.path.normpath(os.path.dirname(args.output_investigation_file.name))
if path_in == path_out:
tpl = "Can't output ISA-tab files to same directory as as input: {} == {}"
msg = tpl.format(path_in, path_out)
raise IsaException(msg)

# Read investigation
investigation = InvestigationReader.from_stream(args.input_investigation_file).read()

# Read studies and assays
studies = {}
assays = {}
for s, study_info in enumerate(investigation.studies):
# Print warnings
if not args.no_warnings:
for record in records:
warnings.showwarning(
record.message, record.category, record.filename, record.lineno, record.line
)


def run_warnings_caught(args):
# Check if input and output directory are different
path_in = os.path.normpath(os.path.dirname(args.input_investigation_file.name))
path_out = os.path.normpath(os.path.dirname(args.output_investigation_file.name))
if path_in == path_out:
tpl = "Can't output ISA-tab files to same directory as as input: {} == {}"
msg = tpl.format(path_in, path_out)
raise IsaException(msg)

investigation, studies, assays = run_reading(args, path_in)
run_writing(args, path_out, investigation, studies, assays)


def run_reading(args, path_in):
# Read investigation
investigation = InvestigationReader.from_stream(args.input_investigation_file).read()

# Validate investigation
InvestigationValidator(investigation).validate()

# Read studies and assays
studies = {}
assays = {}
for s, study_info in enumerate(investigation.studies):
if study_info.info.path:
with open(os.path.join(path_in, study_info.info.path), "rt") as inputf:
studies[s] = StudyReader.from_stream("S{}".format(s + 1), inputf).read()
if study_info.assays:
assays[s] = {}
for a, assay_info in enumerate(study_info.assays.values()):
if study_info.assays:
assays[s] = {}
for a, assay_info in enumerate(study_info.assays):
if assay_info.path:
with open(os.path.join(path_in, assay_info.path), "rt") as inputf:
assays[s][a] = AssayReader.from_stream(
"S{}".format(s + 1), "A{}".format(a + 1), inputf
).read()

# Validate investigation
InvestigationValidator(investigation).validate()

# Validate studies and assays
for s, study_info in enumerate(investigation.studies):
# Validate studies and assays
for s, study_info in enumerate(investigation.studies):
if study_info.info.path:
StudyValidator(investigation, study_info, studies[s]).validate()
for a, assay_info in enumerate(study_info.assays.values()):
for a, assay_info in enumerate(study_info.assays):
if assay_info.path:
AssayValidator(investigation, study_info, assay_info, assays[s][a]).validate()

# Write investigation
InvestigationWriter.from_stream(
investigation, args.output_investigation_file, quote=args.quotes
).write()
return investigation, studies, assays


# Write studies and assays
for s, study_info in enumerate(investigation.studies):
if args.output_investigation_file.name == "<stdout>":
def run_writing(args, path_out, investigation, studies, assays):
# Write investigation
InvestigationWriter.from_stream(
investigation, args.output_investigation_file, quote=args.quotes
).write()

# Write studies and assays
for s, study_info in enumerate(investigation.studies):
if args.output_investigation_file.name == "<stdout>":
if study_info.info.path:
StudyWriter.from_stream(
studies[s], args.output_investigation_file, quote=args.quotes
).write()
for a, assay_info in enumerate(study_info.assays.values()):
for a, assay_info in enumerate(study_info.assays):
if assay_info.path:
AssayWriter.from_stream(
assays[s][a], args.output_investigation_file, quote=args.quotes
).write()
else:
else:
if study_info.info.path:
with open(os.path.join(path_out, study_info.info.path), "wt") as outputf:
StudyWriter.from_stream(studies[s], outputf, quote=args.quotes).write()
for a, assay_info in enumerate(study_info.assays.values()):
for a, assay_info in enumerate(study_info.assays):
if assay_info.path:
with open(os.path.join(path_out, assay_info.path), "wt") as outputf:
AssayWriter.from_stream(assays[s][a], outputf, quote=args.quotes).write()

# Print warnings
if not args.no_warnings:
for record in records:
warnings.showwarning(
record.message, record.category, record.filename, record.lineno, record.line
)


def main(argv=None):
parser = argparse.ArgumentParser()
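Taken together, the new run_reading() validates the investigation before any study or assay file is opened and skips entries without a file. A condensed sketch of that reading flow, assuming the same imports as the app and an `args`/`path_in` pair as set up in run_warnings_caught():

investigation = InvestigationReader.from_stream(args.input_investigation_file).read()
InvestigationValidator(investigation).validate()  # now runs before studies/assays are read

studies, assays = {}, {}
for s, study_info in enumerate(investigation.studies):
    if study_info.info.path:  # the study file reference is optional now
        with open(os.path.join(path_in, study_info.info.path), "rt") as inputf:
            studies[s] = StudyReader.from_stream("S{}".format(s + 1), inputf).read()
    if study_info.assays:
        assays[s] = {}
        for a, assay_info in enumerate(study_info.assays):  # tuple, not a dict
            if assay_info.path:  # the assay file reference is optional, too
                with open(os.path.join(path_in, assay_info.path), "rt") as inputf:
                    assays[s][a] = AssayReader.from_stream(
                        "S{}".format(s + 1), "A{}".format(a + 1), inputf
                    ).read()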
54 changes: 31 additions & 23 deletions altamisa/apps/isatab2validation.py
@@ -18,46 +18,54 @@


def run(args):
# Show all warnings of same type and content
if args.show_duplicate_warnings:
warnings.simplefilter("always")

# Collect warnings
with warnings.catch_warnings(record=True) as records:
run_warnings_caught(args)

# Read investigation
investigation = InvestigationReader.from_stream(args.input_investigation_file).read()
args.input_investigation_file.close()
# Print warnings
for record in records:
warnings.showwarning(
record.message, record.category, record.filename, record.lineno, record.line
)


def run_warnings_caught(args):
# Read investigation
investigation = InvestigationReader.from_stream(args.input_investigation_file).read()
args.input_investigation_file.close()

# Validate investigation
InvestigationValidator(investigation).validate()

# Read studies and assays
path_in = os.path.normpath(os.path.dirname(args.input_investigation_file.name))
studies = {}
assays = {}
for s, study_info in enumerate(investigation.studies):
# Read studies and assays
path_in = os.path.normpath(os.path.dirname(args.input_investigation_file.name))
studies = {}
assays = {}
for s, study_info in enumerate(investigation.studies):
if study_info.info.path:
with open(os.path.join(path_in, study_info.info.path), "rt") as inputf:
studies[s] = StudyReader.from_stream("S{}".format(s + 1), inputf).read()
if study_info.assays:
assays[s] = {}
for a, assay_info in enumerate(study_info.assays.values()):
if study_info.assays:
assays[s] = {}
for a, assay_info in enumerate(study_info.assays):
if assay_info.path:
with open(os.path.join(path_in, assay_info.path), "rt") as inputf:
assays[s][a] = AssayReader.from_stream(
"S{}".format(s + 1), "A{}".format(a + 1), inputf
).read()

# Validate investigation
InvestigationValidator(investigation).validate()

# Validate studies and assays
for s, study_info in enumerate(investigation.studies):
# Validate studies and assays
for s, study_info in enumerate(investigation.studies):
if study_info.info.path:
StudyValidator(investigation, study_info, studies[s]).validate()
for a, assay_info in enumerate(study_info.assays.values()):
for a, assay_info in enumerate(study_info.assays):
if assay_info.path:
AssayValidator(investigation, study_info, assay_info, assays[s][a]).validate()

# Print warnings
for record in records:
warnings.showwarning(
record.message, record.category, record.filename, record.lineno, record.line
)


def main(argv=None):
parser = argparse.ArgumentParser()
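Both apps now follow the same pattern: the work happens inside warnings.catch_warnings(record=True) in a dedicated run_warnings_caught(), and the recorded warnings are replayed afterwards (isatab2isatab additionally honours a no_warnings flag). The new show_duplicate_warnings option switches the warnings filter to "always", since Python's default filter reports identical warnings only once. A generic sketch of that pattern, with a `work` callable standing in for run_warnings_caught:

import warnings


def run_with_collected_warnings(work, show_duplicate_warnings=False):
    """Generic sketch of the warning handling used by the apps above."""
    if show_duplicate_warnings:
        # "always" replays every occurrence instead of collapsing duplicates
        warnings.simplefilter("always")
    with warnings.catch_warnings(record=True) as records:
        work()  # e.g. parse the ISA-tab files and run the validators
    for record in records:
        warnings.showwarning(
            record.message, record.category, record.filename, record.lineno, record.line
        )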
4 changes: 2 additions & 2 deletions altamisa/isatab/models.py
@@ -261,8 +261,8 @@ class StudyInfo(NamedTuple):
publications: Tuple[PublicationInfo]
#: Study factors by name
factors: Dict[str, FactorInfo]
#: Study assays by name
assays: Dict[str, AssayInfo]
#: Study assays
assays: Tuple[AssayInfo]
#: Study protocols by name
protocols: Dict[str, ProtocolInfo]
#: Study contact list
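Because StudyInfo.assays is now a plain tuple, and an AssayInfo may have no file, client code iterates it directly and guards on the optional path instead of going through .values(), as the app changes above already show. A small helper illustrating the new access pattern (the helper itself is illustrative, not part of altamisa):

def list_assay_files(study_info):
    """Print the assay file declared for each assay of a study."""
    # StudyInfo.assays is a Tuple[AssayInfo] now (it used to be a dict keyed by
    # file name), so iterate it directly; the path may be None.
    for a, assay_info in enumerate(study_info.assays):
        if assay_info.path:
            print("A{}: {}".format(a + 1, assay_info.path))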
3 changes: 2 additions & 1 deletion altamisa/isatab/parse_assay_study.py
@@ -182,7 +182,8 @@ def _assign_column_headers(self): # noqa: C901
if not is_secondary:
prev = header

def _raise_seen_before(self, name, col_no):
@staticmethod
def _raise_seen_before(name, col_no):
tpl = 'Seen "{}" header for same entity in col {}'
msg = tpl.format(name, col_no)
raise ParseIsatabException(msg)
36 changes: 12 additions & 24 deletions altamisa/isatab/parse_investigation.py
@@ -340,7 +340,6 @@ def _read_contacts(self) -> Iterator[models.ContactInfo]:
)

def _read_studies(self) -> Iterator[models.StudyInfo]:
# TODO: is it legal to have no study in the investigation?
while self._line:
# Read STUDY header
line = self._read_next_line()
@@ -355,7 +354,9 @@ def _read_studies(self) -> Iterator[models.StudyInfo]:
# From this, parse the basic information from the study
comments = _parse_comments(section, comment_keys)
basic_info = models.BasicInfo(
Path(section[investigation_headers.STUDY_FILE_NAME]),
Path(section[investigation_headers.STUDY_FILE_NAME])
if section[investigation_headers.STUDY_FILE_NAME]
else None,
section[investigation_headers.STUDY_IDENTIFIER],
section[investigation_headers.STUDY_TITLE],
section[investigation_headers.STUDY_DESCRIPTION],
@@ -369,7 +370,7 @@ def _read_studies(self) -> Iterator[models.StudyInfo]:
design_descriptors = tuple(self._read_study_design_descriptors())
publications = tuple(self._read_study_publications())
factors = {f.name: f for f in self._read_study_factors()}
assays = {a.path.name: a for a in self._read_study_assays()}
assays = tuple(self._read_study_assays())
protocols = {p.name: p for p in self._read_study_protocols()}
contacts = tuple(self._read_study_contacts())
# Create study object
@@ -470,25 +471,8 @@ def _read_study_assays(self) -> Iterator[models.AssayInfo]:
tech_plat,
),
) in enumerate(columns):
if not file_ and any(
if any(
(
meas_type,
meas_type_term_acc,
meas_type_term_src,
tech_type,
tech_type_term_acc,
tech_type_term_src,
tech_plat,
)
):
# don't allow assay columns without assay file
tpl = (
"Found assay with no {} in {}; found: "
'"{}", "{}", "{}", "{}", "{}", "{}", "{}", "{}"'
)
msg = tpl.format(
investigation_headers.STUDY_ASSAY_FILE_NAME,
investigation_headers.STUDY_ASSAYS,
file_,
meas_type,
meas_type_term_acc,
Expand All @@ -498,13 +482,17 @@ def _read_study_assays(self) -> Iterator[models.AssayInfo]:
tech_type_term_src,
tech_plat,
)
raise ParseIsatabException(msg)
elif file_: # if at least a file exists --> AssayInfo
):
meas = models.OntologyTermRef(meas_type, meas_type_term_acc, meas_type_term_src)
tech = models.OntologyTermRef(tech_type, tech_type_term_acc, tech_type_term_src)
comments = _parse_comments(section, comment_keys, i)
yield models.AssayInfo(
meas, tech, tech_plat, Path(file_), comments, list(section.keys())
meas,
tech,
tech_plat,
Path(file_) if file_ else None,
comments,
list(section.keys()),
)
# else, i.e. if all assay fields are empty --> Nothing

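The recurring idiom in this hunk is that an empty STUDY FILE NAME or STUDY ASSAY FILE NAME cell now yields None instead of an empty Path; the `if study_info.info.path:` and `if assay_info.path:` guards in the apps rely on this. Likewise, an assay section with metadata but no file name no longer aborts parsing with ParseIsatabException; it now yields an AssayInfo with path None, while a completely empty assay column still yields nothing. The parser inlines the conditional expression; a standalone illustration (the helper name is made up, the file name is a placeholder):

from pathlib import Path


def path_or_none(file_name):
    """Empty or missing file name cells become None rather than Path("")."""
    return Path(file_name) if file_name else None


assert path_or_none("") is None
assert path_or_none("a_assay.txt") == Path("a_assay.txt")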
(Diffs for the remaining 13 changed files are not shown here.)
