From 665363dda09ded0fc232739e1aed27b40feb188f Mon Sep 17 00:00:00 2001 From: Mathias Kuhring Date: Thu, 25 Apr 2019 14:25:18 +0200 Subject: [PATCH] Add assay/study information validation * Add study/assay uniqueness validation (#45) * Check if investigation refers to studies (#17) * Fix Reader usage in isatab2dot app * Modify assay info and assay/study path usage * Add validation of minimal assay/study information (#17) * Rearrange isatab2isatab and isatab2validation apps --- altamisa/apps/isatab2dot.py | 13 +- altamisa/apps/isatab2isatab.py | 102 ++++++++------ altamisa/apps/isatab2validation.py | 54 ++++--- altamisa/isatab/models.py | 4 +- altamisa/isatab/parse_assay_study.py | 3 +- altamisa/isatab/parse_investigation.py | 36 ++--- altamisa/isatab/validate_investigation.py | 102 +++++++++++++- altamisa/isatab/write_investigation.py | 11 +- tests/conftest.py | 7 + tests/data/i_assays/i_assays.txt | 4 +- tests/data/i_assays2/i_assays2.txt | 155 --------------------- tests/data/i_fullinvest2/i_fullinvest2.txt | 8 +- tests/data/i_onlyinvest/i_onlyinvest.txt | 42 ++++++ tests/test_parse_assay.py | 36 ++--- tests/test_parse_investigation.py | 62 +++++++-- tests/test_parse_study.py | 16 ++- tests/test_write_assay.py | 40 ++++-- tests/test_write_investigation.py | 85 ++++++----- tests/test_write_study.py | 22 ++- 19 files changed, 450 insertions(+), 352 deletions(-) delete mode 100644 tests/data/i_assays2/i_assays2.txt create mode 100644 tests/data/i_onlyinvest/i_onlyinvest.txt diff --git a/altamisa/apps/isatab2dot.py b/altamisa/apps/isatab2dot.py index d974f6d..d93b231 100644 --- a/altamisa/apps/isatab2dot.py +++ b/altamisa/apps/isatab2dot.py @@ -61,24 +61,17 @@ def run(args): for s, study_info in enumerate(investigation.studies): with open(os.path.join(path, study_info.info.path), "rt") as inputf: - study = StudyReader.from_stream( - investigation, study_info, "S{}".format(s + 1), inputf - ).read() + study = StudyReader.from_stream("S{}".format(s + 1), inputf).read() print(" /* study {} */".format(study_info.info.path), file=args.output_file) print(" subgraph clusterStudy{} {{".format(s), file=args.output_file) print(' label = "Study: {}"'.format(study_info.info.path), file=args.output_file) print_dot(study, args.output_file) print(" }", file=args.output_file) - for a, assay_info in enumerate(study_info.assays.values()): + for a, assay_info in enumerate(study_info.assays): with open(os.path.join(path, assay_info.path), "rt") as inputf: assay = AssayReader.from_stream( - investigation, - study_info, - assay_info, - "S{}".format(s + 1), - "A{}".format(a + 1), - inputf, + "S{}".format(s + 1), "A{}".format(a + 1), inputf ).read() print(" /* assay {} */".format(assay_info.path), file=args.output_file) print(" subgraph clusterAssayS{}A{} {{".format(s, a), file=args.output_file) diff --git a/altamisa/apps/isatab2isatab.py b/altamisa/apps/isatab2isatab.py index e87eb6e..a42f353 100644 --- a/altamisa/apps/isatab2isatab.py +++ b/altamisa/apps/isatab2isatab.py @@ -24,70 +24,90 @@ def run(args): # Collect warnings with warnings.catch_warnings(record=True) as records: + run_warnings_caught(args) - # Check if input and output directory are different - path_in = os.path.normpath(os.path.dirname(args.input_investigation_file.name)) - path_out = os.path.normpath(os.path.dirname(args.output_investigation_file.name)) - if path_in == path_out: - tpl = "Can't output ISA-tab files to same directory as as input: {} == {}" - msg = tpl.format(path_in, path_out) - raise IsaException(msg) - - # Read investigation - 
investigation = InvestigationReader.from_stream(args.input_investigation_file).read() - - # Read studies and assays - studies = {} - assays = {} - for s, study_info in enumerate(investigation.studies): + # Print warnings + if not args.no_warnings: + for record in records: + warnings.showwarning( + record.message, record.category, record.filename, record.lineno, record.line + ) + + +def run_warnings_caught(args): + # Check if input and output directory are different + path_in = os.path.normpath(os.path.dirname(args.input_investigation_file.name)) + path_out = os.path.normpath(os.path.dirname(args.output_investigation_file.name)) + if path_in == path_out: + tpl = "Can't output ISA-tab files to same directory as as input: {} == {}" + msg = tpl.format(path_in, path_out) + raise IsaException(msg) + + investigation, studies, assays = run_reading(args, path_in) + run_writing(args, path_out, investigation, studies, assays) + + +def run_reading(args, path_in): + # Read investigation + investigation = InvestigationReader.from_stream(args.input_investigation_file).read() + + # Validate investigation + InvestigationValidator(investigation).validate() + + # Read studies and assays + studies = {} + assays = {} + for s, study_info in enumerate(investigation.studies): + if study_info.info.path: with open(os.path.join(path_in, study_info.info.path), "rt") as inputf: studies[s] = StudyReader.from_stream("S{}".format(s + 1), inputf).read() - if study_info.assays: - assays[s] = {} - for a, assay_info in enumerate(study_info.assays.values()): + if study_info.assays: + assays[s] = {} + for a, assay_info in enumerate(study_info.assays): + if assay_info.path: with open(os.path.join(path_in, assay_info.path), "rt") as inputf: assays[s][a] = AssayReader.from_stream( "S{}".format(s + 1), "A{}".format(a + 1), inputf ).read() - # Validate investigation - InvestigationValidator(investigation).validate() - - # Validate studies and assays - for s, study_info in enumerate(investigation.studies): + # Validate studies and assays + for s, study_info in enumerate(investigation.studies): + if study_info.info.path: StudyValidator(investigation, study_info, studies[s]).validate() - for a, assay_info in enumerate(study_info.assays.values()): + for a, assay_info in enumerate(study_info.assays): + if assay_info.path: AssayValidator(investigation, study_info, assay_info, assays[s][a]).validate() - # Write investigation - InvestigationWriter.from_stream( - investigation, args.output_investigation_file, quote=args.quotes - ).write() + return investigation, studies, assays + - # Write studies and assays - for s, study_info in enumerate(investigation.studies): - if args.output_investigation_file.name == "": +def run_writing(args, path_out, investigation, studies, assays): + # Write investigation + InvestigationWriter.from_stream( + investigation, args.output_investigation_file, quote=args.quotes + ).write() + + # Write studies and assays + for s, study_info in enumerate(investigation.studies): + if args.output_investigation_file.name == "": + if study_info.info.path: StudyWriter.from_stream( studies[s], args.output_investigation_file, quote=args.quotes ).write() - for a, assay_info in enumerate(study_info.assays.values()): + for a, assay_info in enumerate(study_info.assays): + if assay_info.path: AssayWriter.from_stream( assays[s][a], args.output_investigation_file, quote=args.quotes ).write() - else: + else: + if study_info.info.path: with open(os.path.join(path_out, study_info.info.path), "wt") as outputf: 
StudyWriter.from_stream(studies[s], outputf, quote=args.quotes).write() - for a, assay_info in enumerate(study_info.assays.values()): + for a, assay_info in enumerate(study_info.assays): + if assay_info.path: with open(os.path.join(path_out, assay_info.path), "wt") as outputf: AssayWriter.from_stream(assays[s][a], outputf, quote=args.quotes).write() - # Print warnings - if not args.no_warnings: - for record in records: - warnings.showwarning( - record.message, record.category, record.filename, record.lineno, record.line - ) - def main(argv=None): parser = argparse.ArgumentParser() diff --git a/altamisa/apps/isatab2validation.py b/altamisa/apps/isatab2validation.py index 815f06f..f7ecfbe 100644 --- a/altamisa/apps/isatab2validation.py +++ b/altamisa/apps/isatab2validation.py @@ -18,46 +18,54 @@ def run(args): + # Show all warnings of same type and content if args.show_duplicate_warnings: warnings.simplefilter("always") # Collect warnings with warnings.catch_warnings(record=True) as records: + run_warnings_caught(args) - # Read investigation - investigation = InvestigationReader.from_stream(args.input_investigation_file).read() - args.input_investigation_file.close() + # Print warnings + for record in records: + warnings.showwarning( + record.message, record.category, record.filename, record.lineno, record.line + ) + + +def run_warnings_caught(args): + # Read investigation + investigation = InvestigationReader.from_stream(args.input_investigation_file).read() + args.input_investigation_file.close() + + # Validate investigation + InvestigationValidator(investigation).validate() - # Read studies and assays - path_in = os.path.normpath(os.path.dirname(args.input_investigation_file.name)) - studies = {} - assays = {} - for s, study_info in enumerate(investigation.studies): + # Read studies and assays + path_in = os.path.normpath(os.path.dirname(args.input_investigation_file.name)) + studies = {} + assays = {} + for s, study_info in enumerate(investigation.studies): + if study_info.info.path: with open(os.path.join(path_in, study_info.info.path), "rt") as inputf: studies[s] = StudyReader.from_stream("S{}".format(s + 1), inputf).read() - if study_info.assays: - assays[s] = {} - for a, assay_info in enumerate(study_info.assays.values()): + if study_info.assays: + assays[s] = {} + for a, assay_info in enumerate(study_info.assays): + if assay_info.path: with open(os.path.join(path_in, assay_info.path), "rt") as inputf: assays[s][a] = AssayReader.from_stream( "S{}".format(s + 1), "A{}".format(a + 1), inputf ).read() - # Validate investigation - InvestigationValidator(investigation).validate() - - # Validate studies and assays - for s, study_info in enumerate(investigation.studies): + # Validate studies and assays + for s, study_info in enumerate(investigation.studies): + if study_info.info.path: StudyValidator(investigation, study_info, studies[s]).validate() - for a, assay_info in enumerate(study_info.assays.values()): + for a, assay_info in enumerate(study_info.assays): + if assay_info.path: AssayValidator(investigation, study_info, assay_info, assays[s][a]).validate() - # Print warnings - for record in records: - warnings.showwarning( - record.message, record.category, record.filename, record.lineno, record.line - ) - def main(argv=None): parser = argparse.ArgumentParser() diff --git a/altamisa/isatab/models.py b/altamisa/isatab/models.py index cf5b2d5..b218dac 100644 --- a/altamisa/isatab/models.py +++ b/altamisa/isatab/models.py @@ -261,8 +261,8 @@ class StudyInfo(NamedTuple): publications: 
Tuple[PublicationInfo] #: Study factors by name factors: Dict[str, FactorInfo] - #: Study assays by name - assays: Dict[str, AssayInfo] + #: Study assays + assays: Tuple[AssayInfo] #: Study protocols by name protocols: Dict[str, ProtocolInfo] #: Study contact list diff --git a/altamisa/isatab/parse_assay_study.py b/altamisa/isatab/parse_assay_study.py index 9027d99..176622f 100644 --- a/altamisa/isatab/parse_assay_study.py +++ b/altamisa/isatab/parse_assay_study.py @@ -182,7 +182,8 @@ def _assign_column_headers(self): # noqa: C901 if not is_secondary: prev = header - def _raise_seen_before(self, name, col_no): + @staticmethod + def _raise_seen_before(name, col_no): tpl = 'Seen "{}" header for same entity in col {}' msg = tpl.format(name, col_no) raise ParseIsatabException(msg) diff --git a/altamisa/isatab/parse_investigation.py b/altamisa/isatab/parse_investigation.py index 231c5fb..9f73fcd 100644 --- a/altamisa/isatab/parse_investigation.py +++ b/altamisa/isatab/parse_investigation.py @@ -340,7 +340,6 @@ def _read_contacts(self) -> Iterator[models.ContactInfo]: ) def _read_studies(self) -> Iterator[models.StudyInfo]: - # TODO: is it legal to have no study in the investigation? while self._line: # Read STUDY header line = self._read_next_line() @@ -355,7 +354,9 @@ def _read_studies(self) -> Iterator[models.StudyInfo]: # From this, parse the basic information from the study comments = _parse_comments(section, comment_keys) basic_info = models.BasicInfo( - Path(section[investigation_headers.STUDY_FILE_NAME]), + Path(section[investigation_headers.STUDY_FILE_NAME]) + if section[investigation_headers.STUDY_FILE_NAME] + else None, section[investigation_headers.STUDY_IDENTIFIER], section[investigation_headers.STUDY_TITLE], section[investigation_headers.STUDY_DESCRIPTION], @@ -369,7 +370,7 @@ def _read_studies(self) -> Iterator[models.StudyInfo]: design_descriptors = tuple(self._read_study_design_descriptors()) publications = tuple(self._read_study_publications()) factors = {f.name: f for f in self._read_study_factors()} - assays = {a.path.name: a for a in self._read_study_assays()} + assays = tuple(self._read_study_assays()) protocols = {p.name: p for p in self._read_study_protocols()} contacts = tuple(self._read_study_contacts()) # Create study object @@ -470,25 +471,8 @@ def _read_study_assays(self) -> Iterator[models.AssayInfo]: tech_plat, ), ) in enumerate(columns): - if not file_ and any( + if any( ( - meas_type, - meas_type_term_acc, - meas_type_term_src, - tech_type, - tech_type_term_acc, - tech_type_term_src, - tech_plat, - ) - ): - # don't allow assay columns without assay file - tpl = ( - "Found assay with no {} in {}; found: " - '"{}", "{}", "{}", "{}", "{}", "{}", "{}", "{}"' - ) - msg = tpl.format( - investigation_headers.STUDY_ASSAY_FILE_NAME, - investigation_headers.STUDY_ASSAYS, file_, meas_type, meas_type_term_acc, @@ -498,13 +482,17 @@ def _read_study_assays(self) -> Iterator[models.AssayInfo]: tech_type_term_src, tech_plat, ) - raise ParseIsatabException(msg) - elif file_: # if at least a file exists --> AssayInfo + ): meas = models.OntologyTermRef(meas_type, meas_type_term_acc, meas_type_term_src) tech = models.OntologyTermRef(tech_type, tech_type_term_acc, tech_type_term_src) comments = _parse_comments(section, comment_keys, i) yield models.AssayInfo( - meas, tech, tech_plat, Path(file_), comments, list(section.keys()) + meas, + tech, + tech_plat, + Path(file_) if file_ else None, + comments, + list(section.keys()), ) # else, i.e. 
if all assay fields are empty --> Nothing diff --git a/altamisa/isatab/validate_investigation.py b/altamisa/isatab/validate_investigation.py index a0a4e89..20ff23b 100644 --- a/altamisa/isatab/validate_investigation.py +++ b/altamisa/isatab/validate_investigation.py @@ -11,7 +11,11 @@ from typing import Dict, Tuple import warnings -from ..exceptions import AdvisoryIsaValidationWarning, CriticalIsaValidationWarning +from ..exceptions import ( + AdvisoryIsaValidationWarning, + CriticalIsaValidationWarning, + ModerateIsaValidationWarning, +) from .helpers import is_ontology_term_ref from . import models from .validate_assay_study import OntologyTermRefValidator @@ -71,6 +75,10 @@ class InvestigationValidator: def __init__(self, investigation: models.InvestigationInfo): self._investigation = investigation self._ontology_validator = OntologyTermRefValidator(investigation.ontology_source_refs) + self._study_ids = set() + self._study_paths = set() + self._study_titles = set() + self._assay_paths = set() def validate(self): self._validate_ontology_sources() @@ -89,12 +97,55 @@ def _validate_ontology_sources(self): def _validate_sections(self): self._validate_publications(self._investigation.publications) self._validate_contacts(self._investigation.contacts) + self._validate_studies() + + def _validate_studies(self): + # Check if any study exists + if not self._investigation.studies: + tpl = "No studies declared in investigation: {}" + msg = tpl.format(self._investigation.info.path) + warnings.warn(msg, CriticalIsaValidationWarning) + return for study in self._investigation.studies: + # Validate availability of minimal study information (ids, paths, titles) and + if not (study.info.identifier and study.info.path): + tpl = ( + "Study with incomplete minimal information (ID and path):" + "\nID:\t{}\nTitle:\t{}\nPath:\t{}" + ) + msg = tpl.format(study.info.identifier, study.info.title, study.info.path or "") + warnings.warn(msg, CriticalIsaValidationWarning) + if not study.info.title: + tpl = "Study without title:\nID:\t{}\nTitle:\t{}\nPath:\t{}" + msg = tpl.format(study.info.identifier, study.info.title, study.info.path or "") + warnings.warn(msg, ModerateIsaValidationWarning) + # Assure distinct studies, i.e. 
unique ids, paths and preferably titles + if study.info.identifier in self._study_ids: + tpl = "Study identifier used more than once: {}" + msg = tpl.format(study.info.identifier) + warnings.warn(msg, CriticalIsaValidationWarning) + else: + self._study_ids.add(study.info.identifier) + if study.info.path: + if study.info.path in self._study_paths: + tpl = "Study path used more than once: {}" + msg = tpl.format(study.info.path or "") + warnings.warn(msg, CriticalIsaValidationWarning) + else: + self._study_paths.add(study.info.path) + if study.info.title: + if study.info.title in self._study_titles: + tpl = "Study title used more than once: {}" + msg = tpl.format(study.info.title) + warnings.warn(msg, ModerateIsaValidationWarning) + else: + self._study_titles.add(study.info.title) + # Validate study sections self._validate_publications(study.publications) self._validate_contacts(study.contacts) self._validate_designs(study.designs) self._validate_factors(study.factors) - self._validate_assays(study.assays) + self._validate_assays(study.assays, study.info.identifier) self._validate_protocols(study.protocols) def _validate_publications(self, publications: Tuple[models.PublicationInfo]): @@ -126,9 +177,50 @@ def _validate_factors(self, factors: Dict[str, models.FactorInfo]): if is_ontology_term_ref(factor.type): self._ontology_validator.validate(factor.type) - def _validate_assays(self, assays: Dict[str, models.AssayInfo]): - # Validate format of specific fields in assays - for assay in assays.values(): + def _validate_assays(self, assays: Tuple[models.AssayInfo], study_id: str): + # Check if any assays exists + if not assays: + tpl = "No assays declared in study '{}' of investigation '{}'" + msg = tpl.format(study_id, self._investigation.info.path) + warnings.warn(msg, CriticalIsaValidationWarning) + return + for assay in assays: + # Validate availability of minimal assay information + # (path, measurement type, technology type and technology platform) + meas_type = ( + assay.measurement_type.name + if is_ontology_term_ref(assay.measurement_type) + else assay.measurement_type + ) + tech_type = ( + assay.technology_type.name + if is_ontology_term_ref(assay.technology_type) + else assay.technology_type + ) + if not (assay.path and meas_type and tech_type): + tpl = ( + "Assay with incomplete minimal information (path, measurement and " + "technology type):\nPath:\t{}\nMeasurement Type:\t{}\nTechnology Type:\t{" + "}\nTechnology Platform:\t{}" + ) + msg = tpl.format(assay.path or "", meas_type, tech_type, assay.platform) + warnings.warn(msg, CriticalIsaValidationWarning) + if not assay.platform: + tpl = ( + "Assay without platform:\nPath:\t{}" + "\nMeasurement Type:\t{}\nTechnology Type:\t{}\nTechnology Platform:\t{}" + ) + msg = tpl.format(assay.path or "", meas_type, tech_type, assay.platform) + warnings.warn(msg, AdvisoryIsaValidationWarning) + # Assure distinct assays, i.e. 
unique paths + if assay.path: + if assay.path in self._assay_paths: + tpl = "Assay path used more than once: {}" + msg = tpl.format(assay.path or "") + warnings.warn(msg, CriticalIsaValidationWarning) + else: + self._assay_paths.add(assay.path) + # Validate format of specific fields in assays if is_ontology_term_ref(assay.measurement_type): self._ontology_validator.validate(assay.measurement_type) if is_ontology_term_ref(assay.technology_type): diff --git a/altamisa/isatab/write_investigation.py b/altamisa/isatab/write_investigation.py index 8d1d818..e1c83b1 100644 --- a/altamisa/isatab/write_investigation.py +++ b/altamisa/isatab/write_investigation.py @@ -282,7 +282,7 @@ def _write_study_basic_info(self, study: models.StudyInfo): investigation_headers.STUDY_DESCRIPTION: [basic_info.description], investigation_headers.STUDY_SUBMISSION_DATE: [basic_info.submission_date], investigation_headers.STUDY_PUBLIC_RELEASE_DATE: [basic_info.public_release_date], - investigation_headers.STUDY_FILE_NAME: [basic_info.path], + investigation_headers.STUDY_FILE_NAME: [basic_info.path or ""], } comments = _extract_comments([basic_info]) headers = _extract_section_header(basic_info, investigation_headers.STUDY) @@ -371,8 +371,8 @@ def _write_study_factors(self, study: models.StudyInfo): def _write_study_assays(self, study: models.StudyInfo): # Write STUDY ASSAYS section section = _init_multi_column_section(investigation_headers.STUDY_ASSAYS_KEYS) - for assay in study.assays.values(): - section[investigation_headers.STUDY_ASSAY_FILE_NAME].append(assay.path) + for assay in study.assays: + section[investigation_headers.STUDY_ASSAY_FILE_NAME].append(assay.path or "") if is_ontology_term_ref(assay.measurement_type): section[investigation_headers.STUDY_ASSAY_MEASUREMENT_TYPE].append( @@ -418,10 +418,9 @@ def _write_study_assays(self, study: models.StudyInfo): section[investigation_headers.STUDY_ASSAY_TECHNOLOGY_PLATFORM].append(assay.platform) - comments = _extract_comments(study.assays.values()) + comments = _extract_comments(study.assays) headers = _extract_section_header( - list(study.assays.values())[0] if study.assays else None, - investigation_headers.STUDY_ASSAYS, + list(study.assays)[0] if study.assays else None, investigation_headers.STUDY_ASSAYS ) self._write_section(investigation_headers.STUDY_ASSAYS, section, comments, headers) diff --git a/tests/conftest.py b/tests/conftest.py index 44bc054..8d85b1b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -239,3 +239,10 @@ def assay_file_exception_invalid_column_type(): ) with open(path, "rt") as file: yield file + + +@pytest.fixture +def only_investigation_file(): + path = os.path.join(os.path.dirname(__file__), "data/i_onlyinvest/i_onlyinvest.txt") + with open(path, "rt") as file: + yield file diff --git a/tests/data/i_assays/i_assays.txt b/tests/data/i_assays/i_assays.txt index a563754..d07874c 100644 --- a/tests/data/i_assays/i_assays.txt +++ b/tests/data/i_assays/i_assays.txt @@ -30,7 +30,7 @@ Investigation Person Roles Investigation Person Roles Term Accession Number Investigation Person Roles Term Source REF STUDY -Study Identifier s_assays_1 +Study Identifier s_assays Study Title Minimal Germline Study Study Description Comment[Study Grant Number] @@ -92,7 +92,7 @@ Study Person Roles Study Person Roles Term Accession Number Study Person Roles Term Source REF STUDY -Study Identifier s_assays_2 +Study Identifier s_assays Study Title Minimal Germline Study Study Description Comment[Study Grant Number] diff --git 
a/tests/data/i_assays2/i_assays2.txt b/tests/data/i_assays2/i_assays2.txt deleted file mode 100644 index d7d3317..0000000 --- a/tests/data/i_assays2/i_assays2.txt +++ /dev/null @@ -1,155 +0,0 @@ -ONTOLOGY SOURCE REFERENCE -Term Source Name OBI -Term Source File http://data.bioontology.org/ontologies/OBI -Term Source Version 31 -Term Source Description Ontology for Biomedical Investigations -INVESTIGATION -Investigation Identifier i_assays -Investigation Title Minimal Investigation -Investigation Description -Investigation Submission Date -Investigation Public Release Date -INVESTIGATION PUBLICATIONS -Investigation PubMed ID -Investigation Publication DOI -Investigation Publication Author List -Investigation Publication Title -Investigation Publication Status -Investigation Publication Status Term Accession Number -Investigation Publication Status Term Source REF -INVESTIGATION CONTACTS -Investigation Person Last Name -Investigation Person First Name -Investigation Person Mid Initials -Investigation Person Email -Investigation Person Phone -Investigation Person Fax -Investigation Person Address -Investigation Person Affiliation -Investigation Person Roles -Investigation Person Roles Term Accession Number -Investigation Person Roles Term Source REF -STUDY -Study Identifier s_assays_1 -Study Title Minimal Germline Study -Study Description -Comment[Study Grant Number] -Comment[Study Funding Agency] -Study Submission Date -Study Public Release Date -Study File Name s_assays.txt -STUDY DESIGN DESCRIPTORS -Study Design Type -Study Design Type Term Accession Number -Study Design Type Term Source REF -STUDY PUBLICATIONS -Study PubMed ID -Study Publication DOI -Study Publication Author List -Study Publication Title -Study Publication Status -Study Publication Status Term Accession Number -Study Publication Status Term Source REF -STUDY FACTORS -Study Factor Name -Study Factor Type -Study Factor Type Term Accession Number -Study Factor Type Term Source REF -STUDY ASSAYS -Study Assay File Name -Study Assay Measurement Type -Study Assay Measurement Type Term Accession Number -Study Assay Measurement Type Term Source REF -Study Assay Technology Type -Study Assay Technology Type Term Accession Number -Study Assay Technology Type Term Source REF -Study Assay Technology Platform -STUDY PROTOCOLS -Study Protocol Name sample collection nucleic acid sequencing -Study Protocol Type sample collection nucleic acid sequencing -Study Protocol Type Term Accession Number -Study Protocol Type Term Source REF -Study Protocol Description -Study Protocol URI -Study Protocol Version -Study Protocol Parameters Name -Study Protocol Parameters Name Term Accession Number -Study Protocol Parameters Name Term Source REF -Study Protocol Components Name -Study Protocol Components Type -Study Protocol Components Type Term Accession Number -Study Protocol Components Type Term Source REF -STUDY CONTACTS -Study Person Last Name -Study Person First Name -Study Person Mid Initials -Study Person Email -Study Person Phone -Study Person Fax -Study Person Address -Study Person Affiliation -Study Person Roles -Study Person Roles Term Accession Number -Study Person Roles Term Source REF -STUDY -Study Identifier s_assays_2 -Study Title Minimal Germline Study -Study Description -Comment[Study Grant Number] -Comment[Study Funding Agency] -Study Submission Date -Study Public Release Date -Study File Name s_assays.txt -STUDY DESIGN DESCRIPTORS -Study Design Type -Study Design Type Term Accession Number -Study Design Type Term Source REF -STUDY 
PUBLICATIONS -Study PubMed ID -Study Publication DOI -Study Publication Author List -Study Publication Title -Study Publication Status -Study Publication Status Term Accession Number -Study Publication Status Term Source REF -STUDY FACTORS -Study Factor Name -Study Factor Type -Study Factor Type Term Accession Number -Study Factor Type Term Source REF -STUDY ASSAYS -Study Assay File Name -Study Assay Measurement Type -Study Assay Measurement Type Term Accession Number -Study Assay Measurement Type Term Source REF -Study Assay Technology Type -Study Assay Technology Type Term Accession Number -Study Assay Technology Type Term Source REF -Study Assay Technology Platform -STUDY PROTOCOLS -Study Protocol Name sample collection nucleic acid sequencing -Study Protocol Type sample collection nucleic acid sequencing -Study Protocol Type Term Accession Number -Study Protocol Type Term Source REF -Study Protocol Description -Study Protocol URI -Study Protocol Version -Study Protocol Parameters Name -Study Protocol Parameters Name Term Accession Number -Study Protocol Parameters Name Term Source REF -Study Protocol Components Name -Study Protocol Components Type -Study Protocol Components Type Term Accession Number -Study Protocol Components Type Term Source REF -STUDY CONTACTS -Study Person Last Name -Study Person First Name -Study Person Mid Initials -Study Person Email -Study Person Phone -Study Person Fax -Study Person Address -Study Person Affiliation -Study Person Roles -Study Person Roles Term Accession Number -Study Person Roles Term Source REF diff --git a/tests/data/i_fullinvest2/i_fullinvest2.txt b/tests/data/i_fullinvest2/i_fullinvest2.txt index dea8112..0759fda 100644 --- a/tests/data/i_fullinvest2/i_fullinvest2.txt +++ b/tests/data/i_fullinvest2/i_fullinvest2.txt @@ -40,14 +40,14 @@ Investigation Person Roles Term Source REF ROLEO Comment[Investigation Person ORCID] 12345 0987654321 1357908642 Comment[Investigation Person REF] personA personB personC STUDY -Study Identifier BII-S-1 -Study Title Study of the impact of changes in flux on the transcriptome, proteome, endometabolome and exometabolome of the yeast Saccharomyces cerevisiae under different nutrient limitations +Study Identifier +Study Title Study Description We wished to study the impact of growth rate on the total complement of mRNA molecules, proteins, and metabolites in S. cerevisiae, independent of any nutritional or other physiological effects. To achieve this, we carried out our analyses on yeast grown in steady-state chemostat culture under four different nutrient limitations (glucose, ammonium, phosphate, and sulfate) at three different dilution (that is, growth) rates (D = u = 0.07, 0.1, and 0.2/hour, equivalent to population doubling times (Td) of 10 hours, 7 hours, and 3.5 hours, respectively; u = specific growth rate defined as grams of biomass generated per gram of biomass present per unit time). 
Comment[Study Grant Number] Comment[Study Funding Agency] Study Submission Date 2007-04-30 Study Public Release Date 2009-03-10 -Study File Name s_BII-S-1.txt +Study File Name Comment[Manuscript Licence] CC BY 3.0 Comment[Experimental Metadata Licence] CC0 Comment[Data Repository] @@ -79,7 +79,7 @@ Study Factor Type Term Accession Number http://purl.obolibrary.org/obo/CHEBI_244 Study Factor Type Term Source REF CHEBI PATO Comment[FactorsTest] 1 2 STUDY ASSAYS -Study Assay File Name a_proteome.txt a_metabolome.txt a_transcriptome.txt +Study Assay File Name a_proteome.txt a_transcriptome.txt Study Assay Measurement Type protein expression profiling metabolite profiling transcription profiling Study Assay Measurement Type Term Accession Number http://purl.obolibrary.org/obo/OBI_0000615 http://purl.obolibrary.org/obo/OBI_0000366 http://purl.obolibrary.org/obo/OBI_0000424 Study Assay Measurement Type Term Source REF OBI OBI OBI diff --git a/tests/data/i_onlyinvest/i_onlyinvest.txt b/tests/data/i_onlyinvest/i_onlyinvest.txt new file mode 100644 index 0000000..ec54a20 --- /dev/null +++ b/tests/data/i_onlyinvest/i_onlyinvest.txt @@ -0,0 +1,42 @@ +ONTOLOGY SOURCE REFERENCE +Term Source Name NCBITAXON OBI_BCGO PATO OBI UO CL CHEBI EFO ROLEO +Term Source File http://data.bioontology.org/ontologies/NCBITAXON http://data.bioontology.org/ontologies/OBI_BCGO http://data.bioontology.org/ontologies/PATO http://data.bioontology.org/ontologies/OBI http://data.bioontology.org/ontologies/UO http://data.bioontology.org/ontologies/CL http://data.bioontology.org/ontologies/CHEBI http://data.bioontology.org/ontologies/EFO http://data.bioontology.org/ontologies/ROLEO +Term Source Version 2 8 160 21 42 43 78 158 1 +Term Source Description National Center for Biotechnology Information (NCBI) Organismal Classification Beta Cell Genomics Ontology Phenotypic Quality Ontology Ontology for Biomedical Investigations Units of Measurement Ontology Cell Ontology Chemical Entities of Biological Interest Ontology Experimental Factor Ontology Role Ontology +Comment[Test] 1 2 3 4 5 6 7 8 9 +INVESTIGATION +Investigation Identifier BII-I-1 +Investigation Title Growth control of the eukaryote cell: a systems biology study in yeast +Investigation Description Background Cell growth underlies many key cellular and developmental processes, yet a limited number of studies have been carried out on cell-growth regulation. Comprehensive studies at the transcriptional, proteomic and metabolic levels under defined controlled conditions are currently lacking. Results Metabolic control analysis is being exploited in a systems biology study of the eukaryotic cell. Using chemostat culture, we have measured the impact of changes in flux (growth rate) on the transcriptome, proteome, endometabolome and exometabolome of the yeast Saccharomyces cerevisiae. Each functional genomic level shows clear growth-rate-associated trends and discriminates between carbon-sufficient and carbon-limited conditions. Genes consistently and significantly upregulated with increasing growth rate are frequently essential and encode evolutionarily conserved proteins of known function that participate in many protein-protein interactions. In contrast, more unknown, and fewer essential, genes are downregulated with increasing growth rate; their protein products rarely interact with one another. A large proportion of yeast genes under positive growth-rate control share orthologs with other eukaryotes, including humans. 
Significantly, transcription of genes encoding components of the TOR complex (a major controller of eukaryotic cell growth) is not subject to growth-rate regulation. Moreover, integrative studies reveal the extent and importance of post-transcriptional control, patterns of control of metabolic fluxes at the level of enzyme synthesis, and the relevance of specific enzymatic reactions in the control of metabolic fluxes during cell growth. Conclusion This work constitutes a first comprehensive systems biology study on growth-rate control in the eukaryotic cell. The results have direct implications for advanced studies on cell growth, in vivo regulation of metabolic fluxes for comprehensive metabolic engineering, and for the design of genome-scale systems biology models of the eukaryotic cell. +Investigation Submission Date 2007-04-30 +Investigation Public Release Date 2009-03-10 +Comment[Created With Configuration] +Comment[Last Opened With Configuration] +Comment[Owning Organisation URI] +Comment[Consortium URI] +Comment[Principal Investigator URI] +Comment[Investigation Keywords] +INVESTIGATION PUBLICATIONS +Investigation PubMed ID 17439666 1231222 1234121 +Investigation Publication DOI doi:10.1186/jbiol54 +Investigation Publication Author List Castrillo JI, Zeef LA, Hoyle DC, Zhang N, Hayes A, Gardner DC, Cornell MJ, Petty J, Hakes L, Wardleworth L, Rash B, Brown M, Dunn WB, Broadhurst D, O'Donoghue K, Hester SS, Dunkley TP, Hart SR, Swainston N, Li P, Gaskell SJ, Paton NW, Lilley KS, Kell DB, Oliver SG. Piatnochka IT. Monticelli G, Santori S. +Investigation Publication Title Growth control of the eukaryote cell: a systems biology study in yeast. Effect of prednisolone on the cardiovascular system in complex treatment of newly detected pulmonary tuberculosis Indications for the use of prostheses in the treatment of pathological fractures due to primary malignant and metastatic tumours of bone. 
+Comment[Subtitle] Something +Investigation Publication Status indexed in Pubmed published Published +Investigation Publication Status Term Accession Number http://www.ebi.ac.uk/efo/EFO_0001796 +Investigation Publication Status Term Source REF EFO +INVESTIGATION CONTACTS +Investigation Person Last Name Oliver Juan Leo +Investigation Person First Name Stephen Castrillo Zeef +Investigation Person Mid Initials G I A +Investigation Person Email stephen.oliver@test.mail +Investigation Person Phone 123456789 +Investigation Person Fax +49 123456789 +Investigation Person Address Oxford Road, Manchester M13 9PT, UK Oxford Road, Manchester M13 9PT, UK Oxford Road, Manchester M13 9PT, UK +Investigation Person Affiliation Faculty of Life Sciences, Michael Smith Building, University of Manchester Faculty of Life Sciences, Michael Smith Building, University of Manchester Faculty of Life Sciences, Michael Smith Building, University of Manchester +Investigation Person Roles corresponding author author author +Investigation Person Roles Term Accession Number http://purl.obolibrary.org/obo/RoleO_0000061 +Investigation Person Roles Term Source REF ROLEO +Comment[Investigation Person ORCID] 12345 0987654321 1357908642 +Comment[Investigation Person REF] personA personB personC + diff --git a/tests/test_parse_assay.py b/tests/test_parse_assay.py index 76cd709..3dcf125 100644 --- a/tests/test_parse_assay.py +++ b/tests/test_parse_assay.py @@ -94,7 +94,11 @@ def test_assay_reader_minimal_assay(minimal_investigation_file, minimal_assay_fi """ # Load investigation (tested elsewhere) investigation = InvestigationReader.from_stream(minimal_investigation_file).read() - InvestigationValidator(investigation).validate() + with pytest.warns(IsaWarning) as record: + InvestigationValidator(investigation).validate() + + # Check warnings + assert 1 == len(record) # Create new row reader and check read headers reader = AssayReader.from_stream("S1", "A1", minimal_assay_file) @@ -103,10 +107,7 @@ def test_assay_reader_minimal_assay(minimal_investigation_file, minimal_assay_fi # Read and validate assay assay = reader.read() AssayValidator( - investigation, - investigation.studies[0], - investigation.studies[0].assays["a_minimal.txt"], - assay, + investigation, investigation.studies[0], investigation.studies[0].assays[0], assay ).validate() # Check results @@ -434,7 +435,11 @@ def test_assay_reader_small_assay(small_investigation_file, small_assay_file): """Use ``AssayReader`` to read in small assay file.""" # Load investigation (tested elsewhere) investigation = InvestigationReader.from_stream(small_investigation_file).read() - InvestigationValidator(investigation).validate() + with pytest.warns(IsaWarning) as record: + InvestigationValidator(investigation).validate() + + # Check warnings + assert 1 == len(record) # Create new row reader and check read headers reader = AssayReader.from_stream("S1", "A1", small_assay_file) @@ -444,10 +449,7 @@ def test_assay_reader_small_assay(small_investigation_file, small_assay_file): with pytest.warns(IsaWarning) as record: assay = reader.read() AssayValidator( - investigation, - investigation.studies[0], - investigation.studies[0].assays["a_small.txt"], - assay, + investigation, investigation.studies[0], investigation.studies[0].assays[0], assay ).validate() # Check warnings @@ -651,10 +653,7 @@ def test_assay_reader_small2_assay(small2_investigation_file, small2_assay_file) # Read assay assay = reader.read() AssayValidator( - investigation, - investigation.studies[0], - 
investigation.studies[0].assays["a_small2.txt"], - assay, + investigation, investigation.studies[0], investigation.studies[0].assays[0], assay ).validate() # Check results @@ -764,16 +763,11 @@ def test_assay_reader_gelelect(gelelect_investigation_file, gelelect_assay_file) # Read assay assay = reader.read() AssayValidator( - investigation, - investigation.studies[0], - investigation.studies[0].assays[ - "a_study01_protein_expression_profiling_gel_electrophoresis.txt" - ], - assay, + investigation, investigation.studies[0], investigation.studies[0].assays[0], assay ).validate() # Check warnings - assert 3 == len(record) + assert 4 == len(record) # Check results assert os.path.normpath(str(assay.file)).endswith( diff --git a/tests/test_parse_investigation.py b/tests/test_parse_investigation.py index d59bbed..514ed6c 100644 --- a/tests/test_parse_investigation.py +++ b/tests/test_parse_investigation.py @@ -4,8 +4,14 @@ from datetime import date from pathlib import Path +import pytest from altamisa.constants import investigation_headers +from altamisa.exceptions import ( + IsaWarning, + CriticalIsaValidationWarning, + ModerateIsaValidationWarning, +) from altamisa.isatab import models from altamisa.isatab import InvestigationReader, InvestigationValidator @@ -14,7 +20,11 @@ def test_parse_minimal_investigation(minimal_investigation_file): # Read Investigation from file-like object reader = InvestigationReader.from_stream(minimal_investigation_file) investigation = reader.read() - InvestigationValidator(investigation).validate() + with pytest.warns(IsaWarning) as record: + InvestigationValidator(investigation).validate() + + # Check warnings + assert 1 == len(record) # Check results # Investigation @@ -44,7 +54,7 @@ def test_parse_minimal_investigation(minimal_investigation_file): # Assays assert len(investigation.studies[0].assays) == 1 - assay = investigation.studies[0].assays["a_minimal.txt"] + assay = investigation.studies[0].assays[0] assert Path("a_minimal.txt") == assay.path # Study contacts @@ -55,7 +65,11 @@ def test_parse_small_investigation(small_investigation_file): # Read Investigation from file-like object reader = InvestigationReader.from_stream(small_investigation_file) investigation = reader.read() - InvestigationValidator(investigation).validate() + with pytest.warns(IsaWarning) as record: + InvestigationValidator(investigation).validate() + + # Check warnings + assert 1 == len(record) # Check results # Investigation @@ -103,7 +117,7 @@ def test_parse_small_investigation(small_investigation_file): # Assays assert len(investigation.studies[0].assays) == 1 - assay = investigation.studies[0].assays["a_small.txt"] + assay = investigation.studies[0].assays[0] assert Path("a_small.txt") == assay.path # Study contacts @@ -348,7 +362,7 @@ def test_parse_full_investigation(full_investigation_file): (models.Comment("Extra Info", "a"),), expected_headers, ) - assert expected == study.assays["a_proteome.txt"] + assert expected == study.assays[0] expected = models.AssayInfo( models.OntologyTermRef( "transcription profiling", "http://purl.obolibrary.org/obo/OBI_0000424", "OBI" @@ -361,7 +375,7 @@ def test_parse_full_investigation(full_investigation_file): (models.Comment("Extra Info", "c"),), expected_headers, ) - assert expected == study.assays["a_transcriptome.txt"] + assert expected == study.assays[2] # Study 1 - Protocols assert 7 == len(study.protocols) @@ -566,7 +580,7 @@ def test_parse_full_investigation(full_investigation_file): (), [*investigation_headers.STUDY_ASSAYS_KEYS], ) - 
assert expected == study.assays["a_microarray.txt"] + assert expected == study.assays[0] # Study 2 - Protocols assert 10 == len(study.protocols) @@ -784,7 +798,7 @@ def test_parse_comment_investigation(comment_investigation_file): *investigation_headers.STUDY_ASSAYS_KEYS[5:], ], ) - assert expected == study.assays["a_transcriptome.txt"] + assert expected == study.assays[2] # Study 1 - Protocols assert 7 == len(study.protocols) @@ -878,7 +892,23 @@ def test_parse_assays_investigation(assays_investigation_file): # Read Investigation from file-like object reader = InvestigationReader.from_stream(assays_investigation_file) investigation = reader.read() - InvestigationValidator(investigation).validate() + with pytest.warns(IsaWarning) as record: + InvestigationValidator(investigation).validate() + + # Check warnings + assert 5 == len(record) + msg = "No assays declared in study 's_assays' of investigation 'i_assays.txt'" + assert record[0].category == CriticalIsaValidationWarning + assert str(record[0].message) == msg + msg = "Study identifier used more than once: s_assays" + assert record[1].category == CriticalIsaValidationWarning + assert str(record[1].message) == msg + msg = "Study path used more than once: s_assays.txt" + assert record[2].category == CriticalIsaValidationWarning + assert str(record[2].message) == msg + msg = "Study title used more than once: Minimal Germline Study" + assert record[3].category == ModerateIsaValidationWarning + assert str(record[3].message) == msg # Check results # Investigation @@ -890,3 +920,17 @@ def test_parse_assays_investigation(assays_investigation_file): # Assays assert 0 == len(investigation.studies[0].assays) assert 0 == len(investigation.studies[1].assays) + + +def test_parse_only_investigation(only_investigation_file): + # Read Investigation from file-like object + reader = InvestigationReader.from_stream(only_investigation_file) + investigation = reader.read() + with pytest.warns(IsaWarning) as record: + InvestigationValidator(investigation).validate() + + # Check warnings + assert 1 == len(record) + msg = "No studies declared in investigation: i_onlyinvest.txt" + assert record[0].category == CriticalIsaValidationWarning + assert str(record[0].message) == msg diff --git a/tests/test_parse_study.py b/tests/test_parse_study.py index ccc89e8..77046c8 100644 --- a/tests/test_parse_study.py +++ b/tests/test_parse_study.py @@ -4,8 +4,10 @@ from datetime import date import os +import pytest from altamisa.constants import table_headers +from altamisa.exceptions import IsaWarning from altamisa.isatab import models from altamisa.isatab import ( InvestigationReader, @@ -73,7 +75,11 @@ def test_study_reader_minimal_study(minimal_investigation_file, minimal_study_fi """ # Load investigation (tested elsewhere) investigation = InvestigationReader.from_stream(minimal_investigation_file).read() - InvestigationValidator(investigation).validate() + with pytest.warns(IsaWarning) as record: + InvestigationValidator(investigation).validate() + + # Check warnings + assert 1 == len(record) # Create new row reader and check read headers reader = StudyReader.from_stream("S1", minimal_study_file) @@ -313,8 +319,12 @@ def test_study_row_reader_small_study(small_investigation_file, small_study_file def test_study_reader_small_study(small_investigation_file, small_study_file): """Use ``StudyReader`` to read in small study file.""" # Load investigation (tested elsewhere) - investigation = InvestigationReader.from_stream(small_investigation_file).read() - 
InvestigationValidator(investigation).validate() + with pytest.warns(IsaWarning) as record: + investigation = InvestigationReader.from_stream(small_investigation_file).read() + InvestigationValidator(investigation).validate() + + # Check warnings + assert 1 == len(record) # Create new row reader and check read headers reader = StudyReader.from_stream("S1", small_study_file) diff --git a/tests/test_write_assay.py b/tests/test_write_assay.py index a5f422a..2fab3be 100644 --- a/tests/test_write_assay.py +++ b/tests/test_write_assay.py @@ -6,7 +6,12 @@ import os import pytest -from altamisa.exceptions import IsaWarning, ModerateIsaValidationWarning, ParseIsatabWarning +from altamisa.exceptions import ( + AdvisoryIsaValidationWarning, + IsaWarning, + ModerateIsaValidationWarning, + ParseIsatabWarning, +) from altamisa.isatab import ( InvestigationReader, InvestigationValidator, @@ -25,7 +30,7 @@ def _parse_write_assert_assay(investigation_file, tmp_path, quote=None, normaliz directory = os.path.normpath(os.path.dirname(investigation_file.name)) # Iterate assays for s, study_info in enumerate(investigation.studies): - for a, assay_info in enumerate(study_info.assays.values()): + for a, assay_info in enumerate(study_info.assays): if skip and str(assay_info.path) in skip: continue # Load assay @@ -59,11 +64,17 @@ def _parse_write_assert_assay(investigation_file, tmp_path, quote=None, normaliz def test_assay_writer_minimal_assay(minimal_investigation_file, tmp_path): - _parse_write_assert_assay(minimal_investigation_file, tmp_path) + with pytest.warns(IsaWarning) as record: + _parse_write_assert_assay(minimal_investigation_file, tmp_path) + # Check warnings + assert 1 == len(record) def test_assay_writer_minimal2_assay(minimal2_investigation_file, tmp_path): - _parse_write_assert_assay(minimal2_investigation_file, tmp_path) + with pytest.warns(IsaWarning) as record: + _parse_write_assert_assay(minimal2_investigation_file, tmp_path) + # Check warnings + assert 1 == len(record) def test_assay_writer_small_assay(small_investigation_file, tmp_path): @@ -71,13 +82,21 @@ def test_assay_writer_small_assay(small_investigation_file, tmp_path): _parse_write_assert_assay(small_investigation_file, tmp_path) # Check warnings - assert 1 == len(record) + assert 2 == len(record) + msg = ( + "Assay without platform:\nPath:\ta_small.txt" + "\nMeasurement Type:\texome sequencing assay" + "\nTechnology Type:\tnucleotide sequencing" + "\nTechnology Platform:\t" + ) + assert record[0].category == AdvisoryIsaValidationWarning + assert str(record[0].message) == msg msg = ( "Can't validate parameter values and names for process with undeclared protocol " '"Unknown" and name type "Data Transformation Name"' ) - assert record[0].category == ModerateIsaValidationWarning - assert str(record[0].message) == msg + assert record[1].category == ModerateIsaValidationWarning + assert str(record[1].message) == msg def test_assay_writer_small2_assay(small2_investigation_file, tmp_path): @@ -145,12 +164,15 @@ def test_assay_writer_gelelect(gelelect_investigation_file, tmp_path): with pytest.warns(IsaWarning) as record: _parse_write_assert_assay(gelelect_investigation_file, tmp_path, quote='"') # Check warnings - assert 3 == len(record) + assert 4 == len(record) msg = "Skipping empty ontology source: , , , " assert record[0].category == ParseIsatabWarning assert str(record[0].message) == msg - msg = '"Normalization Name" not supported by protocol type "normalization" (only "data normalization")' + msg = "Study without 
title:\nID:\tstudy01\nTitle:\t\nPath:\ts_study01.txt" assert record[1].category == ModerateIsaValidationWarning assert str(record[1].message) == msg + msg = '"Normalization Name" not supported by protocol type "normalization" (only "data normalization")' assert record[2].category == ModerateIsaValidationWarning assert str(record[2].message) == msg + assert record[3].category == ModerateIsaValidationWarning + assert str(record[3].message) == msg diff --git a/tests/test_write_investigation.py b/tests/test_write_investigation.py index cbb1897..8eab324 100644 --- a/tests/test_write_investigation.py +++ b/tests/test_write_investigation.py @@ -5,17 +5,26 @@ import filecmp import pytest -from altamisa.exceptions import IsaWarning, ParseIsatabWarning, WriteIsatabWarning +from altamisa.exceptions import ( + CriticalIsaValidationWarning, + IsaWarning, + ModerateIsaValidationWarning, + ParseIsatabWarning, + WriteIsatabWarning, +) from altamisa.isatab import InvestigationReader, InvestigationWriter, InvestigationValidator # Tests with one-time reading and writing -def test_parse_minimal_investigation(minimal_investigation_file, tmp_path): +def test_write_minimal_investigation(minimal_investigation_file, tmp_path): # Read Investigation from file-like object - investigation = InvestigationReader.from_stream(minimal_investigation_file).read() - InvestigationValidator(investigation).validate() + with pytest.warns(IsaWarning) as record: + investigation = InvestigationReader.from_stream(minimal_investigation_file).read() + InvestigationValidator(investigation).validate() + # Check warnings + assert 1 == len(record) # Write Investigation to temporary file path = tmp_path / "i_minimal.txt" with pytest.warns(IsaWarning) as record: @@ -30,10 +39,13 @@ def test_parse_minimal_investigation(minimal_investigation_file, tmp_path): assert filecmp.cmp(minimal_investigation_file.name, path, shallow=False) -def test_parse_small_investigation(small_investigation_file, tmp_path): +def test_write_small_investigation(small_investigation_file, tmp_path): # Read Investigation from file-like object - investigation = InvestigationReader.from_stream(small_investigation_file).read() - InvestigationValidator(investigation).validate() + with pytest.warns(IsaWarning) as record: + investigation = InvestigationReader.from_stream(small_investigation_file).read() + InvestigationValidator(investigation).validate() + # Check warnings + assert 1 == len(record) # Write Investigation to temporary file path = tmp_path / "i_small.txt" with pytest.warns(IsaWarning) as record: @@ -50,7 +62,7 @@ def test_parse_small_investigation(small_investigation_file, tmp_path): assert filecmp.cmp(small_investigation_file.name, path, shallow=False) -def test_parse_comment_investigation(comment_investigation_file, tmp_path): +def test_write_comment_investigation(comment_investigation_file, tmp_path): # Read Investigation from file-like object investigation = InvestigationReader.from_stream(comment_investigation_file).read() InvestigationValidator(investigation).validate() @@ -62,10 +74,28 @@ def test_parse_comment_investigation(comment_investigation_file, tmp_path): assert filecmp.cmp(comment_investigation_file.name, path, shallow=False) -def test_parse_full2_investigation(full2_investigation_file, tmp_path): +def test_write_full2_investigation(full2_investigation_file, tmp_path): # Read Investigation from file-like object - investigation = InvestigationReader.from_stream(full2_investigation_file).read() - InvestigationValidator(investigation).validate() + 
with pytest.warns(IsaWarning) as record: + investigation = InvestigationReader.from_stream(full2_investigation_file).read() + InvestigationValidator(investigation).validate() + # Check warnings + assert 3 == len(record) + msg = "Study with incomplete minimal information (ID and path):\nID:\t\nTitle:\t\nPath:\t" + assert record[0].category == CriticalIsaValidationWarning + assert str(record[0].message) == msg + msg = "Study without title:\nID:\t\nTitle:\t\nPath:\t" + assert record[1].category == ModerateIsaValidationWarning + assert str(record[1].message) == msg + msg = ( + "Assay with incomplete minimal information (path, measurement and technology type):\n" + "Path:\t\n" + "Measurement Type:\tmetabolite profiling\n" + "Technology Type:\tmass spectrometry\n" + "Technology Platform:\tLC-MS/MS" + ) + assert record[2].category == CriticalIsaValidationWarning + assert str(record[2].message) == msg # Write Investigation to temporary file path = tmp_path / "i_fullinvest2.txt" with open(path, "wt") as file: @@ -74,22 +104,7 @@ def test_parse_full2_investigation(full2_investigation_file, tmp_path): assert filecmp.cmp(full2_investigation_file.name, path, shallow=False) -def test_parse_assays2_investigation(assays2_investigation_file, tmp_path): - # Read Investigation from file-like object - investigation = InvestigationReader.from_stream(assays2_investigation_file).read() - InvestigationValidator(investigation).validate() - # Write Investigation to temporary file - path = tmp_path / "i_assays2.txt" - with pytest.warns(IsaWarning) as record: - with open(path, "wt") as file: - InvestigationWriter.from_stream(investigation, file, lineterminator="\n").write() - # Check warnings - assert 12 == len(record) - # Compare input and output - assert filecmp.cmp(assays2_investigation_file.name, path, shallow=False) - - -def test_parse_BII_I_2_investigation(BII_I_2_investigation_file, tmp_path): +def test_write_BII_I_2_investigation(BII_I_2_investigation_file, tmp_path): # Read Investigation from file-like object investigation = InvestigationReader.from_stream(BII_I_2_investigation_file).read() InvestigationValidator(investigation).validate() @@ -104,10 +119,13 @@ def test_parse_BII_I_2_investigation(BII_I_2_investigation_file, tmp_path): # Tests with second reading and writing for normalization -def test_parse_assays_investigation(assays_investigation_file, tmp_path): +def test_write_assays_investigation(assays_investigation_file, tmp_path): # Read Investigation from file-like object investigation = InvestigationReader.from_stream(assays_investigation_file).read() - InvestigationValidator(investigation).validate() + with pytest.warns(IsaWarning) as record: + InvestigationValidator(investigation).validate() + # Check warnings + assert 5 == len(record) # Write Investigation to temporary file path1 = tmp_path / "i_assays.txt" with pytest.warns(IsaWarning) as record: @@ -119,7 +137,10 @@ def test_parse_assays_investigation(assays_investigation_file, tmp_path): with open(path1, "rt") as file: reader = InvestigationReader.from_stream(file) investigation = reader.read() - InvestigationValidator(investigation).validate() + with pytest.warns(IsaWarning) as record: + InvestigationValidator(investigation).validate() + # Check warnings + assert 5 == len(record) # Write Investigation to second temporary file path2 = tmp_path / "i_assays_2.txt" with pytest.warns(IsaWarning) as record: @@ -131,7 +152,7 @@ def test_parse_assays_investigation(assays_investigation_file, tmp_path): assert filecmp.cmp(path1, path2, shallow=False) 
-def test_parse_full_investigation(full_investigation_file, tmp_path): +def test_write_full_investigation(full_investigation_file, tmp_path): # Read Investigation from file-like object investigation = InvestigationReader.from_stream(full_investigation_file).read() InvestigationValidator(investigation).validate() @@ -151,7 +172,7 @@ def test_parse_full_investigation(full_investigation_file, tmp_path): assert filecmp.cmp(path1, path2, shallow=False) -def test_parse_BII_I_1_investigation(BII_I_1_investigation_file, tmp_path): +def test_write_BII_I_1_investigation(BII_I_1_investigation_file, tmp_path): # Read Investigation from file-like object with pytest.warns(IsaWarning) as record: investigation = InvestigationReader.from_stream(BII_I_1_investigation_file).read() diff --git a/tests/test_write_study.py b/tests/test_write_study.py index 4835858..e8ba014 100644 --- a/tests/test_write_study.py +++ b/tests/test_write_study.py @@ -6,7 +6,7 @@ import pytest import os -from altamisa.exceptions import IsaWarning, ParseIsatabWarning +from altamisa.exceptions import ModerateIsaValidationWarning, IsaWarning, ParseIsatabWarning from altamisa.isatab import ( InvestigationReader, InvestigationValidator, @@ -43,15 +43,24 @@ def _parse_write_assert(investigation_file, tmp_path, quote=None): def test_study_writer_minimal(minimal_investigation_file, tmp_path): - _parse_write_assert(minimal_investigation_file, tmp_path) + with pytest.warns(IsaWarning) as record: + _parse_write_assert(minimal_investigation_file, tmp_path) + # Check warnings + assert 1 == len(record) def test_study_writer_minimal2(minimal2_investigation_file, tmp_path): - _parse_write_assert(minimal2_investigation_file, tmp_path) + with pytest.warns(IsaWarning) as record: + _parse_write_assert(minimal2_investigation_file, tmp_path) + # Check warnings + assert 1 == len(record) def test_study_writer_small(small_investigation_file, tmp_path): - _parse_write_assert(small_investigation_file, tmp_path) + with pytest.warns(IsaWarning) as record: + _parse_write_assert(small_investigation_file, tmp_path) + # Check warnings + assert 1 == len(record) def test_study_writer_small2(small2_investigation_file, tmp_path): @@ -69,7 +78,10 @@ def test_study_writer_gelelect(gelelect_investigation_file, tmp_path): with pytest.warns(IsaWarning) as record: _parse_write_assert(gelelect_investigation_file, tmp_path, quote='"') # Check warnings - assert 1 == len(record) + assert 2 == len(record) msg = "Skipping empty ontology source: , , , " assert record[0].category == ParseIsatabWarning assert str(record[0].message) == msg + msg = "Study without title:\nID:\tstudy01\nTitle:\t\nPath:\ts_study01.txt" + assert record[1].category == ModerateIsaValidationWarning + assert str(record[1].message) == msg
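As a usage note (not part of the patch itself): the new `InvestigationValidator` checks report problems as Python warnings of graded severity (CriticalIsaValidationWarning, ModerateIsaValidationWarning, AdvisoryIsaValidationWarning), and the reworked apps gather them with `warnings.catch_warnings(record=True)` before printing. The following is a minimal, hypothetical sketch of how a downstream script could use that pattern; the file name "i_minimal.txt" is only a placeholder.

# Sketch only: read an investigation, run the new validator, and report
# the collected validation warnings (mirrors the pattern used by the
# isatab2validation app in this patch).
import warnings

from altamisa.isatab import InvestigationReader, InvestigationValidator

with warnings.catch_warnings(record=True) as records:
    # Keep duplicate warnings of the same type/content, as the app optionally does.
    warnings.simplefilter("always")
    with open("i_minimal.txt", "rt") as inputf:  # placeholder path
        investigation = InvestigationReader.from_stream(inputf).read()
    # Emits Critical/Moderate/AdvisoryIsaValidationWarning for missing or
    # duplicated study and assay information (IDs, paths, titles, types).
    InvestigationValidator(investigation).validate()

for record in records:
    warnings.showwarning(
        record.message, record.category, record.filename, record.lineno, record.line
    )

Collecting parsing and validation warnings in one recording context, as the refactored run_warnings_caught helpers do, lets callers decide after the fact how severe warnings should be handled instead of interrupting the read.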