Skip to content

Commit

Permalink
Merge branch 'master' into factor_value_lists
Browse files: browse the repository at this point in the history
  • Loading branch information
sellth committed Jan 16, 2024
2 parents 818b394 + 2572456 commit 09c300f
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 11 deletions.
14 changes: 10 additions & 4 deletions altamisa/isatab/parse_assay_study.py
Original file line number Diff line number Diff line change
Expand Up @@ -817,7 +817,7 @@ def from_stream(cls, study_id: str, input_file: TextIO, filename: Optional[str]
def __init__(self, study_id: str, input_file: TextIO, filename: Optional[str]):
self.study_id = study_id
self.input_file = input_file
self.filename = filename or getattr(input_file, "name", "<no file>")
self._filename = filename or getattr(input_file, "name", "<no file>")
self.unique_rows = set()
self.duplicate_rows = []
self._reader = csv.reader(input_file, delimiter="\t", quotechar='"')
Expand Down Expand Up @@ -848,6 +848,9 @@ def _read_next_line(self):
self.unique_rows.add("\t".join(self._line))
except StopIteration:
self._line = None
except UnicodeDecodeError as e: # pragma: no cover
msg = f"Invalid encoding of study file '{self._filename}' (use Unicode/UTF-8)."
raise ParseIsatabException(msg) from e
return prev_line

def read(self):
Expand All @@ -856,7 +859,7 @@ def read(self):
:returns: Nodes per row of the study file
"""
builder = _StudyRowBuilder(self.header, self.filename, self.study_id)
builder = _StudyRowBuilder(self.header, self._filename, self.study_id)
while True:
line = self._read_next_line()
if line:
Expand Down Expand Up @@ -939,7 +942,7 @@ def __init__(self, study_id: str, assay_id: str, input_file: TextIO, filename: O
self.study_id = study_id
self.assay_id = assay_id
self.input_file = input_file
self.filename = filename or getattr(input_file, "name", "<no file>")
self._filename = filename or getattr(input_file, "name", "<no file>")
self.unique_rows = set()
self.duplicate_rows = []
self._reader = csv.reader(input_file, delimiter="\t", quotechar='"')
Expand Down Expand Up @@ -970,6 +973,9 @@ def _read_next_line(self):
self.unique_rows.add("\t".join(self._line))
except StopIteration:
self._line = None
except UnicodeDecodeError as e: # pragma: no cover
msg = f"Invalid encoding of assay file '{self._filename}' (use Unicode/UTF-8)."
raise ParseIsatabException(msg) from e
return prev_line

def read(self):
Expand All @@ -978,7 +984,7 @@ def read(self):
:return: Nodes per row of the assay file
"""
builder = _AssayRowBuilder(self.header, self.filename, self.study_id, self.assay_id)
builder = _AssayRowBuilder(self.header, self._filename, self.study_id, self.assay_id)
while True:
line = self._read_next_line()
if line:
Expand Down
5 changes: 4 additions & 1 deletion altamisa/isatab/parse_investigation.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,9 @@ def _read_next_line(self) -> Optional[List[str]]:
self._line = list_strip(next(self._reader))
except StopIteration:
self._line = None
except UnicodeDecodeError as e: # pragma: no cover
msg = f"Invalid encoding of investigation file '{self._filename}' (use Unicode/UTF-8)."
raise ParseIsatabException(msg) from e
return prev_line

def _next_line_startswith_comment(self):
Expand Down Expand Up @@ -366,7 +369,7 @@ def _read_studies(self) -> Iterator[models.StudyInfo]:
line = self._read_next_line()
if not line or not line[0] == investigation_headers.STUDY: # pragma: no cover
tpl = "Expected {} but got {}"
msg = tpl.format(investigation_headers.INVESTIGATION, line)
msg = tpl.format(investigation_headers.STUDY, line)
raise ParseIsatabException(msg)
# Read the other lines in this section.
section, comment_keys = self._read_single_column_section(
Expand Down
2 changes: 2 additions & 0 deletions requirements/test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,5 @@ flake8 >=3.5.0
isort

pyright

syrupy
65 changes: 65 additions & 0 deletions tests/__snapshots__/test_apps.ambr
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# serializer version: 1
# name: test_isatab2isatab
list([
'''
Investigation with only one study contains metadata:
ID: i_minimal
Title: Minimal Investigation
Path: i_minimal.txt
Submission Date:
Public Release Date: None
Prefer recording metadata in the study section.
''',
'''
Assay without platform:
Path: a_minimal.txt
Measurement Type: exome sequencing assay
Technology Type: nucleotide sequencing
Technology Platform:
''',
'No reference headers available for section INVESTIGATION PUBLICATIONS. Applying default order.',
'No reference headers available for section INVESTIGATION CONTACTS. Applying default order.',
'No reference headers available for section STUDY DESIGN DESCRIPTORS. Applying default order.',
'No reference headers available for section STUDY PUBLICATIONS. Applying default order.',
'No reference headers available for section STUDY FACTORS. Applying default order.',
'No reference headers available for section STUDY CONTACTS. Applying default order.',
])
# ---
# name: test_isatab2isatab_input_is_output
'<Result IsaException("Can\'t output ISA-tab files to same directory as as input: /home/runner/work/altamisa/tests/data/i_minimal == /home/runner/work/altamisa/tests/data/i_minimal")>'
# ---
# name: test_isatab_validate
list([
'Incomplete ontology source; found: , Incomplete 1, 1, Incomplete 1, ()',
'Incomplete ontology source; found: Incomplete 2, , 2, Incomplete 2, ()',
'Ontology source name including whitespace(s); found: Incomplete 2, , 2, Incomplete 2, ()',
'Incomplete ontology source; found: Incomplete 3, Incomplete 3, , Incomplete 3, ()',
'Ontology source name including whitespace(s); found: Incomplete 3, Incomplete 3, , Incomplete 3, ()',
'Incomplete ontology source; found: Incomplete 4, Incomplete 4, 4, , ()',
'Ontology source name including whitespace(s); found: Incomplete 4, Incomplete 4, 4, , ()',
'''
Investigation with only one study contains metadata:
ID: i_warnings
Title: Investigation with Warnings
Path: i_warnings.txt
Submission Date:
Public Release Date: None
Prefer recording metadata in the study section.
''',
'Invalid mail address: invalid_mail',
'Invalid phone/fax number: CALL-ME',
'Invalid phone/fax number: FAX-ME',
'Invalid pubmed_id string: not-pubmed',
'Invalid doi string: not-a-doi',
'''
Assay without platform:
Path: a_warnings.txt
Measurement Type: exome sequencing assay
Technology Type: nucleotide sequencing
Technology Platform:
''',
'Assay path used more than once: a_warnings.txt',
"Found samples in assay 'a_warnings.txt' but not in parent study 's_warnings.txt':\\n0815-N2",
"Found samples in assay 'a_warnings.txt' but not in parent study 's_warnings.txt':\\n0815-N2",
])
# ---
15 changes: 9 additions & 6 deletions tests/test_apps.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import os.path

import pytest
from syrupy.assertion import SnapshotAssertion
from typer.testing import CliRunner

from altamisa.apps import isatab2dot, isatab2isatab, isatab_validate
Expand All @@ -12,18 +13,18 @@
runner = CliRunner()


def test_isatab_validate():
def test_isatab_validate(snapshot: SnapshotAssertion):
i_file = os.path.join(os.path.dirname(__file__), "data", "i_warnings", "i_warnings.txt")
argv = ["--input-investigation-file", i_file, "--show-duplicate-warnings"]

with pytest.warns(IsaWarning) as record:
result = runner.invoke(isatab_validate.app, argv)
assert result.exit_code == 0

assert 17 == len(record)
assert snapshot == [str(r.message) for r in record]


def test_isatab2isatab(tmpdir):
def test_isatab2isatab(tmpdir, snapshot: SnapshotAssertion):
i_file = os.path.join(os.path.dirname(__file__), "data", "i_minimal", "i_minimal.txt")
argv = [
"--input-investigation-file",
Expand All @@ -38,10 +39,10 @@ def test_isatab2isatab(tmpdir):
result = runner.invoke(isatab2isatab.app, argv)
assert result.exit_code == 0

assert 8 == len(record)
assert snapshot == [str(r.message) for r in record]


def test_isatab2isatab_input_is_output(tmpdir):
def test_isatab2isatab_input_is_output(tmpdir, snapshot: SnapshotAssertion):
i_file = os.path.join(os.path.dirname(__file__), "data", "i_minimal", "i_minimal.txt")
argv = [
"--input-investigation-file",
Expand All @@ -54,7 +55,9 @@ def test_isatab2isatab_input_is_output(tmpdir):

result = runner.invoke(isatab2isatab.app, argv)
assert result.exit_code == 1
assert "Can't output ISA-tab files to same directory as as input" in str(result)
assert snapshot == str(result).replace(
os.path.dirname(__file__), "/home/runner/work/altamisa/tests"
)


def test_isatab2dot(tmpdir):
Expand Down

0 comments on commit 09c300f

Please sign in to comment.