Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tests for cli #153

Merged
merged 6 commits into from
Aug 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 5 additions & 11 deletions bia-ingest-shared-models/bia_ingest_sm/biostudies.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ class Submission(BaseModel):
attributes: List[Attribute]

def as_tsv(self) -> str:
tsv_rep = f"Submission"
tsv_rep = "Submission"
if self.accno:
tsv_rep += f"\t{self.accno}"
tsv_rep += "\n"
Expand Down Expand Up @@ -136,7 +136,6 @@ class QueryResult(BaseModel):


def load_submission(accession_id: str) -> Submission:

url = STUDY_URL_TEMPLATE.format(accession=accession_id)
logger.info(f"Fetching submission from {url}")
headers = {
Expand All @@ -146,19 +145,18 @@ def load_submission(accession_id: str) -> Submission:

assert r.status_code == 200

submission = Submission.parse_raw(r.content)
submission = Submission.model_validate_json(r.content)

return submission


def attributes_to_dict(
attributes: List[Attribute],
) -> Dict[str, Optional[str | List[str]]]:

attr_dict = {}
for attr in attributes:
if attr.name in attr_dict:
if type(attr_dict[attr.name]) is list:
if isinstance(attr_dict[attr.name], list):
attr_dict[attr.name].append(attr.value)
else:
attr_dict[attr.name] = [
Expand All @@ -175,7 +173,7 @@ def find_file_lists_in_section(
) -> List[Dict[str, Union[str, None, List[str]]]]:
"""
Find all of the File Lists in a Section, recursively descending through the subsections.

Return a list of dictionaries.
"""

Expand Down Expand Up @@ -206,14 +204,12 @@ def find_file_lists_in_section(
def find_file_lists_in_submission(
    submission: Submission,
) -> List[Dict[str, Union[str, None, List[str]]]]:
    """Return the file lists found in *submission*.

    Hands the submission's top-level section, together with a fresh empty
    list, to ``find_file_lists_in_section`` and returns its result unchanged.
    """
    root_section = submission.section
    return find_file_lists_in_section(root_section, [])


def flist_from_flist_fname(
accession_id: str, flist_fname: str, extra_attribute: Union[List[str], str] = None
) -> List[File]:

flist_url = FLIST_URI_TEMPLATE.format(
accession_id=accession_id, flist_fname=flist_fname
)
Expand All @@ -231,7 +227,7 @@ def flist_from_flist_fname(
fl = TypeAdapter(List[File]).validate_json(filtered_content)

if extra_attribute:
if type(extra_attribute) is not list:
if not isinstance(extra_attribute, list):
extra_attribute = [
extra_attribute,
]
Expand Down Expand Up @@ -269,7 +265,6 @@ def get_file_uri_template_for_accession(accession_id: str) -> str:


def find_files_in_submission_file_lists(submission: Submission) -> List[File]:

file_list_dicts = find_file_lists_in_submission(submission)
file_lists = []
for file_list_dict in file_list_dicts:
Expand Down Expand Up @@ -298,7 +293,6 @@ def find_files_in_submission(submission: Submission) -> List[File]:
all_files = find_files_in_submission_file_lists(submission)

def descend_and_find_files(section, files_list=[]):

section_type = type(section)
if section_type == Section:
for file in section.files:
Expand Down
26 changes: 21 additions & 5 deletions bia-ingest-shared-models/test/conftest.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
from unittest.mock import Mock
from typing import Dict
from pathlib import Path
import json
import pytest
from bia_ingest_sm.biostudies import Submission
from bia_ingest_sm.biostudies import Submission, requests
from .utils import accession_id
from bia_ingest_sm.cli_logging import ObjectValidationResult


@pytest.fixture
def base_path() -> Path:
"""Return full path to test directory

"""
"""Return full path to test directory"""
return Path(__file__).parent


Expand All @@ -23,4 +24,19 @@ def test_submission(base_path: Path) -> Submission:

@pytest.fixture
def result_summary():
return {accession_id: ObjectValidationResult()}
return {accession_id: ObjectValidationResult()}


@pytest.fixture
def mock_request_get(monkeypatch):
    """Replace ``requests.get`` with a stand-in that reads a file from disk.

    The stand-in never touches the network: it takes the final path
    component of the requested URL and loads the file of that name from the
    ``data`` directory located next to this module.  The replacement is
    installed on the ``requests`` object imported from
    ``bia_ingest_sm.biostudies`` via ``monkeypatch.setattr``.
    """

    def _mock_request_get(flist_url: str, *args, **kwargs) -> Mock:
        # Extra positional/keyword arguments (e.g. ``headers=``) are accepted
        # and ignored so the stand-in is call-compatible with ``requests.get``.
        data_dir = Path(__file__).parent / "data"
        path_to_load = data_dir / Path(flist_url).name

        response = Mock()
        response.status_code = 200
        # NOTE(review): a real response exposes ``content`` as bytes; text is
        # kept here because that is what the existing tests rely on - confirm
        # before switching to ``read_bytes()``.
        response.content = path_to_load.read_text()
        return response

    monkeypatch.setattr(requests, "get", _mock_request_get)
71 changes: 71 additions & 0 deletions bia-ingest-shared-models/test/test_bia_ingest_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
from typer.testing import CliRunner
from bia_ingest_sm import cli
from bia_ingest_sm.conversion.utils import settings
from bia_ingest_sm import biostudies
from . import utils
from bia_shared_datamodels import bia_data_model

# Shared CLI runner used to invoke the typer application in-process.
runner = CliRunner()

accession_id = "S-BIADTEST"

# Expected objects keyed by the name of the directory they are looked up in.
expected_objects_dict = {
    "studies": utils.get_test_study(),
    "experimental_imaging_dataset": utils.get_test_experimental_imaging_dataset(),
    "specimens": utils.get_test_specimen(),
    "biosamples": utils.get_test_biosample(),
    "image_acquisitions": utils.get_test_image_acquisition(),
    "specimen_growth_protocol": utils.get_test_specimen_growth_protocol(),
    "specimen_imaging_protocol": utils.get_test_specimen_imaging_preparation_protocol(),
}

# File references are a special case as they depend on experimental dataset
expected_file_references = utils.get_test_file_reference(
    ["file_list_study_component_1.json", "file_list_study_component_2.json"]
)
expected_objects_dict["file_references"] = expected_file_references

# Total number of objects: a list contributes its length, anything else
# counts as a single object.
n_expected_objects = 0
for expected_objects in expected_objects_dict.values():
    n_expected_objects += (
        len(expected_objects) if isinstance(expected_objects, list) else 1
    )


def test_cli_writes_expected_files(
    monkeypatch, tmp_path, test_submission, mock_request_get
):
    """Run ``ingest`` and compare the JSON files it writes with expectations.

    ``settings.bia_data_dir`` is pointed at ``tmp_path`` and
    ``cli.load_submission`` is replaced so that it returns the
    ``test_submission`` fixture.  After the command exits with code 0, the
    number of ``*.json`` files under ``tmp_path`` must equal
    ``n_expected_objects``, and every expected object must equal the model
    parsed back from its file.
    """
    monkeypatch.setattr(settings, "bia_data_dir", str(tmp_path))

    def _load_submission(accession_id: str) -> biostudies.Submission:
        return test_submission

    monkeypatch.setattr(cli, "load_submission", _load_submission)

    result = runner.invoke(cli.app, ["ingest", accession_id])
    assert result.exit_code == 0

    json_paths = list(tmp_path.rglob("*.json"))
    assert len(json_paths) == n_expected_objects

    for dir_name, expected in expected_objects_dict.items():
        # Treat a single expected object as a one-element list.
        candidates = expected if isinstance(expected, list) else [expected]
        object_dir = tmp_path / dir_name / accession_id

        for expected_object in candidates:
            # Studies are looked up by accession id directly under
            # "studies"; every other object by uuid in its own directory.
            if dir_name == "studies":
                json_path = tmp_path / "studies" / f"{accession_id}.json"
            else:
                json_path = object_dir / f"{expected_object.uuid}.json"

            model_cls = getattr(bia_data_model, expected_object.model.type_name)
            loaded = model_cls.model_validate_json(json_path.read_text())
            assert loaded == expected_object
51 changes: 20 additions & 31 deletions bia-ingest-shared-models/test/test_file_reference.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,16 @@
""" Test FileReference creation
Test FileReference creation separately from other shared models as it
now has a different pattern of creation from other artefacts i.e.
it now needs a submitted dataset
"""Test FileReference creation

Test FileReference creation separately from other shared models as it
now has a different pattern of creation from other artefacts i.e.
it now needs a submitted dataset
"""

from typing import Dict
from pathlib import Path
from unittest.mock import Mock
import pytest
from . import utils
from bia_ingest_sm.conversion import (
experimental_imaging_dataset,
file_reference,
)
from bia_ingest_sm.biostudies import requests, File
from pydantic import TypeAdapter

# TODO: Mock requests.get correctly!!!
def mock_request_get(flist_url: str) -> Dict[str, str]:
data_dir = Path(__file__).parent / "data"
path_to_load = data_dir / Path(flist_url).name
return_value = Mock()
return_value.status_code = 200
return_value.content = path_to_load.read_text()
return return_value

from bia_ingest_sm.biostudies import File

requests.get = mock_request_get

# Get second study component as dataset in submission
datasets_in_submission = [
Expand All @@ -47,17 +30,19 @@ def test_get_file_reference_for_submission_dataset(test_submission, result_summa
accession_id=test_submission.accno,
submission_dataset=datasets_in_submission[0],
files_in_file_list=files_in_filelist,
result_summary=result_summary
result_summary=result_summary,
)
assert created == expected


def test_create_file_reference_for_study_component(test_submission, caplog, result_summary):

def test_create_file_reference_for_study_component(
test_submission, caplog, result_summary, mock_request_get
):
expected = {datasets_in_submission[0].title_id: utils.get_test_file_reference()}
created = file_reference.get_file_reference_by_dataset(
test_submission, datasets_in_submission=datasets_in_submission, result_summary=result_summary

test_submission,
datasets_in_submission=datasets_in_submission,
result_summary=result_summary,
)
assert created == expected

Expand All @@ -68,14 +53,18 @@ def test_create_file_reference_for_study_component(test_submission, caplog, resu
def test_create_file_reference_for_study_component_when_no_matching_sc_in_file_list(
test_submission, caplog, result_summary
):
"""Test attempted creation of study FileReferences when study
components in dataset do not match does in file_list
"""Test attempted creation of study FileReferences when study
components in dataset do not match those in file_list
"""

dataset = utils.get_test_experimental_imaging_dataset()[0]
dataset.title_id = "Test name not in file list"
created = file_reference.get_file_reference_by_dataset(
test_submission, datasets_in_submission=[dataset,], result_summary=result_summary
test_submission,
datasets_in_submission=[
dataset,
],
result_summary=result_summary,
)

assert created is None
Expand Down
Loading
Loading