diff --git a/eido/_version.py b/eido/_version.py index d31c31e..788da1f 100644 --- a/eido/_version.py +++ b/eido/_version.py @@ -1 +1 @@ -__version__ = "0.2.3" +__version__ = "0.2.4" diff --git a/eido/conversion_plugins.py b/eido/conversion_plugins.py index 01dd550..fe45805 100644 --- a/eido/conversion_plugins.py +++ b/eido/conversion_plugins.py @@ -42,8 +42,7 @@ def yaml_pep_filter(p, **kwargs) -> Dict[str, str]: """ from yaml import dump - data = p.config.to_dict() - return {"project": dump(data, default_flow_style=False)} + return {"project": dump(p.config, default_flow_style=False)} def csv_pep_filter(p, **kwargs) -> Dict[str, str]: @@ -70,7 +69,7 @@ def processed_pep_filter(p, **kwargs) -> Dict[str, str]: samples_as_objects = kwargs.get("samples_as_objects") subsamples_as_objects = kwargs.get("subsamples_as_objects") - prj_repr = p.config.to_dict() + prj_repr = p.config return { "project": str(prj_repr), diff --git a/eido/output_formatters.py b/eido/output_formatters.py index 8d20a12..f9d2486 100644 --- a/eido/output_formatters.py +++ b/eido/output_formatters.py @@ -110,7 +110,7 @@ def _convert_sample_to_row( ): value = sample[attribute][sample_index] else: - value = sample[attribute] + value = sample.get(attribute) sample_row.append(value or "") diff --git a/eido/validation.py b/eido/validation.py index e409a13..e75d7e6 100644 --- a/eido/validation.py +++ b/eido/validation.py @@ -43,6 +43,10 @@ def _validate_object(obj: Mapping, schema: Union[str, dict], sample_name_colname instance_name = error.instance[sample_name_colname] except KeyError: instance_name = "project" + except TypeError: + instance_name = obj["samples"][error.absolute_path[1]][ + sample_name_colname + ] errors_by_type[error.message].append( { "type": error.message, diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 5e81881..7c16b30 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -2,5 +2,5 @@ importlib-metadata; python_version < '3.10' jsonschema>=3.0.1 logmuse>=0.2.5 pandas -peppy>=0.40.6 +peppy>=0.40.7 ubiquerg>=0.5.2 diff --git a/tests/conftest.py b/tests/conftest.py index 905591e..8e47b94 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -124,10 +124,25 @@ def save_result_mock(mocker): @pytest.fixture -def test_file_existence_schema(schemas_path): +def test_file_existing_schema(schemas_path): return os.path.join(schemas_path, "schema_test_file_exist.yaml") @pytest.fixture -def test_file_existance_pep(peps_path): - return os.path.join(peps_path, "test_file_existence", "project_config.yaml") +def test_file_existing_pep(peps_path): + return os.path.join(peps_path, "test_file_existing", "project_config.yaml") + + +@pytest.fixture +def test_schema_value_check(schemas_path): + return os.path.join(schemas_path, "value_check_schema.yaml") + + +@pytest.fixture +def test_file_value_check(peps_path): + return os.path.join(peps_path, "value_check_pep", "project_config.yaml") + + +@pytest.fixture +def test_multiple_subs(peps_path): + return os.path.join(peps_path, "multiple_subsamples", "project_config.yaml") diff --git a/tests/data/peps/multiple_subsamples/project_config.yaml b/tests/data/peps/multiple_subsamples/project_config.yaml new file mode 100644 index 0000000..e0e580b --- /dev/null +++ b/tests/data/peps/multiple_subsamples/project_config.yaml @@ -0,0 +1,19 @@ +pep_version: "2.1.0" +sample_table: sample_table.csv +subsample_table: + - subsample_table1.csv + - subsample_table2.csv + +sample_modifiers: + append: + local_files: LOCAL + genome: "fg" + derive: + attributes: [local_files] + sources: + LOCAL: "../data/{file_path}" + imply: + - if: + identifier: "frog1" + then: + genome: "frog_frog" diff --git a/tests/data/peps/multiple_subsamples/sample_table.csv b/tests/data/peps/multiple_subsamples/sample_table.csv new file mode 100644 index 0000000..7c06204 --- /dev/null +++ b/tests/data/peps/multiple_subsamples/sample_table.csv @@ -0,0 +1,5 @@ +sample_id,protocol,identifier +frog_1,anySampleType,frog1 +frog_2,anySampleType,frog2 +frog_3,anySampleType,frog3 +frog_4,anySampleType,frog4 diff --git a/tests/data/peps/multiple_subsamples/subsample_table1.csv b/tests/data/peps/multiple_subsamples/subsample_table1.csv new file mode 100644 index 0000000..f1b3c2f --- /dev/null +++ b/tests/data/peps/multiple_subsamples/subsample_table1.csv @@ -0,0 +1,6 @@ +sample_id,file_path,subsample_name +frog_1,file/a.txt,a +frog_1,file/b.txt,b +frog_1,file/c.txt,c +frog_2,file/a.txt,a +frog_2,file/b.txt,b diff --git a/tests/data/peps/multiple_subsamples/subsample_table2.csv b/tests/data/peps/multiple_subsamples/subsample_table2.csv new file mode 100644 index 0000000..5e6d298 --- /dev/null +++ b/tests/data/peps/multiple_subsamples/subsample_table2.csv @@ -0,0 +1,6 @@ +sample_id,random_string,subsample_name +frog_1,x_x,x +frog_1,y_y,y +frog_1,z_z,z +frog_2,xy_yx,xy +frog_2,xx_xx,xx diff --git a/tests/data/peps/test_file_existence/project_config.yaml b/tests/data/peps/test_file_existing/project_config.yaml similarity index 100% rename from tests/data/peps/test_file_existence/project_config.yaml rename to tests/data/peps/test_file_existing/project_config.yaml diff --git a/tests/data/peps/test_file_existence/sample_table.csv b/tests/data/peps/test_file_existing/sample_table.csv similarity index 100% rename from tests/data/peps/test_file_existence/sample_table.csv rename to tests/data/peps/test_file_existing/sample_table.csv diff --git a/tests/data/peps/test_file_existence/subsample_table.csv b/tests/data/peps/test_file_existing/subsample_table.csv similarity index 100% rename from tests/data/peps/test_file_existence/subsample_table.csv rename to tests/data/peps/test_file_existing/subsample_table.csv diff --git a/tests/data/peps/value_check_pep/project_config.yaml b/tests/data/peps/value_check_pep/project_config.yaml new file mode 100644 index 0000000..66c4380 --- /dev/null +++ b/tests/data/peps/value_check_pep/project_config.yaml @@ -0,0 +1,6 @@ +description: None +name: encode_prj +pep_version: 2.0.0 +project_name: value_check_pep +sample_table: sample_table.csv +subsample_table: [] diff --git a/tests/data/peps/value_check_pep/sample_table.csv b/tests/data/peps/value_check_pep/sample_table.csv new file mode 100644 index 0000000..cefc2aa --- /dev/null +++ b/tests/data/peps/value_check_pep/sample_table.csv @@ -0,0 +1,7 @@ +sample_name,file_name,genome,assay,cell_line,target,format_type +encode_4,ENCFF452DAM.bed.gz,hg38,Histone ChIP-seq,skeletal muscle myoblast,H3K36me3,narrowPeak +encode_20,ENCFF121AXG.bed.gz,hg38,DNase-seq,RPMI7951,,tssPeak +encode_21,ENCFF710ECJ.bed.gz,hg38,DNase-seq,RPMI7951,,broadPeak +encode_22,ENCFF945FZN.bed.gz,hg38,DNase-seq,RPMI7951,,narrowPeak +encode_23,ENCFF322PQO.bed.gz,hg38,DNase-seq,RPMI7951,,tssPeak +encode_24,ENCFF322PQO.bed.gz,hg38,DNase-seq,RPMI7951,,tssPeak1 diff --git a/tests/data/schemas/value_check_schema.yaml b/tests/data/schemas/value_check_schema.yaml new file mode 100644 index 0000000..fb2352d --- /dev/null +++ b/tests/data/schemas/value_check_schema.yaml @@ -0,0 +1,16 @@ +description: bedboss run-all pep schema +properties: + samples: + items: + properties: + format_type: + description: whether the regions are narrow (transcription factor implies + narrow, histone mark implies broad peaks) + enum: + - narrowPeak + - broadPeak + type: string + type: object + type: array +required: +- samples diff --git a/tests/test_conversions.py b/tests/test_conversions.py index 179020e..a4aad8c 100644 --- a/tests/test_conversions.py +++ b/tests/test_conversions.py @@ -1,4 +1,10 @@ -from eido.conversion import * +from eido.conversion import ( + run_filter, + get_available_pep_filters, + pep_conversion_plugins, + convert_project, +) +import peppy class TestConversionInfrastructure: @@ -74,3 +80,27 @@ def test_eido_csv_filter_filters_nextflow_taxprofiler_input_correctly( assert save_result_mock.called assert conv_result == {"samples": output_pep_nextflow_taxprofiler} + + def test_multiple_subsamples(self, test_multiple_subs): + project = peppy.Project(test_multiple_subs, sample_table_index="sample_id") + + conversion = convert_project( + project, + "csv", + ) + assert isinstance(conversion["samples"], str) + conversion = convert_project( + project, + "basic", + ) + assert isinstance(conversion["project"], str) + conversion = convert_project( + project, + "yaml", + ) + assert isinstance(conversion["project"], str) + conversion = convert_project( + project, + "yaml-samples", + ) + assert isinstance(conversion["samples"], str) diff --git a/tests/test_validations.py b/tests/test_validations.py index a217ca4..6bea0a4 100644 --- a/tests/test_validations.py +++ b/tests/test_validations.py @@ -1,7 +1,6 @@ import urllib import pytest -from jsonschema.exceptions import ValidationError from peppy import Project from peppy.utils import load_yaml @@ -140,9 +139,15 @@ def test_validate_detects_invalid(self, schema_invalid_file_path, remote_pep_cfg ) def test_validate_file_existance( - self, test_file_existance_pep, test_file_existence_schema + self, test_file_existing_pep, test_file_existing_schema ): - schema_path = test_file_existence_schema - prj = Project(test_file_existance_pep) + schema_path = test_file_existing_schema + prj = Project(test_file_existing_pep) with pytest.raises(PathAttrNotFoundError): validate_input_files(prj, schema_path) + + def test_validation_values(self, test_schema_value_check, test_file_value_check): + schema_path = test_schema_value_check + prj = Project(test_file_value_check) + with pytest.raises(EidoValidationError): + validate_project(project=prj, schema=schema_path)