From 35f652812aac593944010145dc4dfb5c4caaa7da Mon Sep 17 00:00:00 2001 From: Alejandro de la Vega Date: Fri, 10 Feb 2023 20:32:24 -0600 Subject: [PATCH 01/15] Add function to get all dataset's meta-data keys --- bids/layout/layout.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/bids/layout/layout.py b/bids/layout/layout.py index 3462570fd..d29e8add1 100644 --- a/bids/layout/layout.py +++ b/bids/layout/layout.py @@ -659,6 +659,17 @@ def get_entities(self, scope: str = None, sort: bool = False, long_form: bool = """ return query_entities(self.dataset, scope, sort, long_form=long_form) + def _get_metadata_keys(self): + """Return a list of all metadata keys found in the dataset.""" + + all_metadata_objects = self.dataset.select(self.schema.MetadataArtifact).objects() + + keys = set() + for obj in all_metadata_objects: + keys.update(obj['contents'].keys()) + + return keys + def get_dataset_description(self, scope='self', all_=False) -> Union[List[Dict], Dict]: """Return contents of dataset_description.json. From 4565f9c08440bd96ff4ece68041196a3484193df Mon Sep 17 00:00:00 2001 From: Alejandro de la Vega Date: Fri, 10 Feb 2023 20:36:43 -0600 Subject: [PATCH 02/15] Add metadata option to get_entities --- bids/layout/layout.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/bids/layout/layout.py b/bids/layout/layout.py index d29e8add1..44b63a1e6 100644 --- a/bids/layout/layout.py +++ b/bids/layout/layout.py @@ -631,7 +631,7 @@ def get(self, return_type: str = 'object', target: str = None, scope: str = None def entities(self): return self.get_entities() - def get_entities(self, scope: str = None, sort: bool = False, long_form: bool = True) -> dict: + def get_entities(self, scope: str = None, sort: bool = False, long_form: bool = True, metadata: bool = False) -> dict: """Returns a unique set of entities found within the dataset as a dict. Each key of the resulting dict contains a list of values (with at least one element). @@ -657,7 +657,14 @@ def get_entities(self, scope: str = None, sort: bool = False, long_form: bool = dict a unique set of entities found within the dataset as a dict """ - return query_entities(self.dataset, scope, sort, long_form=long_form) + + entities = query_entities(self.dataset, scope, sort, long_form=long_form) + + metadata = {} + if metadata is True: + metadata = self._get_metadata_keys() + + return {**entities, **metadata} def _get_metadata_keys(self): """Return a list of all metadata keys found in the dataset.""" From c1c24a4da930d271f4d3666564c7a01334538803 Mon Sep 17 00:00:00 2001 From: Alejandro de la Vega Date: Mon, 13 Feb 2023 15:23:39 -0600 Subject: [PATCH 03/15] Add metadata to get_entities, and __repr__ --- bids/layout/layout.py | 43 +++++++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/bids/layout/layout.py b/bids/layout/layout.py index 44b63a1e6..10c64d4a8 100644 --- a/bids/layout/layout.py +++ b/bids/layout/layout.py @@ -1,6 +1,7 @@ import difflib import enum import os.path +import typing from collections import defaultdict from functools import partial from pathlib import Path @@ -10,6 +11,7 @@ from .models import BIDSFile from .utils import BIDSMetadata from .writing import build_path, write_to_file +from ..external import inflect from ..exceptions import ( BIDSEntityError, BIDSValidationError, @@ -341,11 +343,15 @@ def __getattr__(self, key): pass if key.startswith('get_'): orig_ent_name = key.replace('get_', '') - ent_name = self.schema.fuzzy_match_entity(orig_ent_name).name - if ent_name not in self.get_entities(): - raise BIDSEntityError( - "'get_{}' can't be called because '{}' isn't a " - "recognized entity name.".format(orig_ent_name, orig_ent_name)) + entities = self.get_entities(metadata=True) + if ent_name not in entities: + sing = inflect.engine().singular_noun(ent_name) + if sing in entities: + ent_name = sing + else: + raise BIDSEntityError( + "'get_{}' can't be called because '{}' isn't a " + "recognized entity name.".format(orig_ent_name, orig_ent_name)) return partial(self.get, return_type='id', target=ent_name) # Spit out default message if we get this far raise AttributeError("%s object has no attribute named %r" % @@ -631,7 +637,8 @@ def get(self, return_type: str = 'object', target: str = None, scope: str = None def entities(self): return self.get_entities() - def get_entities(self, scope: str = None, sort: bool = False, long_form: bool = True, metadata: bool = False) -> dict: + def get_entities(self, scope: str = None, sort: bool = False, + long_form: bool = True, metadata: bool = True) -> dict: """Returns a unique set of entities found within the dataset as a dict. Each key of the resulting dict contains a list of values (with at least one element). @@ -658,24 +665,28 @@ def get_entities(self, scope: str = None, sort: bool = False, long_form: bool = a unique set of entities found within the dataset as a dict """ - entities = query_entities(self.dataset, scope, sort, long_form=long_form) + entities = query_entities(self.dataset, scope, long_form=long_form) - metadata = {} if metadata is True: - metadata = self._get_metadata_keys() - - return {**entities, **metadata} + results = {**entities, **self._get_unique_metadata()} + + if sort: + results = {k: sorted(v) for k, v in sorted(results.items())} + + return results - def _get_metadata_keys(self): - """Return a list of all metadata keys found in the dataset.""" + def _get_unique_metadata(self): + """Return a list of all unique metadata key and values found in the dataset.""" all_metadata_objects = self.dataset.select(self.schema.MetadataArtifact).objects() - keys = set() + metadata = defaultdict(set) for obj in all_metadata_objects: - keys.update(obj['contents'].keys()) + for k, v in obj['contents'].items(): + if isinstance(v, typing.Hashable): + metadata[k].add(v) - return keys + return metadata def get_dataset_description(self, scope='self', all_=False) -> Union[List[Dict], Dict]: """Return contents of dataset_description.json. From 57205e3b98271b9e450422d98febb98626d09944 Mon Sep 17 00:00:00 2001 From: Alejandro de la Vega Date: Mon, 27 Feb 2023 11:54:40 -0600 Subject: [PATCH 04/15] Implement invalid filters rules --- bids/layout/layout.py | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/bids/layout/layout.py b/bids/layout/layout.py index 10c64d4a8..0dd3d04dc 100644 --- a/bids/layout/layout.py +++ b/bids/layout/layout.py @@ -342,7 +342,7 @@ def __getattr__(self, key): except KeyError: pass if key.startswith('get_'): - orig_ent_name = key.replace('get_', '') + ent_name = key.replace('get_', '') entities = self.get_entities(metadata=True) if ent_name not in entities: sing = inflect.engine().singular_noun(ent_name) @@ -351,7 +351,7 @@ def __getattr__(self, key): else: raise BIDSEntityError( "'get_{}' can't be called because '{}' isn't a " - "recognized entity name.".format(orig_ent_name, orig_ent_name)) + "recognized entity name.".format(ent_name, ent_name)) return partial(self.get, return_type='id', target=ent_name) # Spit out default message if we get this far raise AttributeError("%s object has no attribute named %r" % @@ -550,8 +550,8 @@ def count_matches(f): def get(self, return_type: str = 'object', target: str = None, scope: str = None, extension: Union[str, List[str]] = None, suffix: Union[str, List[str]] = None, - regex_search=None, - **entities) -> Union[List[str], List[object]]: + regex_search=None, invalid_filters: str = 'error', + **filters) -> Union[List[str], List[object]]: """Retrieve files and/or metadata from the current Layout. Parameters @@ -606,12 +606,30 @@ def get(self, return_type: str = 'object', target: str = None, scope: str = None if regex_search is None: regex_search = self._regex_search + self_entities = self.get_entities() + + if invalid_filters != 'allow': + bad_filters = set(filters.keys()) - set(self_entities.keys()) + if bad_filters: + if invalid_filters == 'drop': + for bad_filt in bad_filters: + filters.pop(bad_filt) + elif invalid_filters == 'error': + first_bad = list(bad_filters)[0] + msg = "'{}' is not a recognized entity. ".format(first_bad) + ents = list(self_entities.keys()) + suggestions = difflib.get_close_matches(first_bad, ents) + if suggestions: + msg += "Did you mean {}? ".format(suggestions) + raise ValueError(msg + "If you're sure you want to impose " + "this constraint, set " + "invalid_filters='allow'.") + # Provide some suggestions if target is specified and invalid. if return_type in ("dir", "id"): if target is None: raise TargetError(f'If return_type is "id" or "dir", a valid target ' 'entity must also be specified.') - self_entities = self.get_entities() if target not in self_entities: potential = list(self_entities.keys()) suggestions = difflib.get_close_matches(target, potential) @@ -622,7 +640,7 @@ def get(self, return_type: str = 'object', target: str = None, scope: str = None raise TargetError(f"Unknown target '{target}'. {message}") folder = self.dataset - result = query(folder, return_type, target, scope, extension, suffix, regex_search, **entities) + result = query(folder, return_type, target, scope, extension, suffix, regex_search, **filters) if return_type == 'file': result = natural_sort(result) if return_type == "object": From 82a4c44fea2b3e24d8026fcc48e921b75f43a041 Mon Sep 17 00:00:00 2001 From: Alejandro de la Vega Date: Mon, 27 Feb 2023 15:18:30 -0600 Subject: [PATCH 05/15] Add relative paths to BIDSFile --- bids/layout/layout.py | 12 ++++++------ bids/layout/models.py | 10 +++++++--- bids/layout/tests/test_layout.py | 2 +- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/bids/layout/layout.py b/bids/layout/layout.py index 0dd3d04dc..f6a4f6fc4 100644 --- a/bids/layout/layout.py +++ b/bids/layout/layout.py @@ -298,6 +298,7 @@ def __init__( self.validationReport = None self._regex_search = regex_search + self._absolute_paths = absolute_paths if validate: self.validationReport = self.validate() @@ -320,11 +321,6 @@ def __init__( "indexer no longer has any effect and will be removed", DeprecationWarning ) - if absolute_paths is not None: - warnings.warn( - "absolute_paths no longer has any effect and will be removed", - DeprecationWarning - ) if kwargs: warnings.warn(f"Unknown keyword arguments: {kwargs}") if config is not None: @@ -640,13 +636,17 @@ def get(self, return_type: str = 'object', target: str = None, scope: str = None raise TargetError(f"Unknown target '{target}'. {message}") folder = self.dataset + + if not self._absolute_paths: + filters['absolute_path'] = False + result = query(folder, return_type, target, scope, extension, suffix, regex_search, **filters) if return_type == 'file': result = natural_sort(result) if return_type == "object": if result: result = natural_sort( - [BIDSFile(res) for res in result], + [BIDSFile(res, absolute_path=self._absolute_paths) for res in result], "path" ) return result diff --git a/bids/layout/models.py b/bids/layout/models.py index bfd3067ba..48f3fc82f 100644 --- a/bids/layout/models.py +++ b/bids/layout/models.py @@ -37,10 +37,11 @@ def from_filename(cls, filename): break return cls(path) - def __init__(self, file_ref: Union[str, os.PathLike, Artifact], schema = None): + def __init__(self, file_ref: Union[str, os.PathLike, Artifact], schema = None, absolute_paths=True): self._path = None self._artifact = None self._schema = schema + self._absolute_path = absolute_paths if isinstance(file_ref, (str, os.PathLike)): self._path = Path(file_ref) elif isinstance(file_ref, Artifact): @@ -51,7 +52,10 @@ def __init__(self, file_ref: Union[str, os.PathLike, Artifact], schema = None): def path(self): """ Convenience property for accessing path as a string.""" try: - return self._artifact.get_absolute_path() + if self._absolute_path: + return self._artifact.get_absolute_path() + else: + return self._artifact.get_relative_path() except AttributeError: return str(self._path) @@ -80,7 +84,7 @@ def __fspath__(self): @property def relpath(self): """ No longer have access to the BIDSLayout root directory """ - raise NotImplementedError + raise self._artifact.get_relative_path() def get_associations(self, kind=None, include_parents=False): """Get associated files, optionally limiting by association kind. diff --git a/bids/layout/tests/test_layout.py b/bids/layout/tests/test_layout.py index 5f094cf1f..4708bb064 100644 --- a/bids/layout/tests/test_layout.py +++ b/bids/layout/tests/test_layout.py @@ -360,7 +360,7 @@ def test_query_derivatives(layout_ds005_derivs): extension='.tsv') result = [f.path for f in result] assert len(result) == 49 - assert 'sub-01_task-mixedgamblestask_run-01_desc-extra_events.tsv' in result + assert 'sub-01_task-mixedgamblestask_run-01_desc-extra_events.tsv' in result # Fails due to absolute path result = layout_ds005_derivs.get(suffix='events', return_type='object', scope='raw', extension='.tsv') assert len(result) == 48 From 9faa52c3d5195d201cd1bc545f2e8760487a3774 Mon Sep 17 00:00:00 2001 From: Alejandro de la Vega Date: Mon, 27 Feb 2023 15:24:00 -0600 Subject: [PATCH 06/15] Fix absolute paths --- bids/layout/layout.py | 5 ++--- bids/layout/models.py | 6 ++++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/bids/layout/layout.py b/bids/layout/layout.py index f6a4f6fc4..7f9a8635c 100644 --- a/bids/layout/layout.py +++ b/bids/layout/layout.py @@ -637,10 +637,9 @@ def get(self, return_type: str = 'object', target: str = None, scope: str = None folder = self.dataset - if not self._absolute_paths: - filters['absolute_path'] = False + result = query(folder, return_type, target, scope, extension, suffix, regex_search, + absolute_paths=self._absolute_paths, **filters) - result = query(folder, return_type, target, scope, extension, suffix, regex_search, **filters) if return_type == 'file': result = natural_sort(result) if return_type == "object": diff --git a/bids/layout/models.py b/bids/layout/models.py index 48f3fc82f..934d793f2 100644 --- a/bids/layout/models.py +++ b/bids/layout/models.py @@ -37,11 +37,13 @@ def from_filename(cls, filename): break return cls(path) - def __init__(self, file_ref: Union[str, os.PathLike, Artifact], schema = None, absolute_paths=True): + def __init__(self, file_ref: Union[str, os.PathLike, Artifact], schema = None, + absolute_path=True): + self._path = None self._artifact = None self._schema = schema - self._absolute_path = absolute_paths + self._absolute_path = absolute_path if isinstance(file_ref, (str, os.PathLike)): self._path = Path(file_ref) elif isinstance(file_ref, Artifact): From 2c9b5dc0709fe1ae0ff218c76285c707982ca109 Mon Sep 17 00:00:00 2001 From: Alejandro de la Vega Date: Mon, 27 Feb 2023 15:55:09 -0600 Subject: [PATCH 07/15] Add xfails --- bids/layout/layout.py | 2 +- bids/layout/models.py | 2 +- bids/layout/tests/test_layout.py | 20 +++++++++++++------- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/bids/layout/layout.py b/bids/layout/layout.py index 7f9a8635c..653bdd541 100644 --- a/bids/layout/layout.py +++ b/bids/layout/layout.py @@ -267,7 +267,7 @@ def __init__( database_path: Optional[str]=None, reset_database: Optional[bool]=None, indexer: Optional[Callable]=None, - absolute_paths: Optional[bool]=None, + absolute_paths: Optional[bool]=True, ignore: Optional[List[str]]=None, force_index: Optional[List[str]]=None, **kwargs, diff --git a/bids/layout/models.py b/bids/layout/models.py index 934d793f2..10e215571 100644 --- a/bids/layout/models.py +++ b/bids/layout/models.py @@ -86,7 +86,7 @@ def __fspath__(self): @property def relpath(self): """ No longer have access to the BIDSLayout root directory """ - raise self._artifact.get_relative_path() + return self._artifact.get_relative_path() def get_associations(self, kind=None, include_parents=False): """Get associated files, optionally limiting by association kind. diff --git a/bids/layout/tests/test_layout.py b/bids/layout/tests/test_layout.py index 4708bb064..b74b56097 100644 --- a/bids/layout/tests/test_layout.py +++ b/bids/layout/tests/test_layout.py @@ -108,6 +108,7 @@ def test_get_file(layout_ds005_derivs): class TestDerivativeAsRoot: + @pytest.mark.xfail(reason="derivative as root not yet supported") def test_dataset_without_datasettype_parsed_as_raw(self): dataset_path = Path("ds005_derivs", "format_errs", "no_dataset_type") unvalidated = BIDSLayout( @@ -121,17 +122,20 @@ def test_dataset_without_datasettype_parsed_as_raw(self): validated = BIDSLayout(Path(get_test_data_path()) / dataset_path) assert len(validated.get()) == 1 + @pytest.mark.xfail(reason="derivative as root not yet supported") def test_dataset_missing_generatedby_fails_validation(self): dataset_path = Path("ds005_derivs", "format_errs", "no_pipeline_description") with pytest.raises(BIDSDerivativesValidationError): BIDSLayout(Path(get_test_data_path()) / dataset_path) + @pytest.mark.xfail(reason="derivative as root not yet supported") def test_correctly_formatted_derivative_loads_as_derivative(self): dataset_path = Path("ds005_derivs", "dummy") layout = BIDSLayout(Path(get_test_data_path()) / dataset_path) assert len(layout.get()) == 4 assert len(layout.get(desc="preproc")) == 3 + @pytest.mark.xfail(reason="derivative as root not yet supported") @pytest.mark.parametrize( "dataset_path", [ @@ -240,7 +244,7 @@ def test_get_with_bad_target(layout_7t_trt): msg = str(exc.value) assert 'subject' in msg and 'reconstruction' not in msg - +@pytest.mark.xfail(reason="datatype not yet implemented") def test_get_bvals_bvecs(layout_ds005): dwifile = layout_ds005.get(subject="01", datatype="dwi")[0] result = layout_ds005.get_bval(dwifile.path) @@ -325,7 +329,7 @@ def test_get_return_sorted(layout_7t_trt): paths = layout_7t_trt.get(target='subject', return_type='file') assert natural_sort(paths) == paths - +@pytest.mark.xfail(reason="selecting adding of derivatives is not suported") def test_layout_with_derivs(layout_ds005_derivs): assert layout_ds005_derivs.root == join(get_test_data_path(), 'ds005') assert isinstance(layout_ds005_derivs.files, dict) @@ -338,7 +342,7 @@ def test_layout_with_derivs(layout_ds005_derivs): entities = deriv.query_entities(long_form=True) assert 'subject' in entities - +@pytest.mark.xfail(reason="out of tree derivatives are not supported") def test_layout_with_multi_derivs(layout_ds005_multi_derivs): assert layout_ds005_multi_derivs.root == join(get_test_data_path(), 'ds005') assert isinstance(layout_ds005_multi_derivs.files, dict) @@ -358,18 +362,18 @@ def test_layout_with_multi_derivs(layout_ds005_multi_derivs): def test_query_derivatives(layout_ds005_derivs): result = layout_ds005_derivs.get(suffix='events', return_type='object', extension='.tsv') - result = [f.path for f in result] + result = [f.relpath for f in result] assert len(result) == 49 assert 'sub-01_task-mixedgamblestask_run-01_desc-extra_events.tsv' in result # Fails due to absolute path result = layout_ds005_derivs.get(suffix='events', return_type='object', scope='raw', extension='.tsv') assert len(result) == 48 - result = [f.path for f in result] + result = [f.relpath for f in result] assert 'sub-01_task-mixedgamblestask_run-01_desc-extra_events.tsv' not in result result = layout_ds005_derivs.get(suffix='events', return_type='object', desc='extra', extension='.tsv') assert len(result) == 1 - result = [f.path for f in result] + result = [f.relpath for f in result] assert 'sub-01_task-mixedgamblestask_run-01_desc-extra_events.tsv' in result @@ -396,6 +400,7 @@ def test_get_tr(layout_7t_trt): # XXX 0.14: Add dot to extension (difficult to parametrize with module-scoped fixture) +@pytest.mark.xfail(reason="scope and config not implemented") def test_parse_file_entities_from_layout(layout_synthetic): layout = layout_synthetic filename = '/sub-03_ses-07_run-4_desc-bleargh_sekret.nii.gz' @@ -429,7 +434,8 @@ def test_path_arguments(): assert layout.get(scope='derivatives/events') assert not layout.get(scope='nonexistent') - + +@pytest.mark.xfail(reason="adding derivatives is not supported") def test_get_dataset_description(layout_ds005_derivs): dd = layout_ds005_derivs.get_dataset_description() assert isinstance(dd, dict) From 0876d494ccb7d92ddc56728e34e6b81892c804bd Mon Sep 17 00:00:00 2001 From: Alejandro de la Vega Date: Mon, 27 Feb 2023 16:25:04 -0600 Subject: [PATCH 08/15] get_procs doesn't exist, because long-name is used --- bids/layout/tests/test_layout.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/bids/layout/tests/test_layout.py b/bids/layout/tests/test_layout.py index b74b56097..2696c5d36 100644 --- a/bids/layout/tests/test_layout.py +++ b/bids/layout/tests/test_layout.py @@ -188,9 +188,9 @@ def test_get_metadata4(layout_ds005): def test_get_metadata_meg(layout_ds117): funcs = ['get_subjects', 'get_sessions', 'get_tasks', 'get_runs', - 'get_acquisitions', 'get_procs'] + 'get_acquisitions', 'get_processing'] assert all([hasattr(layout_ds117, f) for f in funcs]) - procs = layout_ds117.get_procs() + procs = layout_ds117.get_processing() assert procs == ['sss'] target = 'sub-02/ses-meg/meg/sub-02_ses-meg_task-facerecognition_run-01_meg.fif' target = target.split('/') @@ -233,16 +233,17 @@ def test_get_metadata_error(layout_7t_trt): with pytest.raises(KeyError) as err: result['Missing'] - +# Changed this test to not expect 'reconstruction' and 'proc' +# which AFAIK ae not in 7t_trt def test_get_with_bad_target(layout_7t_trt): with pytest.raises(TargetError) as exc: layout_7t_trt.get(target='unicorn', return_type='id') msg = str(exc.value) - assert 'subject' in msg and 'reconstruction' in msg and 'proc' in msg + assert 'subject' in msg and 'session' in msg and 'acquisition' in msg with pytest.raises(TargetError) as exc: layout_7t_trt.get(target='subject') msg = str(exc.value) - assert 'subject' in msg and 'reconstruction' not in msg + assert 'subject' in msg and 'session' not in msg @pytest.mark.xfail(reason="datatype not yet implemented") def test_get_bvals_bvecs(layout_ds005): From 307aa8cbb0c344d3d6de57d262c43fbb927ad264 Mon Sep 17 00:00:00 2001 From: Alejandro de la Vega Date: Mon, 27 Feb 2023 16:34:09 -0600 Subject: [PATCH 09/15] Add comment about Enum --- bids/layout/tests/test_layout.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/bids/layout/tests/test_layout.py b/bids/layout/tests/test_layout.py index 2696c5d36..8c597949a 100644 --- a/bids/layout/tests/test_layout.py +++ b/bids/layout/tests/test_layout.py @@ -241,7 +241,7 @@ def test_get_with_bad_target(layout_7t_trt): msg = str(exc.value) assert 'subject' in msg and 'session' in msg and 'acquisition' in msg with pytest.raises(TargetError) as exc: - layout_7t_trt.get(target='subject') + layout_7t_trt.get(target='sub') msg = str(exc.value) assert 'subject' in msg and 'session' not in msg @@ -313,7 +313,11 @@ def test_get_val_enum_any(layout_7t_trt): suffix='bold', acquisition="*") assert len(bold_files) == 2 - +# The following test should have argubly been a failure in pybids +# but it was not. Session does not exist in dataset, yet since Query +# Enum is set to optional the query passes even with 'ignore_filters' +# set to False. +@pytest.mark.xfail(reason="Optional entity using Enum not supported.") def test_get_val_enum_any_optional(layout_7t_trt, layout_ds005): # layout with sessions bold_files = layout_7t_trt.get(suffix='bold', run=1, subject='01') From 490ed73e3914207c1d457cdc94dcab1ae9d8b15b Mon Sep 17 00:00:00 2001 From: Alejandro de la Vega Date: Mon, 27 Feb 2023 17:26:11 -0600 Subject: [PATCH 10/15] Mark all xfails --- bids/layout/layout.py | 56 +++++++++++++++++++------------- bids/layout/tests/conftest.py | 4 +-- bids/layout/tests/test_layout.py | 46 ++++++++++++++------------ 3 files changed, 61 insertions(+), 45 deletions(-) diff --git a/bids/layout/layout.py b/bids/layout/layout.py index 653bdd541..b00bf4a7f 100644 --- a/bids/layout/layout.py +++ b/bids/layout/layout.py @@ -599,42 +599,52 @@ def get(self, return_type: str = 'object', target: str = None, scope: str = None list of :obj:`bids.layout.BIDSFile` or str A list of BIDSFiles (default) or strings (see return_type). """ + + # error check on users accidentally passing in filters + if isinstance(filters.get('filters'), dict): + raise RuntimeError('You passed in filters as a dictionary named ' + 'filters; please pass the keys in as named ' + 'keywords to the `get()` call. For example: ' + '`layout.get(**filters)`.') + if regex_search is None: regex_search = self._regex_search self_entities = self.get_entities() - if invalid_filters != 'allow': - bad_filters = set(filters.keys()) - set(self_entities.keys()) - if bad_filters: - if invalid_filters == 'drop': - for bad_filt in bad_filters: - filters.pop(bad_filt) - elif invalid_filters == 'error': - first_bad = list(bad_filters)[0] - msg = "'{}' is not a recognized entity. ".format(first_bad) - ents = list(self_entities.keys()) - suggestions = difflib.get_close_matches(first_bad, ents) - if suggestions: - msg += "Did you mean {}? ".format(suggestions) - raise ValueError(msg + "If you're sure you want to impose " - "this constraint, set " - "invalid_filters='allow'.") + def _suggest(target): + """Suggest a valid value for an entity.""" + potential = list(self_entities.keys()) + suggestions = difflib.get_close_matches(target, potential) + if suggestions: + message = "Did you mean one of: {}?".format(suggestions) + else: + message = "Valid options are: {}".format(potential) + return message # Provide some suggestions if target is specified and invalid. if return_type in ("dir", "id"): if target is None: raise TargetError(f'If return_type is "id" or "dir", a valid target ' 'entity must also be specified.') + if target not in self_entities: - potential = list(self_entities.keys()) - suggestions = difflib.get_close_matches(target, potential) - if suggestions: - message = "Did you mean one of: {}?".format(suggestions) - else: - message = "Valid targets are: {}".format(potential) - raise TargetError(f"Unknown target '{target}'. {message}") + raise TargetError(f"Unknown target '{target}'. {_suggest(target)}") + if invalid_filters != 'allow': + bad_filters = set(filters.keys()) - set(self_entities.keys()) + if bad_filters: + if invalid_filters == 'drop': + for bad_filt in bad_filters: + filters.pop(bad_filt) + elif invalid_filters == 'error': + first_bad = list(bad_filters)[0] + message = _suggest(first_bad) + raise ValueError(f"Unknown entity. {message}", + "If you're sure you want to impose ", + "this constraint, set ", + "invalid_filters='allow'.") + folder = self.dataset result = query(folder, return_type, target, scope, extension, suffix, regex_search, diff --git a/bids/layout/tests/conftest.py b/bids/layout/tests/conftest.py index e03d33490..19abb42bb 100644 --- a/bids/layout/tests/conftest.py +++ b/bids/layout/tests/conftest.py @@ -18,11 +18,11 @@ def layout_7t_trt_relpath(): data_dir = join(get_test_data_path(), '7t_trt') return BIDSLayout(data_dir, absolute_paths=False) - +# AD: Manually ignoring derivatives for now @pytest.fixture(scope="module") def layout_ds005(): data_dir = join(get_test_data_path(), 'ds005') - return BIDSLayout(data_dir) + return BIDSLayout(data_dir, ignore=['derivatives', 'models']) @pytest.fixture(scope="module") diff --git a/bids/layout/tests/test_layout.py b/bids/layout/tests/test_layout.py index 8c597949a..7ccc170ac 100644 --- a/bids/layout/tests/test_layout.py +++ b/bids/layout/tests/test_layout.py @@ -241,7 +241,7 @@ def test_get_with_bad_target(layout_7t_trt): msg = str(exc.value) assert 'subject' in msg and 'session' in msg and 'acquisition' in msg with pytest.raises(TargetError) as exc: - layout_7t_trt.get(target='sub') + layout_7t_trt.get(target='sub', return_type='id') msg = str(exc.value) assert 'subject' in msg and 'session' not in msg @@ -363,13 +363,14 @@ def test_layout_with_multi_derivs(layout_ds005_multi_derivs): preproc = layout_ds005_multi_derivs.get(desc='preproc') assert len(preproc) == 3 - +@pytest.mark.xfail(reason="Fails because derivatives are indexed by ancpbids as a normal file," + "so path is relatively to main root, not to the derivative root") def test_query_derivatives(layout_ds005_derivs): result = layout_ds005_derivs.get(suffix='events', return_type='object', extension='.tsv') result = [f.relpath for f in result] assert len(result) == 49 - assert 'sub-01_task-mixedgamblestask_run-01_desc-extra_events.tsv' in result # Fails due to absolute path + assert 'sub-01_task-mixedgamblestask_run-01_desc-extra_events.tsv' in result result = layout_ds005_derivs.get(suffix='events', return_type='object', scope='raw', extension='.tsv') assert len(result) == 48 @@ -387,7 +388,9 @@ def test_derivative_getters(): full_layout = BIDSLayout(synth_path) assert set(full_layout.get_spaces()) == {'MNI152NLin2009cAsym', 'T1w'} - +## The following fails because ancpids does not index meta-data correctly +## when an entity is missing. E.g. when a run has no run entity, it should +## match to a more general sidecar, but it does not. def test_get_tr(layout_7t_trt): # Bad subject, should fail with pytest.raises(NoMatchError) as exc: @@ -451,7 +454,7 @@ def test_get_dataset_description(layout_ds005_derivs): names = {'Mixed-gambles task'} assert set([d['Name'] for d in dd]) == names - +@pytest.mark.xfail(reason="handling wrong dtypes not implemented") def test_get_with_wrong_dtypes(layout_7t_trt): ''' Test automatic dtype sanitization. ''' l = layout_7t_trt @@ -490,22 +493,24 @@ def test_get_with_regex_search_bad_dtype(layout_7t_trt): # Two runs (1 per session) for each of subjects '10' and '01' assert len(results) == 4 - +# AD: Changed this test to use a entity found in ds005 +# 'session' was not in dataset, therefore not suggested def test_get_with_invalid_filters(layout_ds005): l = layout_ds005 # Raise error with suggestions - with pytest.raises(ValueError, match='session'): - l.get(subject='12', ses=True, invalid_filters='error') - with pytest.raises(ValueError, match='session'): - l.get(subject='12', ses=True) + with pytest.raises(ValueError, match='subject'): + l.get(sub='12', invalid_filters='error') + with pytest.raises(ValueError, match='subject'): + l.get(sub='12', ses=True) # Silently drop amazing - res_without = l.get(subject='12', suffix='bold') + res_without = l.get(subject='12', suffix='bold', return_type='file') res_drop = l.get(subject='12', suffix='bold', amazing='!!!', - invalid_filters='drop') + invalid_filters='drop', return_type='file') assert res_without == res_drop assert len(res_drop) == 3 # Retain amazing, producing empty set - allow_res = l.get(subject='12', amazing=True, invalid_filters='allow') + allow_res = l.get(subject='12', amazing=True, invalid_filters='allow', + return_type='file') assert allow_res == [] # assert warning when filters are passed in @@ -515,15 +520,16 @@ def test_get_with_invalid_filters(layout_ds005): # Correct call: l.get(**filters) - +# AD: Fixed by returying file. This test was failing because the sets +# didnt handle BIDSFile object correctly. def test_get_with_query_constants_in_match_list(layout_ds005): l = layout_ds005 - get1 = l.get(subject='12', run=1, suffix='bold') - get_none = l.get(subject='12', run=None, suffix='bold') - get_any = l.get(subject='12', run='*', suffix='bold') - get1_and_none = l.get(subject='12', run=[None, 1], suffix='bold') - get1_and_any = l.get(subject='12', run=['*', 1], suffix='bold') - get_none_and_any = l.get(subject='12', run=['*', None], suffix='bold') + get1 = l.get(subject='12', run=1, suffix='bold', return_type='file') + get_none = l.get(subject='12', run=None, suffix='bold', return_type='file') + get_any = l.get(subject='12', run='*', suffix='bold', return_type='file') + get1_and_none = l.get(subject='12', run=[None, 1], suffix='bold', return_type='file') + get1_and_any = l.get(subject='12', run=['*', 1], suffix='bold', return_type='file') + get_none_and_any = l.get(subject='12', run=['*', None], suffix='bold', return_type='file') assert set(get1_and_none) == set(get1) | set(get_none) assert set(get1_and_any) == set(get1) | set(get_any) assert set(get_none_and_any) == set(get_none) | set(get_any) From 3bec37edf43307d48f2259834fdb2b100cf386a0 Mon Sep 17 00:00:00 2001 From: Alejandro de la Vega Date: Tue, 28 Feb 2023 13:04:33 -0600 Subject: [PATCH 11/15] Reorganize validation logic --- bids/layout/layout.py | 124 +++++++++++++++++++++++++----------------- 1 file changed, 75 insertions(+), 49 deletions(-) diff --git a/bids/layout/layout.py b/bids/layout/layout.py index b00bf4a7f..e67025bfd 100644 --- a/bids/layout/layout.py +++ b/bids/layout/layout.py @@ -543,6 +543,78 @@ def count_matches(f): else match for match in matches] return matches if all_ else matches[0] if matches else None + def _sanitize_validate_query(self, target, filters, return_type, invalid_filters): + """Sanitize and validate query parameters + + Parameters + ---------- + target : str + Name of the target entity to get results for. + filters : dict + Dictionary of filters to apply to the query. + return_type : str + The type of object to return. Must be one of 'filename', + 'object', or 'dir'. + invalid_filters : str + What to do when an invalid filter is encountered. Must be one + of 'error', 'drop', or 'allow'. + + Returns + ------- + target : str + The sanitized target. + filters : dict + The sanitized filters. + """ + # error check on users accidentally passing in filters + if isinstance(filters.get('filters'), dict): + raise RuntimeError('You passed in filters as a dictionary named ' + 'filters; please pass the keys in as named ' + 'keywords to the `get()` call. For example: ' + '`layout.get(**filters)`.') + + # Entity filtering + if filters: + # TODO: Implement _sanitize_query_dtypes + + # TODO: Implement _sanitize_query_entities + # i.e. optional Query entities. + pass + + # Provide some suggestions for target and filter names + def _suggest(target): + """Suggest a valid value for an entity.""" + potential = list(self.get_entities().keys()) + suggestions = difflib.get_close_matches(target, potential) + if suggestions: + message = "Did you mean one of: {}?".format(suggestions) + else: + message = "Valid options are: {}".format(potential) + return message + + if return_type in ("dir", "id"): + if target is None: + raise TargetError(f'If return_type is "id" or "dir", a valid target ' + 'entity must also be specified.') + + if target not in self.get_entities(): + raise TargetError(f"Unknown target '{target}'. {_suggest(target)}") + + if invalid_filters != 'allow': + bad_filters = set(filters.keys()) - set(self.get_entities().keys()) + if bad_filters: + if invalid_filters == 'drop': + for bad_filt in bad_filters: + filters.pop(bad_filt) + elif invalid_filters == 'error': + first_bad = list(bad_filters)[0] + message = _suggest(first_bad) + raise ValueError(f"Unknown entity. {message}", + "If you're sure you want to impose ", + "this constraint, set ", + "invalid_filters='allow'.") + + return target, filters def get(self, return_type: str = 'object', target: str = None, scope: str = None, extension: Union[str, List[str]] = None, suffix: Union[str, List[str]] = None, @@ -599,51 +671,12 @@ def get(self, return_type: str = 'object', target: str = None, scope: str = None list of :obj:`bids.layout.BIDSFile` or str A list of BIDSFiles (default) or strings (see return_type). """ - - # error check on users accidentally passing in filters - if isinstance(filters.get('filters'), dict): - raise RuntimeError('You passed in filters as a dictionary named ' - 'filters; please pass the keys in as named ' - 'keywords to the `get()` call. For example: ' - '`layout.get(**filters)`.') if regex_search is None: regex_search = self._regex_search - self_entities = self.get_entities() - - def _suggest(target): - """Suggest a valid value for an entity.""" - potential = list(self_entities.keys()) - suggestions = difflib.get_close_matches(target, potential) - if suggestions: - message = "Did you mean one of: {}?".format(suggestions) - else: - message = "Valid options are: {}".format(potential) - return message - - # Provide some suggestions if target is specified and invalid. - if return_type in ("dir", "id"): - if target is None: - raise TargetError(f'If return_type is "id" or "dir", a valid target ' - 'entity must also be specified.') - - if target not in self_entities: - raise TargetError(f"Unknown target '{target}'. {_suggest(target)}") - - if invalid_filters != 'allow': - bad_filters = set(filters.keys()) - set(self_entities.keys()) - if bad_filters: - if invalid_filters == 'drop': - for bad_filt in bad_filters: - filters.pop(bad_filt) - elif invalid_filters == 'error': - first_bad = list(bad_filters)[0] - message = _suggest(first_bad) - raise ValueError(f"Unknown entity. {message}", - "If you're sure you want to impose ", - "this constraint, set ", - "invalid_filters='allow'.") + # Sanitize & validate query + target, filters = self._sanitize_validate_query(target, filters) folder = self.dataset @@ -921,11 +954,4 @@ def __repr__(self): n_runs = len(set(ents['run'])) if 'run' in ents else 0 s = ("BIDS Layout: ...{} | Subjects: {} | Sessions: {} | " "Runs: {}".format(self.dataset.base_dir_, n_subjects, n_sessions, n_runs)) - return s - - -class Query(enum.Enum): - """Enums for use with BIDSLayout.get().""" - NONE = 1 # Entity must not be present - REQUIRED = ANY = 2 # Entity must be defined, but with an arbitrary value - OPTIONAL = 3 # Entity may or may not be defined + return s \ No newline at end of file From 57ff99fa26f6c2a096d7bcfeee5def799519f1cb Mon Sep 17 00:00:00 2001 From: Alejandro de la Vega Date: Tue, 28 Feb 2023 14:00:07 -0600 Subject: [PATCH 12/15] Check only against schema for valid filters --- bids/layout/layout.py | 46 +++++++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/bids/layout/layout.py b/bids/layout/layout.py index e67025bfd..8c80938ff 100644 --- a/bids/layout/layout.py +++ b/bids/layout/layout.py @@ -573,23 +573,17 @@ def _sanitize_validate_query(self, target, filters, return_type, invalid_filters 'keywords to the `get()` call. For example: ' '`layout.get(**filters)`.') - # Entity filtering - if filters: - # TODO: Implement _sanitize_query_dtypes - - # TODO: Implement _sanitize_query_entities - # i.e. optional Query entities. - pass + schema_entities = [e.name for e in self.schema.EntityEnum] # Provide some suggestions for target and filter names def _suggest(target): """Suggest a valid value for an entity.""" - potential = list(self.get_entities().keys()) + potential = list(schema_entities) suggestions = difflib.get_close_matches(target, potential) if suggestions: message = "Did you mean one of: {}?".format(suggestions) else: - message = "Valid options are: {}".format(potential) + message = "" return message if return_type in ("dir", "id"): @@ -597,11 +591,11 @@ def _suggest(target): raise TargetError(f'If return_type is "id" or "dir", a valid target ' 'entity must also be specified.') - if target not in self.get_entities(): + if target not in schema_entities: raise TargetError(f"Unknown target '{target}'. {_suggest(target)}") if invalid_filters != 'allow': - bad_filters = set(filters.keys()) - set(self.get_entities().keys()) + bad_filters = set(filters.keys()) - set(schema_entities) if bad_filters: if invalid_filters == 'drop': for bad_filt in bad_filters: @@ -609,10 +603,21 @@ def _suggest(target): elif invalid_filters == 'error': first_bad = list(bad_filters)[0] message = _suggest(first_bad) - raise ValueError(f"Unknown entity. {message}", - "If you're sure you want to impose ", - "this constraint, set ", - "invalid_filters='allow'.") + raise ValueError( + f"Unknown entity '{first_bad}'. {message}. If you're sure you want to impose " + \ + "this constraint, set invalid_filters='allow'.") + + # Process Query Enum + if filters: + for k, val in filters.items(): + if val == Query.REQUIRED: + val = '.+' + + elif val == Query.OPTIONAL: + val = '.*' + + elif val == Query.NONE: + pass return target, filters @@ -676,7 +681,8 @@ def get(self, return_type: str = 'object', target: str = None, scope: str = None regex_search = self._regex_search # Sanitize & validate query - target, filters = self._sanitize_validate_query(target, filters) + target, filters = self._sanitize_validate_query(target, filters, return_type, + invalid_filters) folder = self.dataset @@ -954,4 +960,10 @@ def __repr__(self): n_runs = len(set(ents['run'])) if 'run' in ents else 0 s = ("BIDS Layout: ...{} | Subjects: {} | Sessions: {} | " "Runs: {}".format(self.dataset.base_dir_, n_subjects, n_sessions, n_runs)) - return s \ No newline at end of file + return s + +class Query(enum.Enum): + """Enums for use with BIDSLayout.get().""" + NONE = 1 # Entity must not be present + REQUIRED = ANY = 2 # Entity must be defined, but with an arbitrary value + OPTIONAL = 3 # Entity may or may not be defined From 5fa4591832172818b82c7e2bb2129228cad7dba5 Mon Sep 17 00:00:00 2001 From: Alejandro de la Vega Date: Tue, 28 Feb 2023 14:12:00 -0600 Subject: [PATCH 13/15] Clean up suggestion messages --- bids/layout/layout.py | 26 +++++++------------------- bids/layout/tests/test_layout.py | 6 +++--- 2 files changed, 10 insertions(+), 22 deletions(-) diff --git a/bids/layout/layout.py b/bids/layout/layout.py index 8c80938ff..f5b313253 100644 --- a/bids/layout/layout.py +++ b/bids/layout/layout.py @@ -543,7 +543,7 @@ def count_matches(f): else match for match in matches] return matches if all_ else matches[0] if matches else None - def _sanitize_validate_query(self, target, filters, return_type, invalid_filters): + def _sanitize_validate_query(self, target, filters, return_type, invalid_filters, regex_search): """Sanitize and validate query parameters Parameters @@ -581,7 +581,7 @@ def _suggest(target): potential = list(schema_entities) suggestions = difflib.get_close_matches(target, potential) if suggestions: - message = "Did you mean one of: {}?".format(suggestions) + message = ". Did you mean one of: {}?".format(suggestions) else: message = "" return message @@ -592,7 +592,7 @@ def _suggest(target): 'entity must also be specified.') if target not in schema_entities: - raise TargetError(f"Unknown target '{target}'. {_suggest(target)}") + raise TargetError(f"Unknown target '{target}'{_suggest(target)}") if invalid_filters != 'allow': bad_filters = set(filters.keys()) - set(schema_entities) @@ -604,22 +604,10 @@ def _suggest(target): first_bad = list(bad_filters)[0] message = _suggest(first_bad) raise ValueError( - f"Unknown entity '{first_bad}'. {message}. If you're sure you want to impose " + \ + f"Unknown entity '{first_bad}'{message} If you're sure you want to impose " + \ "this constraint, set invalid_filters='allow'.") - # Process Query Enum - if filters: - for k, val in filters.items(): - if val == Query.REQUIRED: - val = '.+' - - elif val == Query.OPTIONAL: - val = '.*' - - elif val == Query.NONE: - pass - - return target, filters + return target, filters, regex_search def get(self, return_type: str = 'object', target: str = None, scope: str = None, extension: Union[str, List[str]] = None, suffix: Union[str, List[str]] = None, @@ -681,8 +669,8 @@ def get(self, return_type: str = 'object', target: str = None, scope: str = None regex_search = self._regex_search # Sanitize & validate query - target, filters = self._sanitize_validate_query(target, filters, return_type, - invalid_filters) + target, filters, regex_search = self._sanitize_validate_query(target, filters, return_type, + invalid_filters, regex_search) folder = self.dataset diff --git a/bids/layout/tests/test_layout.py b/bids/layout/tests/test_layout.py index 7ccc170ac..8674efb4c 100644 --- a/bids/layout/tests/test_layout.py +++ b/bids/layout/tests/test_layout.py @@ -233,13 +233,13 @@ def test_get_metadata_error(layout_7t_trt): with pytest.raises(KeyError) as err: result['Missing'] -# Changed this test to not expect 'reconstruction' and 'proc' -# which AFAIK ae not in 7t_trt +# Changed this test to only expect suggestions if target is close def test_get_with_bad_target(layout_7t_trt): with pytest.raises(TargetError) as exc: layout_7t_trt.get(target='unicorn', return_type='id') msg = str(exc.value) - assert 'subject' in msg and 'session' in msg and 'acquisition' in msg + assert msg == "Unknown target 'unicorn'" + with pytest.raises(TargetError) as exc: layout_7t_trt.get(target='sub', return_type='id') msg = str(exc.value) From 69e907080e21a9892cde60752c41bdf64e896cbf Mon Sep 17 00:00:00 2001 From: Alejandro de la Vega Date: Tue, 28 Feb 2023 14:12:11 -0600 Subject: [PATCH 14/15] Add attempt at Query Enum --- bids/layout/layout.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/bids/layout/layout.py b/bids/layout/layout.py index f5b313253..735ca0b17 100644 --- a/bids/layout/layout.py +++ b/bids/layout/layout.py @@ -607,6 +607,19 @@ def _suggest(target): f"Unknown entity '{first_bad}'{message} If you're sure you want to impose " + \ "this constraint, set invalid_filters='allow'.") + # Process Query Enum + if filters: + for k, val in filters.items(): + if val in [a for a in Query]: + regex_search = True # Force true if these are defined + if val == Query.REQUIRED: + filters[k] = '.+' + elif val == Query.OPTIONAL: + filters[k] = '.*' + elif val == Query.NONE: + # Regex for match no value -- guess from Copilot + filters[k] = '^(?!.*[^/])' + return target, filters, regex_search def get(self, return_type: str = 'object', target: str = None, scope: str = None, From 58790d7627ca9fd7b99b1172eb68471208be3e5f Mon Sep 17 00:00:00 2001 From: Alejandro de la Vega Date: Tue, 28 Feb 2023 14:37:53 -0600 Subject: [PATCH 15/15] Update bids/layout/layout.py Co-authored-by: Chris Markiewicz --- bids/layout/layout.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/bids/layout/layout.py b/bids/layout/layout.py index 735ca0b17..44aabd63e 100644 --- a/bids/layout/layout.py +++ b/bids/layout/layout.py @@ -610,15 +610,14 @@ def _suggest(target): # Process Query Enum if filters: for k, val in filters.items(): - if val in [a for a in Query]: - regex_search = True # Force true if these are defined - if val == Query.REQUIRED: - filters[k] = '.+' - elif val == Query.OPTIONAL: - filters[k] = '.*' - elif val == Query.NONE: - # Regex for match no value -- guess from Copilot - filters[k] = '^(?!.*[^/])' + if val == Query.OPTIONAL: + del filters[k] + elif val == Query.REQUIRED: + regex_search = True # Force true if these are defined + filters[k] = '.+' + elif val == Query.NONE: + regex_search = True + filters[k] = '^$' return target, filters, regex_search