diff --git a/src/bidsi/bids_model.py b/src/bidsi/bids_model.py index e2d95db..c35d032 100644 --- a/src/bidsi/bids_model.py +++ b/src/bidsi/bids_model.py @@ -2,6 +2,7 @@ from __future__ import annotations +import re from dataclasses import dataclass from functools import cached_property from pathlib import Path @@ -35,6 +36,19 @@ def relative_entity_dir(self, entity: BidsEntity) -> Path: rel_path /= entity.datatype return rel_path + def _clean_bids_field(self, value: str) -> str: + """Clean BIDS field value. + + Replaces all non-alphanumeric characters. + + Args: + value (str): Value to clean. + + Returns: + str: Cleaned value. + """ + return re.sub(r"[^a-zA-Z0-9]", "", value) + def relative_entity_path(self, entity: BidsEntity) -> Path: """Return relative path to entity.""" return self.relative_entity_dir(entity) / self.entity_formatter(entity) @@ -126,7 +140,39 @@ def subject_ids(self) -> List[str]: return list(set([entity.subject_id for entity in self.entities])) -@dataclass(frozen=True) +class EntityFieldStringDescriptor: + """Descriptor Object for BIDS fields. + + Cleans field values using a regular expression. + """ + + def __init__(self, *, clean_regex: str, default: Optional[str] = None) -> None: + """Initialize EntityFieldStringDescriptor.""" + self._default = default + self._clean_regex = clean_regex + + def __set_name__(self, owner: type[BidsEntity], name: str) -> None: + """Set name with underscore prefix.""" + self._name = "_" + name + + def __get__(self, obj: BidsEntity, type: type[BidsEntity]) -> str: + """Get value or default.""" + if obj is None: + raise AttributeError("No default value for BidsEntityStringDescriptor.") + + return str(getattr(obj, self._name, self._default)) + + def __set__(self, obj: object, value: str) -> None: + """Set value after substituting clean_regex matches with empty string.""" + value = re.sub(self._clean_regex, "", value) + setattr(obj, self._name, value) + + +# TODO: Add support for cleaning optional fields. +# TODO: Can BidsEntity be frozen? + + +@dataclass class BidsEntity: """Model of BIDS entity, a representation of data within the BIDS structure. @@ -134,10 +180,18 @@ class BidsEntity: Only one of file or tabular_data should be set. """ - subject_id: str - datatype: str - task_name: str - suffix: str + subject_id: EntityFieldStringDescriptor = EntityFieldStringDescriptor( + clean_regex=r"[^a-zA-Z0-9]" + ) + datatype: EntityFieldStringDescriptor = EntityFieldStringDescriptor( + clean_regex=r"[^a-zA-Z0-9]" + ) + task_name: EntityFieldStringDescriptor = EntityFieldStringDescriptor( + clean_regex=r"[^a-zA-Z0-9]" + ) + suffix: EntityFieldStringDescriptor = EntityFieldStringDescriptor( + clean_regex=r"[^a-zA-Z0-9]" + ) session_id: Optional[str] = None metadata: Optional[Dict[str, str]] = None file_path: Optional[Path] = None @@ -152,6 +206,13 @@ def is_tabular_data(self) -> bool: """Return True if entity is tabular data.""" return self.tabular_data is not None + @classmethod + def bids_field( + cls, clean_regex: str, default: Optional[str] = None + ) -> EntityFieldStringDescriptor: + """Return BidsEntity field descriptor.""" + return EntityFieldStringDescriptor(clean_regex=clean_regex, default=default) + class BidsBuilder: """Builder for BIDS Model.""" diff --git a/src/bidsi/bids_writer.py b/src/bidsi/bids_writer.py index e079c17..3c66dc1 100644 --- a/src/bidsi/bids_writer.py +++ b/src/bidsi/bids_writer.py @@ -15,7 +15,6 @@ from .bids_model import BidsBuilder, BidsConfig, BidsEntity, BidsModel LOG = logging.getLogger(__name__) -LOG.setLevel(logging.DEBUG) class MergeStrategy(Enum): @@ -24,16 +23,21 @@ class MergeStrategy(Enum): NO_MERGE: Do not merge, exit with error on conflict. OVERWRITE: Overwrite existing files on conflict. KEEP: Keep existing files on conflict. - RENAME_FILE: Rename files on conflict using run-label increments. - NEW_SESSION: Write new session folder for all data additions. + RENAME_SEQUENTIAL: Rename files on conflict using run-label increments. """ - UNKNOWN_MERGE = 0 - NO_MERGE = 1 - OVERWRITE = 2 - KEEP = 3 - RENAME_ENTITIES = 5 - NEW_SESSION = 6 + # Do not merge, only proceed with empty BIDS root. + # Exit with error on conflict. Default. + NO_MERGE = 0 + + # Overwrite existing files on conflict. + OVERWRITE = 1 + + # Keep existing files on conflict. + KEEP = 2 + + # Rename files on conflict using run-label increments. + RENAME_SEQUENTIAL = 3 def __str__(self) -> str: """Return string representation of MergeStrategy.""" @@ -80,7 +84,7 @@ def __exit__( return self.write() def builder(self) -> BidsBuilder: - """Return BIDS builder.""" + """Return BIDS builder, creating new if does not already exist.""" if self._builder is None: self._builder = BidsBuilder() return self._builder @@ -103,7 +107,7 @@ def _merge_tsv(self, path: Path, data: pd.DataFrame) -> None: data.to_csv(path, sep="\t", index=False) def _ensure_directory_path(self, path: Path, is_dir: bool = False) -> None: - """Ensure directory path exists.""" + """Ensure directory path, or path to parent dir of file exists, or create.""" if is_dir: if not path.exists(): path.mkdir(parents=True, exist_ok=True) @@ -133,32 +137,26 @@ def _merge_entity( return elif self._entity_merge_strategy == MergeStrategy.KEEP: return - elif self._entity_merge_strategy == MergeStrategy.RENAME_ENTITIES: - raise NotImplementedError("RENAME_ENTITIES merge strategy not implemented.") - elif self._entity_merge_strategy == MergeStrategy.NEW_SESSION: - raise NotImplementedError("New session merge strategy not implemented.") else: raise ValueError(f"Unknown merge strategy {self._entity_merge_strategy}.") def write(self) -> bool: """Write BIDS structure to disk.""" if self._bids is None and self._builder is None: - raise ValueError("No BIDS model to write.") + raise ValueError("No BIDS model or builder to write.") if self._bids is None and self._builder is not None: self._bids = self._builder.build() # Unwrap Optional value for type-checking. if self._bids is None: - raise ValueError("No BIDS model or BIDS Builder to write.") + raise ValueError("No BIDS model to write.") # Write BIDS structure # Confirm root LOG.info(f"Writing BIDS structure to {self._bids_root}") - self._bids_root.mkdir(parents=True, exist_ok=True) + self._ensure_directory_path(self._bids_root, is_dir=True) if len(list(self._bids_root.iterdir())) > 0: - if self._entity_merge_strategy == MergeStrategy.UNKNOWN_MERGE: - raise ValueError("BIDS root is not empty, merge strategy required.") if self._entity_merge_strategy == MergeStrategy.NO_MERGE: raise ValueError("BIDS root is not empty, cannot merge.") @@ -178,11 +176,12 @@ def write(self) -> bool: # Write subject folders for entity in self._bids.entities: - LOG.info(f"Writing entity {entity.subject_id}") if entity.file_path is not None: + LOG.info(f"Writing Path entity {entity.subject_id}") fp = entity.file_path self._merge_entity(entity, lambda path: shutil.copy2(fp, path)) elif entity.tabular_data is not None: + LOG.info(f"Writing tabular data entity {entity.subject_id}") tb = entity.tabular_data self._merge_entity( entity, diff --git a/tests/test_bids_model.py b/tests/test_bids_model.py new file mode 100644 index 0000000..ca71f3c --- /dev/null +++ b/tests/test_bids_model.py @@ -0,0 +1,39 @@ +"""Tests for the BidsEntity model.""" + +from bidsi import BidsEntity + + +def test_bids_entity_descriptor_cleans_values() -> None: + """Test that BidsEntity descriptor cleans values.""" + bids = BidsEntity( + subject_id="01_AZ", + task_name="*task*00", + datatype="func_", + run_id="1", + suffix=".suffix=", + metadata={"ke_y": "_va_lue"}, + session_id="-01-az", + ) + assert bids.subject_id == "01AZ" + assert bids.task_name == "task00" + assert bids.datatype == "func" + assert bids.run_id == "1" + assert bids.suffix == "suffix" + + +def test_bids_entity_descriptor_cleans_optional_values() -> None: + """Test that BidsEntity descriptor cleans values.""" + bids = BidsEntity( + subject_id="01_AZ", + task_name="*task*00", + datatype="func_", + run_id="1", + suffix=".suffix=", + metadata={"ke_y": "_va_lue"}, + session_id="-01-az", + ) + assert bids.subject_id == "01AZ" + assert bids.task_name == "task00" + assert bids.datatype == "func" + assert bids.run_id == "1" + assert bids.suffix == "suffix"