Skip to content

Commit

Permalink
Clean BidsEntity fields, add test
Browse files Browse the repository at this point in the history
  • Loading branch information
gsch-cmi committed Jun 4, 2024
1 parent 6d3e365 commit 16c65cc
Show file tree
Hide file tree
Showing 3 changed files with 125 additions and 26 deletions.
71 changes: 66 additions & 5 deletions src/bidsi/bids_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from __future__ import annotations

import re
from dataclasses import dataclass
from functools import cached_property
from pathlib import Path
Expand Down Expand Up @@ -35,6 +36,19 @@ def relative_entity_dir(self, entity: BidsEntity) -> Path:
rel_path /= entity.datatype
return rel_path

def _clean_bids_field(self, value: str) -> str:
"""Clean BIDS field value.
Replaces all non-alphanumeric characters.
Args:
value (str): Value to clean.
Returns:
str: Cleaned value.
"""
return re.sub(r"[^a-zA-Z0-9]", "", value)

def relative_entity_path(self, entity: BidsEntity) -> Path:
    """Return relative path to entity.

    Joins the directory given by ``relative_entity_dir`` with the name
    produced by ``entity_formatter`` for the same entity.
    """
    parent_dir = self.relative_entity_dir(entity)
    entity_name = self.entity_formatter(entity)
    return parent_dir / entity_name
Expand Down Expand Up @@ -126,18 +140,58 @@ def subject_ids(self) -> List[str]:
return list(set([entity.subject_id for entity in self.entities]))


class EntityFieldStringDescriptor:
    """Descriptor object for BIDS string fields.

    Every value assigned through the descriptor has all matches of
    ``clean_regex`` removed before it is stored on the owning instance
    under the attribute name prefixed with an underscore.

    This is intentionally a plain class rather than a frozen dataclass: a
    frozen dataclass installs a ``__setattr__`` that raises
    ``FrozenInstanceError``, which would break the attribute assignments
    performed in ``__init__`` and ``__set_name__``.
    """

    def __init__(self, *, clean_regex: str, default: Optional[str] = None) -> None:
        """Initialize EntityFieldStringDescriptor.

        Args:
            clean_regex: Regular expression whose matches are deleted from
                every assigned value.
            default: Value reported by ``__get__`` when nothing has been
                stored on the instance yet.
        """
        self._default = default
        self._clean_regex = clean_regex

    def __set_name__(self, owner: type[BidsEntity], name: str) -> None:
        """Record the backing attribute name: the field name with a ``_`` prefix."""
        self._name = "_" + name

    def __get__(self, obj: Optional[BidsEntity], type: type[BidsEntity]) -> str:
        """Return the stored value, or the default when nothing is stored.

        Raises:
            AttributeError: On class-level access (``obj`` is ``None``).
        """
        if obj is None:
            raise AttributeError("No default value for EntityFieldStringDescriptor.")

        # str() coerces whatever is found; with no stored value and a default
        # of None this yields the literal string "None".
        return str(getattr(obj, self._name, self._default))

    def __set__(self, obj: object, value: str) -> None:
        """Store ``value`` after deleting every ``clean_regex`` match from it."""
        value = re.sub(self._clean_regex, "", value)
        setattr(obj, self._name, value)


# TODO: Add support for cleaning optional fields.
# TODO: Can BidsEntity be frozen?


@dataclass
class BidsEntity:
"""Model of BIDS entity, a representation of data within the BIDS structure.
Entities can be files or tabular data.
Only one of file or tabular_data should be set.
"""

subject_id: str
datatype: str
task_name: str
suffix: str
subject_id: EntityFieldStringDescriptor = EntityFieldStringDescriptor(
clean_regex=r"[^a-zA-Z0-9]"
)
datatype: EntityFieldStringDescriptor = EntityFieldStringDescriptor(
clean_regex=r"[^a-zA-Z0-9]"
)
task_name: EntityFieldStringDescriptor = EntityFieldStringDescriptor(
clean_regex=r"[^a-zA-Z0-9]"
)
suffix: EntityFieldStringDescriptor = EntityFieldStringDescriptor(
clean_regex=r"[^a-zA-Z0-9]"
)
session_id: Optional[str] = None
metadata: Optional[Dict[str, str]] = None
file_path: Optional[Path] = None
Expand All @@ -152,6 +206,13 @@ def is_tabular_data(self) -> bool:
"""Return True if entity is tabular data."""
return self.tabular_data is not None

@classmethod
def bids_field(
    cls, clean_regex: str, default: Optional[str] = None
) -> EntityFieldStringDescriptor:
    """Build a cleaning field descriptor for a BidsEntity attribute.

    Args:
        clean_regex: Pattern forwarded to the descriptor as ``clean_regex``.
        default: Optional value forwarded to the descriptor as ``default``.

    Returns:
        EntityFieldStringDescriptor: A new descriptor built from the arguments.
    """
    descriptor_options = {"clean_regex": clean_regex, "default": default}
    return EntityFieldStringDescriptor(**descriptor_options)


class BidsBuilder:
"""Builder for BIDS Model."""
Expand Down
41 changes: 20 additions & 21 deletions src/bidsi/bids_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
from .bids_model import BidsBuilder, BidsConfig, BidsEntity, BidsModel

LOG = logging.getLogger(__name__)
LOG.setLevel(logging.DEBUG)


class MergeStrategy(Enum):
Expand All @@ -24,16 +23,21 @@ class MergeStrategy(Enum):
NO_MERGE: Do not merge, exit with error on conflict.
OVERWRITE: Overwrite existing files on conflict.
KEEP: Keep existing files on conflict.
RENAME_FILE: Rename files on conflict using run-label increments.
NEW_SESSION: Write new session folder for all data additions.
RENAME_SEQUENTIAL: Rename files on conflict using run-label increments.
"""

UNKNOWN_MERGE = 0
NO_MERGE = 1
OVERWRITE = 2
KEEP = 3
RENAME_ENTITIES = 5
NEW_SESSION = 6
# Do not merge, only proceed with empty BIDS root.
# Exit with error on conflict. Default.
NO_MERGE = 0

# Overwrite existing files on conflict.
OVERWRITE = 1

# Keep existing files on conflict.
KEEP = 2

# Rename files on conflict using run-label increments.
RENAME_SEQUENTIAL = 3

def __str__(self) -> str:
"""Return string representation of MergeStrategy."""
Expand Down Expand Up @@ -80,7 +84,7 @@ def __exit__(
return self.write()

def builder(self) -> BidsBuilder:
    """Return BIDS builder, creating a new one if it does not already exist.

    Returns:
        BidsBuilder: The builder stored on this object. The first call
            creates it; later calls return the same instance.
    """
    if self._builder is None:
        # Created lazily and cached so repeated calls share one builder.
        self._builder = BidsBuilder()
    return self._builder
Expand All @@ -103,7 +107,7 @@ def _merge_tsv(self, path: Path, data: pd.DataFrame) -> None:
data.to_csv(path, sep="\t", index=False)

def _ensure_directory_path(self, path: Path, is_dir: bool = False) -> None:
"""Ensure directory path exists."""
"""Ensure directory path, or path to parent dir of file exists, or create."""
if is_dir:
if not path.exists():
path.mkdir(parents=True, exist_ok=True)
Expand Down Expand Up @@ -133,32 +137,26 @@ def _merge_entity(
return
elif self._entity_merge_strategy == MergeStrategy.KEEP:
return
elif self._entity_merge_strategy == MergeStrategy.RENAME_ENTITIES:
raise NotImplementedError("RENAME_ENTITIES merge strategy not implemented.")
elif self._entity_merge_strategy == MergeStrategy.NEW_SESSION:
raise NotImplementedError("New session merge strategy not implemented.")
else:
raise ValueError(f"Unknown merge strategy {self._entity_merge_strategy}.")

def write(self) -> bool:
"""Write BIDS structure to disk."""
if self._bids is None and self._builder is None:
raise ValueError("No BIDS model to write.")
raise ValueError("No BIDS model or builder to write.")

if self._bids is None and self._builder is not None:
self._bids = self._builder.build()

# Unwrap Optional value for type-checking.
if self._bids is None:
raise ValueError("No BIDS model or BIDS Builder to write.")
raise ValueError("No BIDS model to write.")

# Write BIDS structure
# Confirm root
LOG.info(f"Writing BIDS structure to {self._bids_root}")
self._bids_root.mkdir(parents=True, exist_ok=True)
self._ensure_directory_path(self._bids_root, is_dir=True)
if len(list(self._bids_root.iterdir())) > 0:
if self._entity_merge_strategy == MergeStrategy.UNKNOWN_MERGE:
raise ValueError("BIDS root is not empty, merge strategy required.")
if self._entity_merge_strategy == MergeStrategy.NO_MERGE:
raise ValueError("BIDS root is not empty, cannot merge.")

Expand All @@ -178,11 +176,12 @@ def write(self) -> bool:

# Write subject folders
for entity in self._bids.entities:
LOG.info(f"Writing entity {entity.subject_id}")
if entity.file_path is not None:
LOG.info(f"Writing Path entity {entity.subject_id}")
fp = entity.file_path
self._merge_entity(entity, lambda path: shutil.copy2(fp, path))
elif entity.tabular_data is not None:
LOG.info(f"Writing tabular data entity {entity.subject_id}")
tb = entity.tabular_data
self._merge_entity(
entity,
Expand Down
39 changes: 39 additions & 0 deletions tests/test_bids_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
"""Tests for the BidsEntity model."""

from bidsi import BidsEntity


def test_bids_entity_descriptor_cleans_values() -> None:
    """Check that values passed to BidsEntity come back cleaned on read."""
    raw_fields = {
        "subject_id": "01_AZ",
        "task_name": "*task*00",
        "datatype": "func_",
        "run_id": "1",
        "suffix": ".suffix=",
        "metadata": {"ke_y": "_va_lue"},
        "session_id": "-01-az",
    }
    entity = BidsEntity(**raw_fields)

    # Checked in a fixed order: each attribute must equal its cleaned form.
    expected_by_field = [
        ("subject_id", "01AZ"),
        ("task_name", "task00"),
        ("datatype", "func"),
        ("run_id", "1"),
        ("suffix", "suffix"),
    ]
    for field_name, cleaned in expected_by_field:
        assert getattr(entity, field_name) == cleaned


def test_bids_entity_descriptor_cleans_optional_values() -> None:
    """Build a BidsEntity with optional fields set and check cleaned reads.

    NOTE(review): despite the name, only subject_id, task_name, datatype,
    run_id and suffix are asserted on; nothing is checked for session_id
    or metadata. TODO: add assertions once the intended cleaning of
    optional fields is confirmed.
    """
    entity = BidsEntity(
        subject_id="01_AZ",
        task_name="*task*00",
        datatype="func_",
        run_id="1",
        suffix=".suffix=",
        metadata={"ke_y": "_va_lue"},
        session_id="-01-az",
    )

    observed = (
        entity.subject_id,
        entity.task_name,
        entity.datatype,
        entity.run_id,
        entity.suffix,
    )
    expected = ("01AZ", "task00", "func", "1", "suffix")
    for actual_value, expected_value in zip(observed, expected):
        assert actual_value == expected_value

0 comments on commit 16c65cc

Please sign in to comment.