Concatenate across directions as well as runs #965

Merged · 6 commits · Oct 10, 2023
49 changes: 49 additions & 0 deletions xcp_d/tests/test_utils_bids.py
@@ -245,3 +245,52 @@ def test_get_entity(datasets):
)
with pytest.raises(ValueError, match="Unknown space"):
xbids.get_entity(fname, "space")


def test_group_across_runs():
"""Test group_across_runs."""
in_files = [
"/path/sub-01_task-axcpt_run-03_bold.nii.gz",
"/path/sub-01_task-rest_run-03_bold.nii.gz",
"/path/sub-01_task-rest_run-01_bold.nii.gz",
"/path/sub-01_task-axcpt_run-02_bold.nii.gz",
"/path/sub-01_task-rest_run-02_bold.nii.gz",
"/path/sub-01_task-axcpt_run-01_bold.nii.gz",
]
grouped_files = xbids.group_across_runs(in_files)
assert isinstance(grouped_files, list)
assert len(grouped_files[0]) == 3
assert grouped_files[0] == [
"/path/sub-01_task-axcpt_run-01_bold.nii.gz",
"/path/sub-01_task-axcpt_run-02_bold.nii.gz",
"/path/sub-01_task-axcpt_run-03_bold.nii.gz",
]
assert len(grouped_files[1]) == 3
assert grouped_files[1] == [
"/path/sub-01_task-rest_run-01_bold.nii.gz",
"/path/sub-01_task-rest_run-02_bold.nii.gz",
"/path/sub-01_task-rest_run-03_bold.nii.gz",
]

in_files = [
"/path/sub-01_task-rest_dir-LR_run-2_bold.nii.gz",
"/path/sub-01_task-rest_dir-RL_run-1_bold.nii.gz",
"/path/sub-01_task-axcpt_dir-LR_bold.nii.gz",
"/path/sub-01_task-rest_dir-RL_run-2_bold.nii.gz",
"/path/sub-01_task-rest_dir-LR_run-1_bold.nii.gz",
"/path/sub-01_task-axcpt_dir-RL_bold.nii.gz",
]
grouped_files = xbids.group_across_runs(in_files)
assert isinstance(grouped_files, list)
assert len(grouped_files[0]) == 2
assert grouped_files[0] == [
"/path/sub-01_task-axcpt_dir-LR_bold.nii.gz",
"/path/sub-01_task-axcpt_dir-RL_bold.nii.gz",
]
assert len(grouped_files[1]) == 4
assert grouped_files[1] == [
"/path/sub-01_task-rest_dir-LR_run-1_bold.nii.gz",
"/path/sub-01_task-rest_dir-RL_run-1_bold.nii.gz",
"/path/sub-01_task-rest_dir-LR_run-2_bold.nii.gz",
"/path/sub-01_task-rest_dir-RL_run-2_bold.nii.gz",
]
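
These expected groupings follow from how group_across_runs strips the run and dir entities to build a grouping key (see the xcp_d/utils/bids.py diff below). A minimal standalone sketch of that keying step, using the same regexes as the diff; the printed output is illustrative:

import os
import re

in_files = [
    "/path/sub-01_task-rest_dir-LR_run-2_bold.nii.gz",
    "/path/sub-01_task-axcpt_dir-LR_bold.nii.gz",
    "/path/sub-01_task-axcpt_dir-RL_bold.nii.gz",
]

# Strip the run entity, then the dir entity, to form each file's grouping key.
keys = [re.sub("_run-[0-9]+_", "_", os.path.basename(f)) for f in in_files]
keys = [re.sub("_dir-[0-9a-zA-Z]+_", "_", f) for f in keys]
print(keys)
# ['sub-01_task-rest_bold.nii.gz',
#  'sub-01_task-axcpt_bold.nii.gz',
#  'sub-01_task-axcpt_bold.nii.gz']

Files that share a key (here, the two task-axcpt scans) land in the same group, which is why dir-LR and dir-RL acquisitions are concatenated together.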
27 changes: 21 additions & 6 deletions xcp_d/utils/bids.py
@@ -896,7 +896,11 @@ def get_entity(filename, entity):


def group_across_runs(in_files):
"""Group preprocessed BOLD files by unique sets of entities, ignoring run.
"""Group preprocessed BOLD files by unique sets of entities, ignoring run and direction.

Direction is ignored only to accommodate HCP data. This may cause minor problems
for non-HCP datasets that differentiate scans by both run and direction.

Parameters
----------
@@ -913,20 +917,31 @@ def group_across_runs(in_files):

# First, extract run and direction information and sort the input files,
# so that any cases where files are not already in ascending order get fixed.
- run_numbers = []
+ run_numbers, directions = [], []
for in_file in in_files:
run = get_entity(in_file, "run")
if run is None:
run = 0

direction = get_entity(in_file, "dir")
if direction is None:
direction = "none"

run_numbers.append(int(run))
directions.append(direction)

# Combine the three lists into a list of tuples
combined_data = list(zip(run_numbers, directions, in_files))

# Sort the tuples by run, then direction, then filename
sorted_data = sorted(combined_data, key=lambda x: (x[0], x[1], x[2]))
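# For example, with the second test case above, sorted_data begins:
#   (0, "LR", "/path/sub-01_task-axcpt_dir-LR_bold.nii.gz"),
#   (0, "RL", "/path/sub-01_task-axcpt_dir-RL_bold.nii.gz"),
#   (1, "LR", "/path/sub-01_task-rest_dir-LR_run-1_bold.nii.gz"), ...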

- # Sort the files by the run numbers.
- zipped_pairs = zip(run_numbers, in_files)
- sorted_in_files = [x for _, x in sorted(zipped_pairs)]
+ # Extract the files in sorted order.
+ sorted_in_files = [item[2] for item in sorted_data]

- # Extract the unique sets of entities (i.e., the filename, minus the run entity).
+ # Extract the unique sets of entities (i.e., the filename, minus the run and dir entities).
unique_filenames = [re.sub("_run-[0-9]+_", "_", os.path.basename(f)) for f in sorted_in_files]
unique_filenames = [re.sub("_dir-[0-9a-zA-Z]+_", "_", f) for f in unique_filenames]

# Assign each in_file to a group of files with the same entities, except run and direction.
out_files, grouped_unique_filenames = [], []
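The group-assignment loop at the end of group_across_runs is not shown in this diff. A hypothetical reconstruction of it, assuming only the variables visible above (unique_filenames, sorted_in_files, out_files, grouped_unique_filenames) and that the function returns out_files:

for i_file, unique_filename in enumerate(unique_filenames):
    # Start a new group the first time a stripped filename is seen.
    if unique_filename not in grouped_unique_filenames:
        grouped_unique_filenames.append(unique_filename)
        out_files.append([])

    # Append the original (sorted) path to its group's file list.
    group_idx = grouped_unique_filenames.index(unique_filename)
    out_files[group_idx].append(sorted_in_files[i_file])

return out_files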
2 changes: 1 addition & 1 deletion xcp_d/workflows/base.py
@@ -635,7 +635,7 @@ def init_subject_wf(
)

n_runs = len(preproc_files)
- preproc_files = group_across_runs(preproc_files)
+ preproc_files = group_across_runs(preproc_files)  # group files across runs and directions
run_counter = 0
for ent_set, task_files in enumerate(preproc_files):
# Assuming TR is constant across runs for a given combination of entities.
4 changes: 2 additions & 2 deletions xcp_d/workflows/concatenation.py
@@ -28,7 +28,7 @@ def init_concatenate_data_wf(
dcan_qc,
name="concatenate_data_wf",
):
"""Concatenate postprocessed data.
"""Concatenate postprocessed data across runs and directions.

Workflow Graph
.. workflow::
@@ -99,7 +99,7 @@
workflow = Workflow(name=name)

workflow.__desc__ = """
- Postprocessing derivatives from multi-run tasks were then concatenated across runs.
+ Postprocessing derivatives from multi-run tasks were then concatenated across runs and directions.
"""

inputnode = pe.Node(