Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Add support for experimental design aggregation #724

Draft
wants to merge 38 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 32 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
27c21bf
add initial prototype of design aggregation tool
leej3 May 11, 2021
bc6cd91
tidy arg parsing
leej3 May 11, 2021
bba5284
move get_events_collection_to variables.io
Shotgunosine May 11, 2021
0d2a888
fix data path during testing
leej3 May 11, 2021
4e41306
fix event file reading
leej3 May 11, 2021
14d32df
tidy get_events_collection
leej3 May 12, 2021
0fb031b
move loading of regressor into function
leej3 May 12, 2021
8ba2902
move loading of phys and stim files into function
leej3 May 12, 2021
605c2c0
output sampling rate not needed for reading input
leej3 May 12, 2021
b115bc0
move source logic out of get_rec_collection
leej3 May 12, 2021
63f84a5
will not drop na in records or reg collections for now
leej3 May 12, 2021
1ee5de1
use tempdir for output during test
leej3 May 12, 2021
31776fa
remove output-tsv arg and start sparse/dense saving
leej3 May 12, 2021
bc39cd6
have tfm manager check for densification or deletion of sparse variables
Shotgunosine May 12, 2021
044386b
parametrize tests
leej3 May 13, 2021
ae83df9
remove stutter
leej3 May 13, 2021
a8fb923
add test for sampling rate with associated fix
leej3 May 13, 2021
ce7a50b
move test output to the pytest temp dir
leej3 May 13, 2021
810f29e
oops
leej3 May 13, 2021
76c0c54
consider the sparse variables
leej3 May 13, 2021
a2fba92
correct indentation bug
leej3 May 13, 2021
4a6dac0
update TODOs
leej3 May 13, 2021
1094c2f
fix sparse var saving
Shotgunosine May 13, 2021
e1a977a
more fixes for sparse/dense
leej3 May 13, 2021
1889e41
add model with convolution
leej3 May 13, 2021
a1764f1
Fix sparse variable filtering
Shotgunosine May 13, 2021
5369664
fix check columns in output dataframes
Shotgunosine May 13, 2021
34a209f
use click for cli
leej3 May 13, 2021
3c57020
enh don't rely on run node for get events collection
Shotgunosine May 13, 2021
979ec10
enh remove run node from rec and reg loading
Shotgunosine May 13, 2021
69c3720
remove params, kwargs no longer captured in params
leej3 May 14, 2021
02cd6fc
add transforms reading function
leej3 May 14, 2021
a9ae623
add additional support for transformation parsing
leej3 May 14, 2021
857c5e7
Apply suggestions from code review
May 29, 2021
520bab8
Merge remote-tracking branch 'origin/master' into add_design_aggregator
leej3 May 29, 2021
6344816
rename and move to cli
leej3 May 29, 2021
bb47b4c
make ta default to tr
leej3 May 29, 2021
14391a9
improve parsing of transforms_in
leej3 May 29, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion bids/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
"layout",
"reports",
"utils",
"variables"
"variables",
"statsmodels_design_synthesizer",
]

due.cite(Doi("10.1038/sdata.2016.44"),
Expand Down
41 changes: 36 additions & 5 deletions bids/modeling/transformations/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,9 @@
import pandas as pd

from bids.utils import listify, convert_JSON
from bids.variables import SparseRunVariable
from bids.variables import SparseRunVariable, BIDSRunVariableCollection
from bids.modeling import transformations as pbt


class Transformation(metaclass=ABCMeta):

### Class-level settings ###
Expand Down Expand Up @@ -405,13 +404,13 @@ class TransformerManager(object):
If None, the PyBIDS transformations module is used.
"""

def __init__(self, default=None):
def __init__(self, default=None, save_pre_dense=False):
self.transformations = {}
if default is None:
# Default to PyBIDS transformations
default = pbt
self.default = default

self.save_pre_dense = save_pre_dense
def _sanitize_name(self, name):
""" Replace any invalid/reserved transformation names with acceptable
equivalents.
Expand Down Expand Up @@ -448,6 +447,7 @@ def transform(self, collection, transformations):
transformations : list
List of transformations to apply.
"""
changed_vars = []
for t in transformations:
t = convert_JSON(t) # make sure all keys are snake case
kwargs = dict(t)
Expand All @@ -462,5 +462,36 @@ def transform(self, collection, transformations):
"explicitly register a handler, or pass a"
" default module that supports it." % name)
func = getattr(self.default, name)
func(collection, cols, **kwargs)

# check for sparse variables here and save them
# We want everything sparse (the last time it was sparse during the
# transformation process) and everything that is dense at the end
# of the transformations. This will allow downstream users to add
# convolutions etc. as they please.
matching_sparse_cols = []
if self.save_pre_dense:
for variable in collection.match_variables(cols, return_type='variable'):
if isinstance(variable, SparseRunVariable):
matching_sparse_cols.append(variable.clone())

func(collection, cols, **kwargs)

# check here to see if those variables are still sparse
# if so, continue, if not, save the sparse variables prior to transformation
if len(matching_sparse_cols) > 0:
for variable in matching_sparse_cols:
name = variable.name
matching_post_tfm = collection.match_variables(name, return_type='variable')
assert len(matching_post_tfm) < 2
if (len(matching_post_tfm) == 0) or not isinstance(matching_post_tfm[0], SparseRunVariable):
changed_vars.append(variable)

if self.save_pre_dense:
if len(changed_vars) > 0:
changed_vars = BIDSRunVariableCollection(changed_vars)
assert np.all([isinstance(vv, SparseRunVariable) for vv in changed_vars.variables.values()])
return collection, changed_vars
else:
return collection, None
return collection

122 changes: 122 additions & 0 deletions bids/statsmodels_design_synthesizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
#! /usr/bin/env python
import argparse
import sys
import json
from pathlib import Path
import pandas as pd
import numpy as np
from collections import namedtuple
from bids.modeling import transformations
from bids.utils import convert_JSON
from bids.variables import BIDSRunVariableCollection, SparseRunVariable, merge_collections
from bids.layout.utils import parse_file_entities
from bids.variables.io import get_events_collection, parse_transforms
from bids.variables.entities import RunNode
import click

from . import __version__


@click.command()
@click.version_option(__version__, prog_name='statsmodels_design_synthesizer')
@click.option(
    "--events-tsv", required=True, help="Path to events TSV")
@click.option(
    "--transforms", required=True, help="Path to transform or model json"
)
@click.option(
    "--nvol", required=True, type=int, help="Number of volumes in func time-series"
)
@click.option(
    "--tr", required=True, type=float, help="TR for func time series"
)
@click.option(
    "--ta", required=True, type=float, help="TA for events")
@click.option(
    "--output-sampling-rate",
    required=False,
    type=float,
    help="Output sampling rate in Hz when a full design matrix is desired.",
)
@click.option(
    "--output-dir",
    required=False,
    help="Path to directory to write processed event files.",
)
def main(**kwargs):
    """CLI entry point: thin click wrapper around statsmodels_design_synthesizer."""
    statsmodels_design_synthesizer(**kwargs)


def statsmodels_design_synthesizer(
    *,
    events_tsv,
    transforms,
    nvol,
    tr,
    ta,
    output_sampling_rate=None,
    output_dir=None,
):
    """Apply BIDS-StatsModels transformations to an events file and save outputs.

    Reads an events TSV, applies the transformations described in
    ``transforms``, and writes up to three TSVs into ``output_dir``:
    ``transformed_events.tsv`` (sparse variables), ``transformed_time_series.tsv``
    (dense variables, if any), and ``aggregated_design.tsv`` (full design matrix,
    only when ``output_sampling_rate`` is given).

    Parameters
    ----------
    events_tsv : str or Path
        Path to a BIDS events TSV file.
    transforms : str or Path
        Path to a JSON file containing a transformation spec or model.
    nvol : int
        Number of volumes in the functional time series.
    tr : float
        Repetition time of the functional time series, in seconds.
    ta : float
        TA for events.  # NOTE(review): accepted but never used in this body — confirm intent
    output_sampling_rate : float, optional
        Sampling rate in Hz for the fully dense design matrix. If None, no
        aggregated design matrix is written.
    output_dir : str or Path, optional
        Output directory; defaults to ``./design_synthesizer``.
    """
    output_dir = Path(output_dir or "design_synthesizer")
    # parents=True so a nested user-supplied path does not raise FileNotFoundError
    output_dir.mkdir(parents=True, exist_ok=True)
    model_transforms = parse_transforms(transforms)
    duration = nvol * tr

    # Build the variable collection for this run. RunInfo mimics the subset of
    # run metadata that get_events_collection requires.
    coll_df = pd.read_csv(events_tsv, delimiter="\t")
    RunInfo = namedtuple('RunInfo', ['entities', 'duration', 'tr', 'image', 'n_vols'])

    # TODO: this will need to be implemented without RunNode to break cyclic
    # dependencies if transformations is to be extracted
    run_info = RunInfo(parse_file_entities(events_tsv), duration, tr, None, nvol)
    coll = BIDSRunVariableCollection(get_events_collection(coll_df, run_info))

    # Perform transformations, additionally saving variables that were changed.
    # If a column is transformed but not densified it will not be in
    # colls_pre_densification.
    colls, colls_pre_densification = (
        transformations.TransformerManager(save_pre_dense=True)
        .transform(coll, model_transforms)
    )

    # Save sparse vars. When densification occurred, merge the still-sparse
    # outputs with the pre-densification snapshots; a name appearing in both
    # indicates a variable was deleted and recreated, which we cannot merge.
    if colls_pre_densification is not None:
        final_sparse_colls = BIDSRunVariableCollection(colls.get_sparse_variables())
        final_sparse_names = set(final_sparse_colls.variables)
        pre_dense_names = set(colls_pre_densification.variables)
        shared_names = final_sparse_names.intersection(pre_dense_names)

        if shared_names:
            raise ValueError(
                f"""Somehow you've ended up with a copy of {shared_names} in both the final
transformed variables and in the pre-densification variables. Did you delete a
variable and recreate one with same name?"""
            )
        output = merge_collections(
            [colls_pre_densification, final_sparse_colls]
        )
        assert output.all_sparse()

        df_sparse = output.to_df()
    else:
        df_sparse = colls.to_df(include_dense=False)

    df_sparse.to_csv(output_dir / "transformed_events.tsv", index=None, sep="\t", na_rep="n/a")

    # Save dense vars. to_df raises ValueError when no dense variables exist;
    # keep the try body minimal so a genuine write error is not swallowed.
    try:
        df_dense = colls.to_df(include_sparse=False)
    except ValueError:
        pass  # no dense variables were produced — nothing to write
    else:
        df_dense.to_csv(output_dir / "transformed_time_series.tsv", index=None, sep="\t", na_rep="n/a")

    # Save full design matrix resampled at the requested rate.
    if output_sampling_rate:
        df_full = colls.to_df(sampling_rate=output_sampling_rate)
        df_full.to_csv(output_dir / "aggregated_design.tsv", index=None, sep="\t", na_rep="n/a")



if __name__ == "__main__":
    # Exit with click's return code when invoked as a script.
    sys.exit(main())  # pragma: no cover
109 changes: 109 additions & 0 deletions bids/tests/data/ds005/models/ds-005_type-convolution_model.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
{
"Name": "test_model",
"Description": "simple test model",
"Nodes": [
{
"Name": "run",
"Level": "Run",
"GroupBy": [
"run",
"subject"
],
"Model": {
"X": [
"RT",
"gain"
],
"Formula": "0 + RT * gain"
leej3 marked this conversation as resolved.
Show resolved Hide resolved
},
"Transformations": [
{
"Name": "Factor",
"Input": "trial_type"
},
{
"Name": "Rename",
"Input": "trial_type.parametric gain",
"Output": "gain"
},
{
"Name": "Threshold",
"Input": "respcat",
"Output": "pos_respcat",
"Binarize": true
},
{
"Name": "Scale",
"Input": "RT"
},
{
"Name": "Convolve",
"Input": ["gain", "pos_respcat"],
"Model": "spm"
}
],
"DummyContrasts": {
"Type": "t"
leej3 marked this conversation as resolved.
Show resolved Hide resolved
}
},
{
"Name": "participant",
"Level": "Subject",
"Model": {
"X": [
"@intercept"
leej3 marked this conversation as resolved.
Show resolved Hide resolved
]
},
"DummyContrasts": {
"Type": "FEMA"
}
},
{
"Name": "by-group",
"Level": "Dataset",
"Model": {
"X": [
"@intercept"
]
},
"DummyContrasts": {
"Type": "t"
}
},
{
"Name": "group-diff",
"Level": "Dataset",
"Model": {
"X": [
"@intercept",
"sex"
]
},
"DummyContrasts": {
"Type": "t"
}
}
],
"Edges": [
{
"Source": "run",
"Destination": "participant",
"GroupBy": [
"subject",
"contrast"
]
},
{
"Source": "participant",
"Destination": "by-group",
"GroupBy": [
"sex"
]
},
{
"Source": "participant",
"Destination": "group-diff",
"GroupBy": []
Comment on lines +91 to +106
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

GroupBy should now be in the nodes.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Shotgunosine could you look at this one.

}
]
}
Loading