Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Add support for experimental design aggregation #724

Draft
wants to merge 38 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 32 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
27c21bf
add initial prototype of design aggregation tool
leej3 May 11, 2021
bc6cd91
tidy arg parsing
leej3 May 11, 2021
bba5284
move get_events_collection_to variables.io
Shotgunosine May 11, 2021
0d2a888
fix data path during testing
leej3 May 11, 2021
4e41306
fix event file reading
leej3 May 11, 2021
14d32df
tidy get_events_collection
leej3 May 12, 2021
0fb031b
move loading of regressor into function
leej3 May 12, 2021
8ba2902
move loading of phys and stim files into function
leej3 May 12, 2021
605c2c0
output sampling rate not needed for reading input
leej3 May 12, 2021
b115bc0
move source logic out of get_rec_collection
leej3 May 12, 2021
63f84a5
will not drop na in records or reg collections for now
leej3 May 12, 2021
1ee5de1
use tempdir for output during test
leej3 May 12, 2021
31776fa
remove output-tsv arg and start sparse/dense saving
leej3 May 12, 2021
bc39cd6
have tfm manager check for densification or deletion of sparse variables
Shotgunosine May 12, 2021
044386b
parametrize tests
leej3 May 13, 2021
ae83df9
remove stutter
leej3 May 13, 2021
a8fb923
add test for sampling rate with associated fix
leej3 May 13, 2021
ce7a50b
move test output to the pytest temp dir
leej3 May 13, 2021
810f29e
oops
leej3 May 13, 2021
76c0c54
consider the sparse variables
leej3 May 13, 2021
a2fba92
correct indentation bug
leej3 May 13, 2021
4a6dac0
update TODOs
leej3 May 13, 2021
1094c2f
fix sparse var saving
Shotgunosine May 13, 2021
e1a977a
more fixes for sparse/dense
leej3 May 13, 2021
1889e41
add model with convolution
leej3 May 13, 2021
a1764f1
Fix sparse variable filtering
Shotgunosine May 13, 2021
5369664
fix check columns in output dataframes
Shotgunosine May 13, 2021
34a209f
use click for cli
leej3 May 13, 2021
3c57020
enh don't rely on run node for get events collection
Shotgunosine May 13, 2021
979ec10
enh remove run node from rec and reg loading
Shotgunosine May 13, 2021
69c3720
remove params, kwargs no longer captured in params
leej3 May 14, 2021
02cd6fc
add transforms reading function
leej3 May 14, 2021
a9ae623
add additional support for transformation parsing
leej3 May 14, 2021
857c5e7
Apply suggestions from code review
May 29, 2021
520bab8
Merge remote-tracking branch 'origin/master' into add_design_aggregator
leej3 May 29, 2021
6344816
rename and move to cli
leej3 May 29, 2021
bb47b4c
make ta default to tr
leej3 May 29, 2021
14391a9
improve parsing of transforms_in
leej3 May 29, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion bids/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
"layout",
"reports",
"utils",
"variables"
"variables",
"statsmodels_design_synthesizer",
]

due.cite(Doi("10.1038/sdata.2016.44"),
Expand Down
41 changes: 36 additions & 5 deletions bids/modeling/transformations/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,9 @@
import pandas as pd

from bids.utils import listify, convert_JSON
from bids.variables import SparseRunVariable
from bids.variables import SparseRunVariable, BIDSRunVariableCollection
from bids.modeling import transformations as pbt


class Transformation(metaclass=ABCMeta):

### Class-level settings ###
Expand Down Expand Up @@ -405,13 +404,13 @@ class TransformerManager(object):
If None, the PyBIDS transformations module is used.
"""

def __init__(self, default=None):
def __init__(self, default=None, save_pre_dense=False):
self.transformations = {}
if default is None:
# Default to PyBIDS transformations
default = pbt
self.default = default

self.save_pre_dense = save_pre_dense
def _sanitize_name(self, name):
""" Replace any invalid/reserved transformation names with acceptable
equivalents.
Expand Down Expand Up @@ -448,6 +447,7 @@ def transform(self, collection, transformations):
transformations : list
List of transformations to apply.
"""
changed_vars = []
for t in transformations:
t = convert_JSON(t) # make sure all keys are snake case
kwargs = dict(t)
Expand All @@ -462,5 +462,36 @@ def transform(self, collection, transformations):
"explicitly register a handler, or pass a"
" default module that supports it." % name)
func = getattr(self.default, name)
func(collection, cols, **kwargs)

# check for sparse variables here and save them
# We want everything sparse (the last time it was sparse during the
# transformation process) and everything that is dense at the end
# of the transformations. This will allow downstream users to add
# convolutions etc. as they please.
matching_sparse_cols = []
if self.save_pre_dense:
for variable in collection.match_variables(cols, return_type='variable'):
if isinstance(variable, SparseRunVariable):
matching_sparse_cols.append(variable.clone())

func(collection, cols, **kwargs)

# check here to see if those variables are still sparse
# if so, continue, if not, save the sparse variables prior to transformation
if len(matching_sparse_cols) > 0:
for variable in matching_sparse_cols:
name = variable.name
matching_post_tfm = collection.match_variables(name, return_type='variable')
assert len(matching_post_tfm) < 2
if (len(matching_post_tfm) == 0) or not isinstance(matching_post_tfm[0], SparseRunVariable):
changed_vars.append(variable)

if self.save_pre_dense:
if len(changed_vars) > 0:
changed_vars = BIDSRunVariableCollection(changed_vars)
assert np.all([isinstance(vv, SparseRunVariable) for vv in changed_vars.variables.values()])
return collection, changed_vars
else:
return collection, None
return collection

122 changes: 122 additions & 0 deletions bids/statsmodels_design_synthesizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
#! /usr/bin/env python
import argparse
import sys
import json
from pathlib import Path
import pandas as pd
import numpy as np
from collections import namedtuple
from bids.modeling import transformations
from bids.utils import convert_JSON
from bids.variables import BIDSRunVariableCollection, SparseRunVariable, merge_collections
from bids.layout.utils import parse_file_entities
from bids.variables.io import get_events_collection, parse_transforms
from bids.variables.entities import RunNode
import click

from . import __version__


@click.command()
@click.version_option(__version__, prog_name='statsmodels_design_synthesizer')
@click.option(
    "--events-tsv", required=True, help="Path to events TSV")
@click.option(
    "--transforms", required=True, help="Path to transform or model json"
)
@click.option(
    "--nvol", required=True, type=int, help="Number of volumes in func time-series"
)
@click.option(
    "--tr", required=True, type=float, help="TR for func time series"
)
@click.option(
    "--ta", required=True, type=float, help="TA for events")
@click.option(
    "--output-sampling-rate",
    required=False,
    type=float,
    help="Output sampling rate in Hz when a full design matrix is desired.",
)
@click.option(
    "--output-dir",
    required=False,
    help="Path to directory to write processed event files.",
)
def main(**kwargs):
    """CLI entry point: thin click wrapper around statsmodels_design_synthesizer."""
    statsmodels_design_synthesizer(**kwargs)


def statsmodels_design_synthesizer(
    *,
    events_tsv,
    transforms,
    nvol,
    tr,
    ta,
    output_sampling_rate=None,
    output_dir=None,
):
    """Apply BIDS-StatsModels transformations to an events file and save outputs.

    Reads an events TSV, applies the transformations described in
    ``transforms``, and writes up to three TSVs into ``output_dir``:
    ``transformed_events.tsv`` (sparse variables), ``transformed_time_series.tsv``
    (dense variables, if any), and ``aggregated_design.tsv`` (full design matrix,
    only when ``output_sampling_rate`` is given).

    Parameters
    ----------
    events_tsv : str or Path
        Path to a BIDS events TSV file.
    transforms : str or Path
        Path to a JSON file containing a transformation spec or model.
    nvol : int
        Number of volumes in the functional time series.
    tr : float
        Repetition time of the functional time series, in seconds.
    ta : float
        TA for events.  # NOTE(review): accepted but never used in this body — confirm intent
    output_sampling_rate : float, optional
        Sampling rate in Hz for the fully dense design matrix. If None, no
        aggregated design matrix is written.
    output_dir : str or Path, optional
        Output directory; defaults to ``./design_synthesizer``.
    """
    output_dir = Path(output_dir or "design_synthesizer")
    # parents=True so a nested user-supplied path does not raise FileNotFoundError
    output_dir.mkdir(parents=True, exist_ok=True)
    model_transforms = parse_transforms(transforms)
    duration = nvol * tr

    # Build the variable collection for this run. RunInfo mimics the subset of
    # run metadata that get_events_collection requires.
    coll_df = pd.read_csv(events_tsv, delimiter="\t")
    RunInfo = namedtuple('RunInfo', ['entities', 'duration', 'tr', 'image', 'n_vols'])

    # TODO: this will need to be implemented without RunNode to break cyclic
    # dependencies if transformations is to be extracted
    run_info = RunInfo(parse_file_entities(events_tsv), duration, tr, None, nvol)
    coll = BIDSRunVariableCollection(get_events_collection(coll_df, run_info))

    # Perform transformations, additionally saving variables that were changed.
    # If a column is transformed but not densified it will not be in
    # colls_pre_densification.
    colls, colls_pre_densification = (
        transformations.TransformerManager(save_pre_dense=True)
        .transform(coll, model_transforms)
    )

    # Save sparse vars. When densification occurred, merge the still-sparse
    # outputs with the pre-densification snapshots; a name appearing in both
    # indicates a variable was deleted and recreated, which we cannot merge.
    if colls_pre_densification is not None:
        final_sparse_colls = BIDSRunVariableCollection(colls.get_sparse_variables())
        final_sparse_names = set(final_sparse_colls.variables)
        pre_dense_names = set(colls_pre_densification.variables)
        shared_names = final_sparse_names.intersection(pre_dense_names)

        if shared_names:
            raise ValueError(
                f"""Somehow you've ended up with a copy of {shared_names} in both the final
transformed variables and in the pre-densification variables. Did you delete a
variable and recreate one with same name?"""
            )
        output = merge_collections(
            [colls_pre_densification, final_sparse_colls]
        )
        assert output.all_sparse()

        df_sparse = output.to_df()
    else:
        df_sparse = colls.to_df(include_dense=False)

    df_sparse.to_csv(output_dir / "transformed_events.tsv", index=None, sep="\t", na_rep="n/a")

    # Save dense vars. to_df raises ValueError when no dense variables exist;
    # keep the try body minimal so a genuine write error is not swallowed.
    try:
        df_dense = colls.to_df(include_sparse=False)
    except ValueError:
        pass  # no dense variables were produced — nothing to write
    else:
        df_dense.to_csv(output_dir / "transformed_time_series.tsv", index=None, sep="\t", na_rep="n/a")

    # Save full design matrix resampled at the requested rate.
    if output_sampling_rate:
        df_full = colls.to_df(sampling_rate=output_sampling_rate)
        df_full.to_csv(output_dir / "aggregated_design.tsv", index=None, sep="\t", na_rep="n/a")



if __name__ == "__main__":
    # Exit with click's return code when invoked as a script.
    sys.exit(main())  # pragma: no cover
109 changes: 109 additions & 0 deletions bids/tests/data/ds005/models/ds-005_type-convolution_model.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
{
"Name": "test_model",
"Description": "simple test model",
"Nodes": [
{
"Name": "run",
"Level": "Run",
"GroupBy": [
"run",
"subject"
],
"Model": {
"X": [
"RT",
"gain"
],
"Formula": "0 + RT * gain"
leej3 marked this conversation as resolved.
Show resolved Hide resolved
},
"Transformations": [
{
"Name": "Factor",
"Input": "trial_type"
},
{
"Name": "Rename",
"Input": "trial_type.parametric gain",
"Output": "gain"
},
{
"Name": "Threshold",
"Input": "respcat",
"Output": "pos_respcat",
"Binarize": true
},
{
"Name": "Scale",
"Input": "RT"
},
{
"Name": "Convolve",
"Input": ["gain", "pos_respcat"],
"Model": "spm"
}
],
"DummyContrasts": {
"Type": "t"
leej3 marked this conversation as resolved.
Show resolved Hide resolved
}
},
{
"Name": "participant",
"Level": "Subject",
"Model": {
"X": [
"@intercept"
leej3 marked this conversation as resolved.
Show resolved Hide resolved
]
},
"DummyContrasts": {
"Type": "FEMA"
}
},
{
"Name": "by-group",
"Level": "Dataset",
"Model": {
"X": [
"@intercept"
]
},
"DummyContrasts": {
"Type": "t"
}
},
{
"Name": "group-diff",
"Level": "Dataset",
"Model": {
"X": [
"@intercept",
"sex"
]
},
"DummyContrasts": {
"Type": "t"
}
}
],
"Edges": [
{
"Source": "run",
"Destination": "participant",
"GroupBy": [
"subject",
"contrast"
]
},
{
"Source": "participant",
"Destination": "by-group",
"GroupBy": [
"sex"
]
},
{
"Source": "participant",
"Destination": "group-diff",
"GroupBy": []
Comment on lines +91 to +106
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

GroupBy should now be in the nodes.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Shotgunosine could you look at this one.

}
]
}
Loading