Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] add frequency API and compute coffeine API #51

Merged
merged 34 commits into from
Jul 22, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
22b70a4
add frequency API and compute coffeine API
dengemann Jul 12, 2023
4ede86f
move new code to library
dengemann Jul 12, 2023
29396e6
update other notebook
dengemann Jul 12, 2023
3e5e887
fix pep8 ...
dengemann Jul 12, 2023
bdf4329
fix pep8 ...
dengemann Jul 12, 2023
84bd372
pep8 ...
dengemann Jul 12, 2023
35c3cfd
... fixes
dengemann Jul 12, 2023
3aa9158
fix kernel notebook
dengemann Jul 12, 2023
4e69b6f
add proper html docs [WIP] ...
dengemann Jul 16, 2023
123e0dd
rm not needed files
dengemann Jul 16, 2023
dacb831
fix sphinx errors
dengemann Jul 16, 2023
d7ff4e6
snapshot of docs
dengemann Jul 17, 2023
b9a0a20
add build workflow
dengemann Jul 18, 2023
70dd8fb
fixes
dengemann Jul 18, 2023
ec958c6
fixes..
dengemann Jul 18, 2023
ff34c4b
...fixes
dengemann Jul 18, 2023
0f06397
...fixes
dengemann Jul 18, 2023
7af2005
fix pandoc attempt
dengemann Jul 18, 2023
10d03fc
fix pandoc attempt 2
dengemann Jul 18, 2023
d21bfa1
fix updloader ...
dengemann Jul 18, 2023
242146b
fix manifest ...
dengemann Jul 18, 2023
655671e
fix doc path ...
dengemann Jul 18, 2023
8ca92f3
add missing docs and clean up stuff
dengemann Jul 20, 2023
33e4e82
update docs + fixes
dengemann Jul 20, 2023
4182b86
... fixes
dengemann Jul 20, 2023
fe0473e
... fixes
dengemann Jul 20, 2023
ba21a31
... fixes
dengemann Jul 20, 2023
0a1224a
... fixes ...
dengemann Jul 20, 2023
4b40c08
upgrade testing to python=3.9
dengemann Jul 20, 2023
8739790
add citation
dengemann Jul 20, 2023
400ae9e
doc fixes
dengemann Jul 20, 2023
babd18b
add get_frequencies test
dengemann Jul 20, 2023
fcfc274
add another test ...
dengemann Jul 21, 2023
611d612
add test for compute coffeine
dengemann Jul 22, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion coffeine/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,6 @@

from .pipelines import make_filter_bank_transformer, make_filter_bank_regressor, make_filter_bank_classifier # noqa

from .power_features import compute_features # noqa
from .power_features import compute_features, get_frequency_bands, compute_coffeine, make_coffeine_df # noqa

from .spatial_filters import ProjIdentitySpace, ProjCommonSpace, ProjLWSpace, ProjRandomSpace, ProjSPoCSpace # noqa
23 changes: 13 additions & 10 deletions coffeine/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,15 +137,15 @@ def make_filter_bank_transformer(
vectorization_params : dict | None
The parameters for the vectorization step.
kernel : None | 'gaussian' | sklearn.Pipeline
The Kernel option for kernel regression. If 'gaussian', a Gaussian Kernel
will be added per column and the results will be summed over frequencies.
If sklearn.pipeline.Pipeline is passed, it should return a meaningful
kernel.
The Kernel option for kernel regression. If 'gaussian', a Gaussian
Kernel will be added per column and the results will be summed over
frequencies. If sklearn.pipeline.Pipeline is passed, it should return
a meaningful kernel.
combine_kernels : None | 'sum' | sklearn.pipeline.Pipeline
If kernel is used and multiple columns are defined, this option determines
how a combined kernel is constructed. 'sum' adds the kernels with equal
weights. A custom pipeline pipeline can be passed to implement alternative
rules.
If kernel is used and multiple columns are defined, this option
determines how a combined kernel is constructed. 'sum' adds the
kernels with equal weights. A custom pipeline pipeline can be passed to
implement alternative rules.
categorical_interaction : str
The column in the input data frame containing a binary descriptor
used to fit 2-way interaction effects.
Expand Down Expand Up @@ -233,15 +233,18 @@ def _get_projector_vectorizer(projection, vectorization, kernel=None):
# add Kernel options
if (isinstance(kernel, Pipeline) and not
isinstance(kernel, (BaseEstimator, TransformerMixin))):
raise ValueError('Custom kernel must be an estimator and a transformer).')
raise ValueError(
'Custom kernel must be an estimator and a transformer).'
)
elif kernel == 'gaussian':
kernel = (
'gaussiankernel', GaussianKernel
)
combine_kernels = 'sum'

filter_bank_transformer = make_column_transformer(
*_get_projector_vectorizer(*steps, kernel=kernel), remainder='passthrough'
*_get_projector_vectorizer(*steps, kernel=kernel),
remainder='passthrough'
)

if combine_kernels is not None:
Expand Down
241 changes: 229 additions & 12 deletions coffeine/power_features.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,36 @@
from typing import Union
import numpy as np
import pandas as pd
from scipy.stats import trim_mean
from pyriemann.estimation import CospCovariances
import mne
from mne.io import BaseRaw
from mne.epochs import BaseEpochs


def _compute_covs_raw(raw, clean_events, frequency_bands, duration):
def _compute_covs_raw(raw, clean_events, frequency_bands, duration, method):
    """Compute one covariance matrix per frequency band from raw data.

    The raw signal is band-pass filtered per band, segmented into
    fixed-length epochs at the provided (clean) events, and a covariance
    is estimated per band with MNE.

    Parameters
    ----------
    raw : mne.io.BaseRaw
        The continuous recording.
    clean_events : np.ndarray
        MNE-style events array (event id 3000) marking clean segments.
    frequency_bands : dict
        Mapping of band names to (fmin, fmax) tuples.
    duration : float
        Epoch length in seconds (tmax of each epoch).
    method : str
        Covariance estimator passed to ``mne.compute_covariance``.

    Returns
    -------
    np.ndarray, shape (n_bands, n_channels, n_channels)
        The per-band covariance matrices.
    """
    covs = list()
    for _, fb in frequency_bands.items():
        # Filter a copy so each band starts from the unfiltered signal.
        rf = raw.copy().load_data().filter(fb[0], fb[1])
        ec = mne.Epochs(
            rf, clean_events, event_id=3000, tmin=0, tmax=duration,
            proj=True, baseline=None, reject=None, preload=False, decim=1,
            picks=None)
        cov = mne.compute_covariance(ec, method=method, rank=None)
        covs.append(cov.data)
    return np.array(covs)


def _compute_covs_epochs(epochs, frequency_bands):
def _compute_covs_epochs(epochs, frequency_bands, method):
    """Compute one covariance matrix per frequency band from epochs.

    Parameters
    ----------
    epochs : mne.BaseEpochs
        The epoched data.
    frequency_bands : dict
        Mapping of band names to (fmin, fmax) tuples.
    method : str
        Covariance estimator passed to ``mne.compute_covariance``.

    Returns
    -------
    np.ndarray, shape (n_bands, n_channels, n_channels)
        The per-band covariance matrices.
    """
    covs = list()
    for _, fb in frequency_bands.items():
        # Filter a copy so each band starts from the unfiltered epochs.
        ec = epochs.copy().load_data().filter(fb[0], fb[1])
        cov = mne.compute_covariance(ec, method=method, rank=None)
        covs.append(cov.data)
    return np.array(covs)


def _compute_cross_frequency_covs(epochs, frequency_bands):
def _compute_cross_frequency_covs(epochs, frequency_bands, method):
epochs_frequency_bands = []
for ii, (fbname, fb) in enumerate(frequency_bands.items()):
ef = epochs.copy().load_data().filter(fb[0], fb[1])
Expand All @@ -40,7 +42,7 @@ def _compute_cross_frequency_covs(epochs, frequency_bands):
for e in epochs_frequency_bands[1:]:
epochs_final.add_channels([e], force_update_info=True)
n_chan = epochs_final.info['nchan']
cov = mne.compute_covariance(epochs_final, method='oas', rank=None)
cov = mne.compute_covariance(epochs_final, method=method, rank=None)
corr = np.corrcoef(
epochs_final.get_data().transpose((1, 0, 2)).reshape(n_chan, -1))
return cov.data, corr
Expand All @@ -53,6 +55,214 @@ def _compute_cospectral_covs(epochs, n_fft, n_overlap, fmin, fmax, fs):
return cospectral_covs.transform(X).mean(axis=0).transpose((2, 0, 1))


def get_frequency_bands(collection: str = 'ipeg',
                        subset: Union[list, tuple, None] = None) -> dict:
    """Get pre-specified frequency bands based on the literature.

    Next to sets of bands for defining filterbank models, the aggregates
    defined in the corresponding literature are provided.

    .. note::
        The HCP-MEG[1] frequency bands were historically based on the
        documentation of the MEG analysis from the HCP-500 MEG2 release:
        https://wiki.humanconnectome.org/display/PublicData/MEG+Data+FAQ

        As frequencies below 1.5Hz were omitted, the work presented in [2,3]
        also defined a 'low' band (0.1 - 1.5Hz) while retaining the other
        frequencies.

    .. note::
        The IPEG frequency bands were developed in [4].

    .. note::
        Additional band definitions can be added as per (pull) request.

    Parameters
    ----------
    collection : {'ipeg', 'ipeg_aggregated', 'hcp', 'hcp_aggregated'}
        The set of frequency bands. Defaults to 'ipeg'.
    subset : list-like | None
        A selection of valid keys to return a subset of frequency
        bands from a collection. Keys absent from the collection are
        silently ignored. Defaults to None (return all bands).

    Returns
    -------
    frequency_bands : dict
        The band definitions, mapping band names to (fmin, fmax) tuples.

    Raises
    ------
    ValueError
        If ``collection`` is not one of the supported collection names.

    References
    ----------
    [1] Larson-Prior, L. J., R. Oostenveld, S. Della Penna, G. Michalareas,
        F. Prior, A. Babajani-Feremi, J-M Schoffelen, et al. 2013.
        “Adding Dynamics to the Human Connectome Project with MEG.”
        NeuroImage 80 (October): 190–201.
    [2] D. Sabbagh, P. Ablin, G. Varoquaux, A. Gramfort, and D.A. Engemann.
        Predictive regression modeling with MEG/EEG: from source power
        to signals and cognitive states.
        *NeuroImage*, page 116893,2020. ISSN 1053-8119.
        https://doi.org/10.1016/j.neuroimage.2020.116893
    [3] D. A. Engemann, O. Kozynets, D. Sabbagh, G. Lemaître, G. Varoquaux,
        F. Liem, and A. Gramfort Combining magnetoencephalography with
        magnetic resonance imaging enhances learning of surrogate-biomarkers.
        eLife, 9:e54055, 2020 <https://elifesciences.org/articles/54055>
    [4] Jobert, M., Wilson, F.J., Ruigt, G.S., Brunovsky, M., Prichep,
        L.S., Drinkenburg, W.H. and IPEG Pharmaco-EEG Guideline Committee,
        2012. Guidelines for the recording and evaluation of pharmaco-EEG data
        in man: the International Pharmaco-EEG Society (IPEG).
        Neuropsychobiology, 66(4), pp.201-220.
    """
    frequency_bands = dict()
    if collection == 'ipeg':
        frequency_bands.update({
            "delta": (1.5, 6.0),
            "theta": (6.0, 8.5),
            "alpha1": (8.5, 10.5),
            "alpha2": (10.5, 12.5),
            "beta1": (12.5, 18.5),
            "beta2": (18.5, 21.0),
            "beta3": (21.0, 30.0),
            "gamma": (30.0, 40.0),
        })  # total: 1.5-30; dominant: 6-12.5
    elif collection == 'ipeg_aggregated':
        frequency_bands.update({
            'total': (1.5, 30),
            'dominant': (6, 12.5)
        })
    elif collection == 'hcp':
        # https://www.humanconnectome.org/storage/app/media/documentation/
        # s500/hcps500meg2releasereferencemanual.pdf
        frequency_bands.update({
            'low': (0.1, 1.5),  # added later in [2,3].
            'delta': (1.5, 4.0),
            'theta': (4.0, 8.0),
            'alpha': (8.0, 15.0),
            'beta_low': (15.0, 26.0),
            'beta_high': (26.0, 35.0),
            'gamma_low': (35.0, 50.0),
            'gamma_mid': (50.0, 76.0),
            'gamma_high': (76.0, 120.0)
        })
    elif collection == 'hcp_aggregated':
        frequency_bands.update({
            'wide_band': (1.5, 150.0)
        })
    else:
        # Previously an unknown name silently yielded an empty dict;
        # fail loudly instead so typos are caught early.
        raise ValueError(
            f"Unknown collection '{collection}'. Supported collections are: "
            "'ipeg', 'ipeg_aggregated', 'hcp', 'hcp_aggregated'."
        )
    if subset is not None:
        frequency_bands = {
            band: freqs for band, freqs in frequency_bands.items()
            if band in subset
        }
    return frequency_bands


def make_coffeine_df(C: np.ndarray,
                     names: Union[dict, list, tuple, None] = None
                     ) -> pd.DataFrame:
    """Put covariances in a coffeine DataFrame.

    Parameters
    ----------
    C : np.ndarray, shape(n_obs, n_frequencies, n_channels, n_channels)
        A 4D collection of symmetric matrices. First dimension: samples.
        Second dimension: batches within observations (e.g. frequencies).
        The last two dimensions must be square.
    names : dict or list-like, defaults to None
        A descriptor for the second dimension of `C`, used to name the
        columns of the coffeine DataFrame. If a dict, its keys are used.
        If None, columns are named 'c0', 'c1', ....

    Returns
    -------
    C_df : pd.DataFrame
        The DataFrame of object type with lists of covariances accessible
        as columns.

    Raises
    ------
    ValueError
        If `C` is not 4D, its last two dimensions are not square, or
        `names` does not match the second dimension of `C`.
    """
    # Validate with explicit errors rather than asserts (asserts are
    # stripped under `python -O`).
    if C.ndim != 4:
        raise ValueError(f'Expected a 4D array, got {C.ndim}D.')
    if C.shape[2] != C.shape[3]:
        raise ValueError(
            'The last two dimensions of C must be square, got '
            f'{C.shape[2]} x {C.shape[3]}.')

    if names is None:
        names_ = [f'c{cc}' for cc in range(C.shape[1])]
    else:
        # A dict contributes its keys, preserving insertion order.
        names_ = list(names)
        if len(names_) != C.shape[1]:
            raise ValueError(
                f'Expected {C.shape[1]} names to match the second '
                f'dimension of C, got {len(names_)}.')

    C_df = pd.DataFrame(
        {name: list(C[:, ii]) for ii, name in enumerate(names_)}
    )
    return C_df


def compute_coffeine(
    inst: Union[mne.io.BaseRaw, mne.BaseEpochs],
    frequencies: Union[str, tuple, dict] = 'ipeg',
    methods_params: Union[None, dict] = None
) -> tuple:
    """Compute spectral features as SPD matrices in a DataFrame.

    Parameters
    ----------
    inst : mne.io.Raw | mne.Epochs or list-like
        The MNE instance containing raw signals from which to compute
        the features. If list-like, expected to contain MNE instances.
    frequencies : str | tuple | dict
        The frequency parameter. Either the name of a collection supported
        by `get_frequency_bands` ('ipeg' or 'hcp'), a tuple of
        (collection name, subset of band names), or a dictionary of
        frequency names and ranges.
    methods_params : dict | None
        The methods parameters used in the down-stream function for
        feature computation; entries override the defaults passed to
        `compute_features`.

    Returns
    -------
    C_df : pd.DataFrame
        The coffeine DataFrame with columns filled with object arrays of
        covariances.
    feature_info : dict
        The feature-computation info returned by `compute_features`.
        NOTE(review): only the info from the last processed instance is
        returned — confirm this is intended for multi-instance input.

    Raises
    ------
    ValueError
        If `inst` is of an unexpected type or yields no instances.
    NotImplementedError
        If `frequencies` is neither a supported collection name nor a
        dict of band definitions.
    """
    # Normalize the input into a flat list of MNE instances.
    instance_list = list()
    if isinstance(inst, mne.io.BaseRaw):
        instance_list.append(inst)
    elif isinstance(inst, mne.BaseEpochs):
        if len(inst) == 1:
            instance_list.append(inst)
        elif len(inst) > 1:
            # One observation per epoch.
            for ii in range(len(inst)):
                instance_list.append(inst[ii])
    elif isinstance(inst, list):
        instance_list.extend(inst)
    else:
        raise ValueError('Unexpected value for instance.')
    if not instance_list:
        raise ValueError('No instances to compute features from.')

    # Resolve the frequency-band definitions.
    frequencies_ = None
    if frequencies in ('ipeg', 'hcp'):
        frequencies_ = get_frequency_bands(collection=frequencies)
    elif isinstance(frequencies, tuple) and frequencies[0] in ('ipeg', 'hcp'):
        frequencies_ = get_frequency_bands(
            collection=frequencies[0], subset=frequencies[1]
        )
    elif isinstance(frequencies, dict):
        frequencies_ = frequencies
    else:
        raise NotImplementedError(
            'Currently, only collection names or fully-spelled band ranges are'
            ' supported as frequency definitions.'
        )

    # Derive fmin/fmax from the union of all band edges.
    freq_values = sum([list(v) for v in frequencies_.values()], [])
    methods_params_fb_bands_ = dict(
        features=('covs',), n_fft=1024, n_overlap=512,
        cov_method='oas', fs=instance_list[0].info['sfreq'],
        frequency_bands=frequencies_,
        fmin=min(freq_values), fmax=max(freq_values)
    )
    if methods_params is not None:
        methods_params_fb_bands_.update(methods_params)

    C = list()
    for this_inst in instance_list:
        features, feature_info = compute_features(
            this_inst, **methods_params_fb_bands_
        )
        C.append(features['covs'])
    C = np.array(C)
    C_df = make_coffeine_df(C=C, names=frequencies_)
    return C_df, feature_info


def compute_features(
inst,
features=('psds', 'covs'),
Expand All @@ -65,6 +275,7 @@ def compute_features(
fmax=30,
frequency_bands=None,
clean_func=lambda x: x,
cov_method='oas',
n_jobs=1):
"""Compute features from raw data or clean epochs.

Expand Down Expand Up @@ -106,6 +317,10 @@ def compute_features(
If nothing is provided, defaults to {'alpha': (8.0, 12.0)}.
clean_func : lambda function
If nothing is provided, defaults to lambda x: x.
cov_method : str (default 'oas')
The covariance estimator to be used. Ignored for feature types not
not related to covariances. Must be a method accepted by MNE's
covariance functions.
n_jobs : int
If nothing is provided, defaults to 1.

Expand Down Expand Up @@ -136,13 +351,14 @@ def compute_features(
clean_events = events[epochs_clean.selection]
if 'covs' in features:
covs = _compute_covs_raw(inst, clean_events, frequency_bands_,
duration)
duration, method=cov_method)
computed_features['covs'] = covs

elif isinstance(inst, BaseEpochs):
epochs_clean = clean_func(inst)
if 'covs' in features:
covs = _compute_covs_epochs(epochs_clean, frequency_bands_)
covs = _compute_covs_epochs(epochs_clean, frequency_bands_,
method=cov_method)
computed_features['covs'] = covs
else:
raise ValueError('Inst must be raw or epochs.')
Expand All @@ -163,8 +379,8 @@ def compute_features(

if 'psds' in features:
spectrum = epochs_clean.compute_psd(
method="welch", fmin=fmin, fmax=fmax, n_fft=n_fft,
n_overlap=n_overlap, average='mean', picks=None)
method="welch", fmin=fmin, fmax=fmax, n_fft=n_fft,
n_overlap=n_overlap, average='mean', picks=None)
psds_clean = spectrum.get_data()
psds = trim_mean(psds_clean, 0.25, axis=0)
computed_features['psds'] = psds
Expand All @@ -174,14 +390,15 @@ def compute_features(
'cross_frequency_corrs' in features):
(cross_frequency_covs,
cross_frequency_corrs) = _compute_cross_frequency_covs(
epochs_clean, frequency_bands_)
epochs_clean, frequency_bands_, method=cov_method)
computed_features['cross_frequency_covs'] = cross_frequency_covs
computed_features['cross_frequency_corrs'] = cross_frequency_corrs

if 'cospectral_covs' in features:
cospectral_covs = _compute_cospectral_covs(epochs_clean, n_fft,
n_overlap,
fmin, fmax, fs)
fmin, fmax, fs,
method=cov_method)
computed_features['cospectral_covs'] = cospectral_covs

return computed_features, res
Loading
Loading