Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] add frequency API and compute coffeine API #51

Merged
merged 34 commits into from
Jul 22, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
22b70a4
add frequency API and compute coffeine API
dengemann Jul 12, 2023
4ede86f
move new code to library
dengemann Jul 12, 2023
29396e6
update other notebook
dengemann Jul 12, 2023
3e5e887
fix pep8 ...
dengemann Jul 12, 2023
bdf4329
fix pep8 ...
dengemann Jul 12, 2023
84bd372
pep8 ...
dengemann Jul 12, 2023
35c3cfd
... fixes
dengemann Jul 12, 2023
3aa9158
fix kernel notebook
dengemann Jul 12, 2023
4e69b6f
add proper html docs [WIP] ...
dengemann Jul 16, 2023
123e0dd
rm not needed files
dengemann Jul 16, 2023
dacb831
fix sphinx errors
dengemann Jul 16, 2023
d7ff4e6
snapshot of docs
dengemann Jul 17, 2023
b9a0a20
add build workflow
dengemann Jul 18, 2023
70dd8fb
fixes
dengemann Jul 18, 2023
ec958c6
fixes..
dengemann Jul 18, 2023
ff34c4b
...fixes
dengemann Jul 18, 2023
0f06397
...fixes
dengemann Jul 18, 2023
7af2005
fix pandoc attempt
dengemann Jul 18, 2023
10d03fc
fix pandoc attempt 2
dengemann Jul 18, 2023
d21bfa1
fix updloader ...
dengemann Jul 18, 2023
242146b
fix manifest ...
dengemann Jul 18, 2023
655671e
fix doc path ...
dengemann Jul 18, 2023
8ca92f3
add missing docs and clean up stuff
dengemann Jul 20, 2023
33e4e82
update docs + fixes
dengemann Jul 20, 2023
4182b86
... fixes
dengemann Jul 20, 2023
fe0473e
... fixes
dengemann Jul 20, 2023
ba21a31
... fixes
dengemann Jul 20, 2023
0a1224a
... fixes ...
dengemann Jul 20, 2023
4b40c08
upgrade testing to python=3.9
dengemann Jul 20, 2023
8739790
add citation
dengemann Jul 20, 2023
400ae9e
doc fixes
dengemann Jul 20, 2023
babd18b
add get_frequencies test
dengemann Jul 20, 2023
fcfc274
add another test ...
dengemann Jul 21, 2023
611d612
add test for compute coffeine
dengemann Jul 22, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion coffeine/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,6 @@

from .pipelines import make_filter_bank_transformer, make_filter_bank_regressor, make_filter_bank_classifier # noqa

from .power_features import compute_features # noqa
from .power_features import compute_features, get_frequency_bands, compute_coffeine, make_coffeine_df # noqa

from .spatial_filters import ProjIdentitySpace, ProjCommonSpace, ProjLWSpace, ProjRandomSpace, ProjSPoCSpace # noqa
23 changes: 13 additions & 10 deletions coffeine/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,15 +137,15 @@ def make_filter_bank_transformer(
vectorization_params : dict | None
The parameters for the vectorization step.
kernel : None | 'gaussian' | sklearn.Pipeline
The Kernel option for kernel regression. If 'gaussian', a Gaussian Kernel
will be added per column and the results will be summed over frequencies.
If sklearn.pipeline.Pipeline is passed, it should return a meaningful
kernel.
The Kernel option for kernel regression. If 'gaussian', a Gaussian
Kernel will be added per column and the results will be summed over
frequencies. If sklearn.pipeline.Pipeline is passed, it should return
a meaningful kernel.
combine_kernels : None | 'sum' | sklearn.pipeline.Pipeline
If kernel is used and multiple columns are defined, this option determines
how a combined kernel is constructed. 'sum' adds the kernels with equal
weights. A custom pipeline pipeline can be passed to implement alternative
rules.
If kernel is used and multiple columns are defined, this option
determines how a combined kernel is constructed. 'sum' adds the
kernels with equal weights. A custom pipeline pipeline can be passed to
implement alternative rules.
categorical_interaction : str
The column in the input data frame containing a binary descriptor
used to fit 2-way interaction effects.
Expand Down Expand Up @@ -233,15 +233,18 @@ def _get_projector_vectorizer(projection, vectorization, kernel=None):
# add Kernel options
if (isinstance(kernel, Pipeline) and not
isinstance(kernel, (BaseEstimator, TransformerMixin))):
raise ValueError('Custom kernel must be an estimator and a transformer).')
raise ValueError(
'Custom kernel must be an estimator and a transformer).'
)
elif kernel == 'gaussian':
kernel = (
'gaussiankernel', GaussianKernel
)
combine_kernels = 'sum'

filter_bank_transformer = make_column_transformer(
*_get_projector_vectorizer(*steps, kernel=kernel), remainder='passthrough'
*_get_projector_vectorizer(*steps, kernel=kernel),
remainder='passthrough'
)

if combine_kernels is not None:
Expand Down
241 changes: 229 additions & 12 deletions coffeine/power_features.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,36 @@
from typing import Union
import numpy as np
import pandas as pd
from scipy.stats import trim_mean
from pyriemann.estimation import CospCovariances
import mne
from mne.io import BaseRaw
from mne.epochs import BaseEpochs


def _compute_covs_raw(raw, clean_events, frequency_bands, duration):
def _compute_covs_raw(raw, clean_events, frequency_bands, duration, method):
    """Compute one covariance matrix per frequency band from raw data.

    The raw signal is band-pass filtered per band, segmented into
    fixed-length epochs at the provided (clean) events, and a covariance
    is estimated per band with MNE.

    Parameters
    ----------
    raw : mne.io.BaseRaw
        The continuous recording.
    clean_events : np.ndarray
        MNE-style events array (event id 3000) marking clean segments.
    frequency_bands : dict
        Mapping of band names to (fmin, fmax) tuples.
    duration : float
        Epoch length in seconds (tmax of each epoch).
    method : str
        Covariance estimator passed to ``mne.compute_covariance``.

    Returns
    -------
    np.ndarray, shape (n_bands, n_channels, n_channels)
        The per-band covariance matrices.
    """
    covs = list()
    for _, fb in frequency_bands.items():
        # Filter a copy so each band starts from the unfiltered signal.
        rf = raw.copy().load_data().filter(fb[0], fb[1])
        ec = mne.Epochs(
            rf, clean_events, event_id=3000, tmin=0, tmax=duration,
            proj=True, baseline=None, reject=None, preload=False, decim=1,
            picks=None)
        cov = mne.compute_covariance(ec, method=method, rank=None)
        covs.append(cov.data)
    return np.array(covs)


def _compute_covs_epochs(epochs, frequency_bands):
def _compute_covs_epochs(epochs, frequency_bands, method):
    """Compute one covariance matrix per frequency band from epochs.

    Parameters
    ----------
    epochs : mne.BaseEpochs
        The epoched data.
    frequency_bands : dict
        Mapping of band names to (fmin, fmax) tuples.
    method : str
        Covariance estimator passed to ``mne.compute_covariance``.

    Returns
    -------
    np.ndarray, shape (n_bands, n_channels, n_channels)
        The per-band covariance matrices.
    """
    covs = list()
    for _, fb in frequency_bands.items():
        # Filter a copy so each band starts from the unfiltered epochs.
        ec = epochs.copy().load_data().filter(fb[0], fb[1])
        cov = mne.compute_covariance(ec, method=method, rank=None)
        covs.append(cov.data)
    return np.array(covs)


def _compute_cross_frequency_covs(epochs, frequency_bands):
def _compute_cross_frequency_covs(epochs, frequency_bands, method):
epochs_frequency_bands = []
for ii, (fbname, fb) in enumerate(frequency_bands.items()):
ef = epochs.copy().load_data().filter(fb[0], fb[1])
Expand All @@ -40,7 +42,7 @@ def _compute_cross_frequency_covs(epochs, frequency_bands):
for e in epochs_frequency_bands[1:]:
epochs_final.add_channels([e], force_update_info=True)
n_chan = epochs_final.info['nchan']
cov = mne.compute_covariance(epochs_final, method='oas', rank=None)
cov = mne.compute_covariance(epochs_final, method=method, rank=None)
corr = np.corrcoef(
epochs_final.get_data().transpose((1, 0, 2)).reshape(n_chan, -1))
return cov.data, corr
Expand All @@ -53,6 +55,214 @@ def _compute_cospectral_covs(epochs, n_fft, n_overlap, fmin, fmax, fs):
return cospectral_covs.transform(X).mean(axis=0).transpose((2, 0, 1))


def get_frequency_bands(collection: str = 'ipeg',
                        subset: Union[list, tuple, None] = None) -> dict:
    """Get pre-specified frequency bands based on the literature.

    Next to sets of bands for defining filterbank models, the aggregates
    defined in the corresponding literature are provided.

    .. note::
        The HCP-MEG[1] frequency bands were historically based on the
        documentation of the MEG analysis from the HCP-500 MEG2 release:
        https://wiki.humanconnectome.org/display/PublicData/MEG+Data+FAQ

        As frequencies below 1.5Hz were omitted, the work presented in [2,3]
        also defined a 'low' band (0.1 - 1.5Hz) while retaining the other
        frequencies.

    .. note::
        The IPEG frequency bands were developed in [4].

    .. note::
        Additional band definitions can be added as per (pull) request.

    Parameters
    ----------
    collection : {'ipeg', 'ipeg_aggregated', 'hcp', 'hcp_aggregated'}
        The set of frequency bands. Defaults to 'ipeg'.
    subset : list-like | None
        A selection of valid keys to return a subset of frequency
        bands from a collection. Keys absent from the collection are
        silently ignored. Defaults to None (return all bands).

    Returns
    -------
    frequency_bands : dict
        The band definitions, mapping band names to (fmin, fmax) tuples.

    Raises
    ------
    ValueError
        If ``collection`` is not one of the supported collection names.

    References
    ----------
    [1] Larson-Prior, L. J., R. Oostenveld, S. Della Penna, G. Michalareas,
        F. Prior, A. Babajani-Feremi, J-M Schoffelen, et al. 2013.
        “Adding Dynamics to the Human Connectome Project with MEG.”
        NeuroImage 80 (October): 190–201.
    [2] D. Sabbagh, P. Ablin, G. Varoquaux, A. Gramfort, and D.A. Engemann.
        Predictive regression modeling with MEG/EEG: from source power
        to signals and cognitive states.
        *NeuroImage*, page 116893,2020. ISSN 1053-8119.
        https://doi.org/10.1016/j.neuroimage.2020.116893
    [3] D. A. Engemann, O. Kozynets, D. Sabbagh, G. Lemaître, G. Varoquaux,
        F. Liem, and A. Gramfort Combining magnetoencephalography with
        magnetic resonance imaging enhances learning of surrogate-biomarkers.
        eLife, 9:e54055, 2020 <https://elifesciences.org/articles/54055>
    [4] Jobert, M., Wilson, F.J., Ruigt, G.S., Brunovsky, M., Prichep,
        L.S., Drinkenburg, W.H. and IPEG Pharmaco-EEG Guideline Committee,
        2012. Guidelines for the recording and evaluation of pharmaco-EEG data
        in man: the International Pharmaco-EEG Society (IPEG).
        Neuropsychobiology, 66(4), pp.201-220.
    """
    frequency_bands = dict()
    if collection == 'ipeg':
        frequency_bands.update({
            "delta": (1.5, 6.0),
            "theta": (6.0, 8.5),
            "alpha1": (8.5, 10.5),
            "alpha2": (10.5, 12.5),
            "beta1": (12.5, 18.5),
            "beta2": (18.5, 21.0),
            "beta3": (21.0, 30.0),
            "gamma": (30.0, 40.0),
        })  # total: 1.5-30; dominant: 6-12.5
    elif collection == 'ipeg_aggregated':
        frequency_bands.update({
            'total': (1.5, 30),
            'dominant': (6, 12.5)
        })
    elif collection == 'hcp':
        # https://www.humanconnectome.org/storage/app/media/documentation/
        # s500/hcps500meg2releasereferencemanual.pdf
        frequency_bands.update({
            'low': (0.1, 1.5),  # added later in [2,3].
            'delta': (1.5, 4.0),
            'theta': (4.0, 8.0),
            'alpha': (8.0, 15.0),
            'beta_low': (15.0, 26.0),
            'beta_high': (26.0, 35.0),
            'gamma_low': (35.0, 50.0),
            'gamma_mid': (50.0, 76.0),
            'gamma_high': (76.0, 120.0)
        })
    elif collection == 'hcp_aggregated':
        frequency_bands.update({
            'wide_band': (1.5, 150.0)
        })
    else:
        # Previously an unknown name silently yielded an empty dict;
        # fail loudly instead so typos are caught early.
        raise ValueError(
            f"Unknown collection '{collection}'. Supported collections are: "
            "'ipeg', 'ipeg_aggregated', 'hcp', 'hcp_aggregated'."
        )
    if subset is not None:
        frequency_bands = {
            band: freqs for band, freqs in frequency_bands.items()
            if band in subset
        }
    return frequency_bands


def make_coffeine_df(C: np.ndarray,
                     names: Union[dict, list, tuple, None] = None
                     ) -> pd.DataFrame:
    """Put covariances in a coffeine DataFrame.

    Parameters
    ----------
    C : np.ndarray, shape(n_obs, n_frequencies, n_channels, n_channels)
        A 4D collection of symmetric matrices. First dimension: samples.
        Second dimension: batches within observations (e.g. frequencies).
        The last two dimensions must be square.
    names : dict or list-like, defaults to None
        A descriptor for the second dimension of `C`, used to name the
        columns of the coffeine DataFrame. If a dict, its keys are used.
        If None, columns are named 'c0', 'c1', ....

    Returns
    -------
    C_df : pd.DataFrame
        The DataFrame of object type with lists of covariances accessible
        as columns.

    Raises
    ------
    ValueError
        If `C` is not 4D, its last two dimensions are not square, or
        `names` does not match the second dimension of `C`.
    """
    # Validate with explicit errors rather than asserts (asserts are
    # stripped under `python -O`).
    if C.ndim != 4:
        raise ValueError(f'Expected a 4D array, got {C.ndim}D.')
    if C.shape[2] != C.shape[3]:
        raise ValueError(
            'The last two dimensions of C must be square, got '
            f'{C.shape[2]} x {C.shape[3]}.')

    if names is None:
        names_ = [f'c{cc}' for cc in range(C.shape[1])]
    else:
        # A dict contributes its keys, preserving insertion order.
        names_ = list(names)
        if len(names_) != C.shape[1]:
            raise ValueError(
                f'Expected {C.shape[1]} names to match the second '
                f'dimension of C, got {len(names_)}.')

    C_df = pd.DataFrame(
        {name: list(C[:, ii]) for ii, name in enumerate(names_)}
    )
    return C_df


def compute_coffeine(
    inst: Union[mne.io.BaseRaw, mne.BaseEpochs],
    frequencies: Union[str, tuple, dict] = 'ipeg',
    methods_params: Union[None, dict] = None
) -> tuple:
    """Compute spectral features as SPD matrices in a DataFrame.

    Parameters
    ----------
    inst : mne.io.Raw | mne.Epochs or list-like
        The MNE instance containing raw signals from which to compute
        the features. If list-like, expected to contain MNE instances.
    frequencies : str | tuple | dict
        The frequency parameter. Either the name of a collection supported
        by `get_frequency_bands` ('ipeg' or 'hcp'), a tuple of
        (collection name, subset of band names), or a dictionary of
        frequency names and ranges.
    methods_params : dict | None
        The methods parameters used in the down-stream function for
        feature computation; entries override the defaults passed to
        `compute_features`.

    Returns
    -------
    C_df : pd.DataFrame
        The coffeine DataFrame with columns filled with object arrays of
        covariances.
    feature_info : dict
        The feature-computation info returned by `compute_features`.
        NOTE(review): only the info from the last processed instance is
        returned — confirm this is intended for multi-instance input.

    Raises
    ------
    ValueError
        If `inst` is of an unexpected type or yields no instances.
    NotImplementedError
        If `frequencies` is neither a supported collection name nor a
        dict of band definitions.
    """
    # Normalize the input into a flat list of MNE instances.
    instance_list = list()
    if isinstance(inst, mne.io.BaseRaw):
        instance_list.append(inst)
    elif isinstance(inst, mne.BaseEpochs):
        if len(inst) == 1:
            instance_list.append(inst)
        elif len(inst) > 1:
            # One observation per epoch.
            for ii in range(len(inst)):
                instance_list.append(inst[ii])
    elif isinstance(inst, list):
        instance_list.extend(inst)
    else:
        raise ValueError('Unexpected value for instance.')
    if not instance_list:
        raise ValueError('No instances to compute features from.')

    # Resolve the frequency-band definitions.
    frequencies_ = None
    if frequencies in ('ipeg', 'hcp'):
        frequencies_ = get_frequency_bands(collection=frequencies)
    elif isinstance(frequencies, tuple) and frequencies[0] in ('ipeg', 'hcp'):
        frequencies_ = get_frequency_bands(
            collection=frequencies[0], subset=frequencies[1]
        )
    elif isinstance(frequencies, dict):
        frequencies_ = frequencies
    else:
        raise NotImplementedError(
            'Currently, only collection names or fully-spelled band ranges are'
            ' supported as frequency definitions.'
        )

    # Derive fmin/fmax from the union of all band edges.
    freq_values = sum([list(v) for v in frequencies_.values()], [])
    methods_params_fb_bands_ = dict(
        features=('covs',), n_fft=1024, n_overlap=512,
        cov_method='oas', fs=instance_list[0].info['sfreq'],
        frequency_bands=frequencies_,
        fmin=min(freq_values), fmax=max(freq_values)
    )
    if methods_params is not None:
        methods_params_fb_bands_.update(methods_params)

    C = list()
    for this_inst in instance_list:
        features, feature_info = compute_features(
            this_inst, **methods_params_fb_bands_
        )
        C.append(features['covs'])
    C = np.array(C)
    C_df = make_coffeine_df(C=C, names=frequencies_)
    return C_df, feature_info


def compute_features(
inst,
features=('psds', 'covs'),
Expand All @@ -65,6 +275,7 @@ def compute_features(
fmax=30,
frequency_bands=None,
clean_func=lambda x: x,
cov_method='oas',
n_jobs=1):
"""Compute features from raw data or clean epochs.

Expand Down Expand Up @@ -106,6 +317,10 @@ def compute_features(
If nothing is provided, defaults to {'alpha': (8.0, 12.0)}.
clean_func : lambda function
If nothing is provided, defaults to lambda x: x.
cov_method : str (default 'oas')
The covariance estimator to be used. Ignored for feature types not
not related to covariances. Must be a method accepted by MNE's
covariance functions.
n_jobs : int
If nothing is provided, defaults to 1.

Expand Down Expand Up @@ -136,13 +351,14 @@ def compute_features(
clean_events = events[epochs_clean.selection]
if 'covs' in features:
covs = _compute_covs_raw(inst, clean_events, frequency_bands_,
duration)
duration, method=cov_method)
computed_features['covs'] = covs

elif isinstance(inst, BaseEpochs):
epochs_clean = clean_func(inst)
if 'covs' in features:
covs = _compute_covs_epochs(epochs_clean, frequency_bands_)
covs = _compute_covs_epochs(epochs_clean, frequency_bands_,
method=cov_method)
computed_features['covs'] = covs
else:
raise ValueError('Inst must be raw or epochs.')
Expand All @@ -163,8 +379,8 @@ def compute_features(

if 'psds' in features:
spectrum = epochs_clean.compute_psd(
method="welch", fmin=fmin, fmax=fmax, n_fft=n_fft,
n_overlap=n_overlap, average='mean', picks=None)
method="welch", fmin=fmin, fmax=fmax, n_fft=n_fft,
n_overlap=n_overlap, average='mean', picks=None)
psds_clean = spectrum.get_data()
psds = trim_mean(psds_clean, 0.25, axis=0)
computed_features['psds'] = psds
Expand All @@ -174,14 +390,15 @@ def compute_features(
'cross_frequency_corrs' in features):
(cross_frequency_covs,
cross_frequency_corrs) = _compute_cross_frequency_covs(
epochs_clean, frequency_bands_)
epochs_clean, frequency_bands_, method=cov_method)
computed_features['cross_frequency_covs'] = cross_frequency_covs
computed_features['cross_frequency_corrs'] = cross_frequency_corrs

if 'cospectral_covs' in features:
cospectral_covs = _compute_cospectral_covs(epochs_clean, n_fft,
n_overlap,
fmin, fmax, fs)
fmin, fmax, fs,
method=cov_method)
computed_features['cospectral_covs'] = cospectral_covs

return computed_features, res
Loading
Loading