Skip to content

Commit

Permalink
Add surrogate data generation
Browse files Browse the repository at this point in the history
  • Loading branch information
tsbinns committed Aug 2, 2024
1 parent 00c381a commit e4f40cc
Show file tree
Hide file tree
Showing 7 changed files with 472 additions and 171 deletions.
3 changes: 2 additions & 1 deletion doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -111,4 +111,5 @@ Dataset functions
.. autosummary::
:toctree: generated/

make_signals_in_freq_bands
make_signals_in_freq_bands
make_surrogate_data
11 changes: 11 additions & 0 deletions doc/references.bib
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,17 @@ @book{OppenheimEtAl1999
year = {1999}
}

@article{PellegriniEtAl2023,
title={Identifying good practices for detecting inter-regional linear functional connectivity from {EEG}},
author={Pellegrini, Franziska and Delorme, Arnaud and Nikulin, Vadim and Haufe, Stefan},
doi={10.1016/j.neuroimage.2023.120218},
journal={NeuroImage},
volume={277},
pages={120218},
year={2023},
publisher={Elsevier}
}

@book{SekiharaNagarajan2008,
author = {Sekihara, Kensuke and Nagarajan, Srikantan S.},
doi = {10.1007/978-3-540-79370-0},
Expand Down
2 changes: 1 addition & 1 deletion mne_connectivity/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
SpectroTemporalConnectivity,
TemporalConnectivity,
)
from .datasets import make_signals_in_freq_bands
from .datasets import make_signals_in_freq_bands, make_surrogate_data
from .decoding import CoherencyDecomposition
from .effective import phase_slope_index
from .envelope import envelope_correlation, symmetric_orth
Expand Down
1 change: 1 addition & 0 deletions mne_connectivity/datasets/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from .frequency import make_signals_in_freq_bands
from .surrogate import make_surrogate_data
141 changes: 141 additions & 0 deletions mne_connectivity/datasets/surrogate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
# Authors: Thomas S. Binns <[email protected]>
#
# License: BSD (3-clause)

import numpy as np
from mne.time_frequency import EpochsSpectrum, EpochsSpectrumArray
from mne.utils import _validate_type


def make_surrogate_data(data, n_shuffles=1000, rng_seed=None, return_generator=True):
    """Create surrogate data for a null hypothesis of connectivity.

    Parameters
    ----------
    data : ~mne.time_frequency.EpochsSpectrum | ~mne.time_frequency.EpochsSpectrumArray
        The Fourier coefficients to create the null hypothesis surrogate data for. Can
        be generated from :meth:`mne.Epochs.compute_psd` with ``output='complex'``
        (requires ``mne >= 1.8``).
    n_shuffles : int (default 1000)
        The number of surrogate datasets to create.
    rng_seed : int | None (default None)
        The seed to use for the random number generator. If ``None``, no seed is
        specified.
    return_generator : bool (default True)
        Whether or not to return the surrogate data as a :term:`generator` object
        instead of a :class:`list`. This allows iterating over the surrogates without
        having to keep them all in memory.

    Returns
    -------
    surrogate_data : list of ~mne.time_frequency.EpochsSpectrum
        The surrogate data for the null hypothesis with ``n_shuffles`` entries. Returned
        as a :term:`generator` if ``return_generator=True``.

    Raises
    ------
    TypeError
        If the values in ``data`` are not complex-valued.
    ValueError
        If ``data`` contains fewer than two epochs or fewer than two channels, or if
        ``n_shuffles`` is less than 1.

    Notes
    -----
    Surrogate data is generated by randomly shuffling the order of epochs, independently
    for each channel. This destroys the covariance of the data, such that connectivity
    estimates should reflect the null hypothesis of no genuine connectivity between
    signals (i.e., only interactions due to background noise)
    :footcite:`PellegriniEtAl2023`.

    For the surrogate data to properly reflect a null hypothesis, the data which is
    shuffled **must not** have a temporal structure that is consistent across epochs.
    Examples of this data include evoked potentials, where a stimulus is presented or an
    action performed at a set time during each epoch. Such data should not be used for
    generating surrogates, as even after shuffling the epochs, it will still show a high
    degree of residual connectivity between channels. As a result, connectivity
    estimates from your surrogate data will capture genuine interactions, instead of the
    desired background noise. Treating these estimates as a null hypothesis will
    increase the likelihood of a type II (false negative) error, i.e., that there is no
    significant connectivity in your data.

    Appropriate data for generating surrogates includes data from a resting state,
    inter-trial period, or similar. Here, a strong temporal consistency across epochs is
    not assumed, reducing the chances that connectivity information of interest is
    captured in your surrogate connectivity estimates.

    In situations where you want to assess whether evoked data has significant
    connectivity, you can generate your surrogate connectivity estimates from non-evoked
    data (e.g., rest data, inter-trial data) and compare this to your true connectivity
    estimates from the evoked data.

    Regardless of whether you are working with evoked or non-evoked data, **you should
    always compare true and surrogate connectivity estimates from epochs of the same
    duration**. This will ensure that spectral information is captured with the same
    accuracy in both sets of connectivity estimates. Ideally, **you should also compare
    true and surrogate connectivity estimates from the same number of epochs** to avoid
    a noise bias for one set of estimates (fewer epochs gives noisier connectivity
    estimates).

    .. versionadded:: 0.8

    References
    ----------
    .. footbibliography::
    """
    # Validate inputs
    _validate_type(
        data,
        (EpochsSpectrum, EpochsSpectrumArray),
        "data",
        "mne.time_frequency.EpochsSpectrum or mne.time_frequency.EpochsSpectrumArray",
    )
    data_arr = data.get_data()  # fetch once; reused for the dtype and shape checks
    if not np.iscomplexobj(data_arr):
        raise TypeError("values in `data` must be complex-valued")
    n_epochs, n_chans = data_arr.shape[:2]
    # `< 2` rather than `== 1` so that empty data is rejected as well
    if n_epochs < 2:
        raise ValueError("data must contain more than one epoch for shuffling")
    if n_chans < 2:
        raise ValueError("data must contain more than one channel for shuffling")

    _validate_type(n_shuffles, "int-like", "n_shuffles", "int")
    if n_shuffles < 1:
        raise ValueError("number of shuffles must be >= 1")

    _validate_type(return_generator, bool, "return_generator", "bool")

    # Build the RNG here so that NumPy rejects an invalid `rng_seed` at call time. The
    # shuffling helper is a lazy generator, so leaving the check to it would postpone
    # the error until the first surrogate is requested. `np.random.default_rng` hands
    # back an existing Generator unaltered, so the helper can take `rng` in place of
    # the raw seed.
    rng = np.random.default_rng(rng_seed)

    # Make surrogate data and package into EpochsSpectrum objects
    surrogate_data = _shuffle_coefficients(data, n_shuffles, rng)
    if not return_generator:
        surrogate_data = list(surrogate_data)

    return surrogate_data


def _shuffle_coefficients(data, n_shuffles, rng_seed):
    """Yield surrogate spectra made by shuffling coefficients across epochs.

    Each iteration builds one surrogate: for every channel, the entries along the
    first (epochs) axis of the coefficient array are randomly permuted, independently
    of the other channels. The shuffled array is wrapped in an EpochsSpectrum object
    built from the state of ``data``. Surrogates are yielded one at a time so that
    they do not all need to be held in memory.

    Parameters
    ----------
    data : EpochsSpectrum | EpochsSpectrumArray
        The object whose coefficients (from ``get_data()``) are shuffled and whose
        state (from ``__getstate__()``) is reused for every surrogate.
    n_shuffles : int
        The number of surrogates to yield.
    rng_seed : int | None
        Passed to :func:`numpy.random.default_rng` to create the random number
        generator used for all shuffles.

    Yields
    ------
    surrogate : EpochsSpectrum
        The surrogate data for one shuffle.
    """
    # Pull out the coefficients and the information needed to rebuild a spectrum
    coefs = data.get_data()
    spectrum_state = data.__getstate__()
    n_chans = coefs.shape[1]

    # Keyword arguments handed to the EpochsSpectrum constructor alongside the state
    init_kwargs = {
        "method": None,
        "fmin": None,
        "fmax": None,
        "tmin": None,
        "tmax": None,
        "picks": None,
        "exclude": (),
        "proj": None,
        "remove_dc": None,
        "n_jobs": None,
        "verbose": None,
    }

    # A single generator drives every shuffle, so one seed fixes the whole sequence
    rng = np.random.default_rng(rng_seed)
    for _ in range(n_shuffles):
        # Fresh output array per surrogate, matching the input's shape and dtype
        shuffled = np.zeros_like(coefs)
        for chan_idx in range(n_chans):
            # Permute the epoch order for this channel only
            shuffled[:, chan_idx] = rng.permutation(coefs[:, chan_idx], axis=0)

        # Swap the shuffled coefficients into the state and package them
        spectrum_state["data"] = shuffled
        yield EpochsSpectrum(spectrum_state, **init_kwargs)
Loading

0 comments on commit e4f40cc

Please sign in to comment.