From 904bc018973f2984c422dbbeea71787e6924ef85 Mon Sep 17 00:00:00 2001 From: CesarCaballeroGaudes Date: Mon, 25 Nov 2019 16:54:23 +0100 Subject: [PATCH 01/19] change stats.py to compute OLS based on Pseudo-Inverse & Compute Z-statistics --- tedana/stats.py | 96 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 85 insertions(+), 11 deletions(-) diff --git a/tedana/stats.py b/tedana/stats.py index 601d800a0..e7de40ab2 100644 --- a/tedana/stats.py +++ b/tedana/stats.py @@ -12,6 +12,46 @@ RepLGR = logging.getLogger('REPORT') RefLGR = logging.getLogger('REFERENCES') +@due.dcite(references.T2Z_TRANSFORM, + description='Introduces T-to-Z transform.') +@due.dcite(references.T2Z_IMPLEMENTATION, + description='Python implementation of T-to-Z transform.') + +def t_to_z(t_values, dof): + """ + From Vanessa Sochat's TtoZ package. + """ + + # check if t_values is np.array, and convert if required + t_values = np.asanyarray(t_values) + + # Select just the nonzero voxels + nonzero = t_values[t_values != 0] + + # We will store our results here + z_values = np.zeros(len(nonzero)) + + # Select values less than or == 0, and greater than zero + c = np.zeros(len(nonzero)) + k1 = (nonzero <= c) + k2 = (nonzero > c) + # Subset the data into two sets + t1 = nonzero[k1] + t2 = nonzero[k2] + + # Calculate p values for <=0 + p_values_t1 = stats.t.cdf(t1, df=dof) + z_values_t1 = stats.norm.ppf(p_values_t1) + + # Calculate p values for > 0 + p_values_t2 = stats.t.cdf(-t2, df=dof) + z_values_t2 = -stats.norm.ppf(p_values_t2) + z_values[k1] = z_values_t1 + z_values[k2] = z_values_t2 + # Write new image to file + out = np.zeros(t_values.shape) + out[t_values != 0] = z_values + return out def getfbounds(n_echos): """ @@ -74,28 +114,27 @@ def computefeats2(data, mmix, mask=None, normalize=True): # demean masked data if mask is not None: data = data[mask, ...] + # normalize data (minus mean and divide by std) data_vn = stats.zscore(data, axis=-1) - # get betas of `data`~`mmix` and limit to range [-0.999, 0.999] - data_R = get_coeffs(data_vn, mmix, mask=None) - data_R[data_R < -0.999] = -0.999 - data_R[data_R > 0.999] = 0.999 - - # R-to-Z transform - data_Z = np.arctanh(data_R) + # get betas and z-values of `data`~`mmix` + # mmix is normalized internally + data_R, data_Z = get_coeffs(data_vn, mmix, mask=None, add_const=False, compute_zvalues=True) if data_Z.ndim == 1: data_Z = np.atleast_2d(data_Z).T - # normalize data + # normalize data (only division by std) if normalize: + # minus mean and divided by std data_Zm = stats.zscore(data_Z, axis=0) + # adding back the mean data_Z = data_Zm + (data_Z.mean(axis=0, keepdims=True) / data_Z.std(axis=0, keepdims=True)) return data_Z -def get_coeffs(data, X, mask=None, add_const=False): +def get_coeffs(data, X, mask=None, add_const=False, compute_zvalues=True, min_df=1): """ Performs least-squares fit of `X` against `data` @@ -109,11 +148,18 @@ def get_coeffs(data, X, mask=None, add_const=False): Boolean mask array add_const : bool, optional Add intercept column to `X` before fitting. 
Default: False + compute_zvalues : bool, optional + Compute z-values of the betas (predictors) + min_df : integer, optional + Integer to give warning if # df <= min_df Returns ------- betas : (S [x E] x C) :obj:`numpy.ndarray` Array of `S` sample betas for `C` predictors + z_values : (S [x E] x C) :obj:`numpy.ndarray` + Array of `S` sample z-values for `C` predictors + """ if data.ndim not in [2, 3]: raise ValueError('Parameter data should be 2d or 3d, not {0}d'.format(data.ndim)) @@ -144,11 +190,39 @@ def get_coeffs(data, X, mask=None, add_const=False): if add_const: # add intercept, if specified X = np.column_stack([X, np.ones((len(X), 1))]) - betas = np.linalg.lstsq(X, mdata, rcond=None)[0].T + # least squares estimation + betas = np.dot(np.linalg.pinv(X),mdata) + + if compute_zvalues: + # compute t-values of betas (estimates) and then convert to z-values + # first compute number of degrees of freedom + df = mdata.shape[0] - X.shape[1] + if df == 0: + LGR.error('ERROR: No degrees of freedom left in least squares calculation. Stopping!!') + else: + elif df <= min_df: + LGR.warning('Number of degrees of freedom in least-square estimation is less than {}'.format(min_df+1)) + # compute residual sum of squares (RSS) + RSS = np.sum(np.power(mdata - np.dot(X, betas.T),2),axis=0)/df + RSS = RSS[:,np.newaxis] + C = np.diag(np.linalg.pinv(np.dot(X.T,X))) + C = C[:,np.newaxis] + std_betas = np.sqrt(np.dot(RSS,C.T)) + z_values = t_to_z(betas / std_betas,df) + if add_const: # drop beta for intercept, if specified betas = betas[:, :-1] + if compute_zvalues: + z_values = z_values[:, :-1] if mask is not None: betas = utils.unmask(betas, mask) + if compute_zvalues: + z_values = utils.unmask(z_values, mask) + + if compute_zvalues: + return betas, z_values + else: + return betas - return betas + From 8587b79da3e26d6100bda14ca5323780fc7e1cf7 Mon Sep 17 00:00:00 2001 From: CesarCaballeroGaudes Date: Mon, 25 Nov 2019 17:14:53 +0100 Subject: [PATCH 02/19] updating pca.py to same as tedana in upstream --- tedana/decomposition/pca.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tedana/decomposition/pca.py b/tedana/decomposition/pca.py index 5ddc657cb..075b49075 100644 --- a/tedana/decomposition/pca.py +++ b/tedana/decomposition/pca.py @@ -288,6 +288,11 @@ def tedpca(data_cat, data_oc, combmode, mask, t2s, t2sG, comptable['normalized variance explained'] = varex_norm # write component maps to 4D image + # compute component spatial maps based on regression of the data on the + # component time series. Internally, regression (orthogonal least squares) + # is performed after z-normalization of data and component time series. 
+    # Finally write component spatial maps in 4D files, where the spatial maps
+    # will be divided by their standard deviation (option normalize=True)
     comp_ts_z = stats.zscore(comp_ts, axis=0)
     comp_maps = utils.unmask(computefeats2(data_oc, comp_ts_z, mask), mask)
     io.filewrite(comp_maps, op.join(out_dir, 'pca_components.nii.gz'), ref_img)

From 1617d2a2097b23cd6a03578eb9bd17b54d301af8 Mon Sep 17 00:00:00 2001
From: Stefano Moia
Date: Mon, 25 Nov 2019 18:10:01 +0100
Subject: [PATCH 03/19] Changed default compute_zvalues of get_coeffs to False

---
 tedana/stats.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tedana/stats.py b/tedana/stats.py
index e7de40ab2..c0f1c00dd 100644
--- a/tedana/stats.py
+++ b/tedana/stats.py
@@ -134,7 +134,7 @@ def computefeats2(data, mmix, mask=None, normalize=True):
     return data_Z


-def get_coeffs(data, X, mask=None, add_const=False, compute_zvalues=True, min_df=1):
+def get_coeffs(data, X, mask=None, add_const=False, compute_zvalues=False, min_df=1):
     """
     Performs least-squares fit of `X` against `data`

From 2453c787cba8fceee96ae15db23cd27f4d3bc025 Mon Sep 17 00:00:00 2001
From: smoia
Date: Wed, 27 Nov 2019 02:39:24 +0100
Subject: [PATCH 04/19] Linted and renamed function get_coeffs to
 compute_least_squares; changed import in viz.py.

---
 docs/api.rst                |  2 +-
 tedana/io.py                | 10 +++---
 tedana/metrics/__init__.py  |  4 +--
 tedana/metrics/kundu_fit.py | 10 +++---
 tedana/stats.py             | 67 ++++++++++++++++++++-----------
 tedana/tests/test_stats.py  | 38 ++++++++++-----------
 tedana/viz.py               |  4 +--
 7 files changed, 71 insertions(+), 64 deletions(-)

diff --git a/docs/api.rst b/docs/api.rst
index c44365ca7..6e71e07c8 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -177,7 +177,7 @@ API
    :toctree: generated/
    :template: function.rst

-   tedana.stats.get_coeffs
+   tedana.stats.compute_least_squares
    tedana.stats.computefeats2
    tedana.stats.getfbounds

diff --git a/tedana/io.py b/tedana/io.py
index 9eb756ba6..bba0d97e6 100644
--- a/tedana/io.py
+++ b/tedana/io.py
@@ -13,7 +13,7 @@
 from nilearn.image import new_img_like

 from tedana import utils
-from tedana.stats import computefeats2, get_coeffs
+from tedana.stats import computefeats2, compute_least_squares

 LGR = logging.getLogger(__name__)
 RepLGR = logging.getLogger('REPORT')
@@ -47,8 +47,8 @@ def split_ts(data, mmix, mask, comptable):
     """
     acc = comptable[comptable.classification == 'accepted'].index.values

-    cbetas = get_coeffs(data - data.mean(axis=-1, keepdims=True),
-                        mmix, mask)
+    cbetas = compute_least_squares(data - data.mean(axis=-1, keepdims=True),
+                                   mmix, mask)
     betas = cbetas[mask]
     if len(acc) != 0:
         hikts = utils.unmask(betas[:, acc].dot(mmix.T[acc, :]), mask)
@@ -104,7 +104,7 @@ def write_split_ts(data, mmix, mask, comptable, ref_img, suffix=''):
     dmdata = mdata.T - mdata.T.mean(axis=0)

     # get variance explained by retained components
-    betas = get_coeffs(dmdata.T, mmix, mask=None)
+    betas = compute_least_squares(dmdata.T, mmix, mask=None)
     varexpl = (1 - ((dmdata.T - betas.dot(mmix.T))**2.).sum() /
                (dmdata**2.).sum()) * 100
     LGR.info('Variance explained by ICA decomposition: {:.02f}%'.format(varexpl))
@@ -223,7 +223,7 @@ def writeresults(ts, mask, comptable, mmix, n_vols, ref_img):

     write_split_ts(ts, mmix, mask, comptable, ref_img, suffix='OC')

-    ts_B = get_coeffs(ts, mmix, mask)
+    ts_B = compute_least_squares(ts, mmix, mask)
     fout = filewrite(ts_B, 'betas_OC', ref_img)
     LGR.info('Writing full ICA coefficient feature set: {}'.format(op.abspath(fout)))

diff --git a/tedana/metrics/__init__.py
b/tedana/metrics/__init__.py index 95261dd14..ff5a0b1ff 100644 --- a/tedana/metrics/__init__.py +++ b/tedana/metrics/__init__.py @@ -2,8 +2,8 @@ # ex: set sts=4 ts=4 sw=4 et: from .kundu_fit import ( - dependence_metrics, kundu_metrics, get_coeffs, computefeats2 + dependence_metrics, kundu_metrics, compute_least_squares, computefeats2 ) __all__ = [ - 'dependence_metrics', 'kundu_metrics', 'get_coeffs', 'computefeats2'] + 'dependence_metrics', 'kundu_metrics', 'compute_least_squares', 'computefeats2'] diff --git a/tedana/metrics/kundu_fit.py b/tedana/metrics/kundu_fit.py index d3ca732d9..0c128d8dc 100644 --- a/tedana/metrics/kundu_fit.py +++ b/tedana/metrics/kundu_fit.py @@ -9,7 +9,7 @@ from scipy import stats from tedana import io, utils -from tedana.stats import getfbounds, computefeats2, get_coeffs +from tedana.stats import getfbounds, computefeats2, compute_least_squares LGR = logging.getLogger(__name__) @@ -107,7 +107,7 @@ def dependence_metrics(catd, tsoc, mmix, t2s, tes, ref_img, WTS = computefeats2(tsoc, mmixN, mask=None, normalize=False) # compute PSC dataset - shouldn't have to refit data - tsoc_B = get_coeffs(tsoc_dm, mmix, mask=None) + tsoc_B = compute_least_squares(tsoc_dm, mmix, mask=None) del tsoc_dm tsoc_Babs = np.abs(tsoc_B) PSC = tsoc_B / tsoc.mean(axis=-1, keepdims=True) * 100 @@ -124,9 +124,9 @@ def dependence_metrics(catd, tsoc, mmix, t2s, tes, ref_img, totvar_norm = (WTS**2).sum() # compute Betas and means over TEs for TE-dependence analysis - betas = get_coeffs(utils.unmask(catd, mask), - mmix, - np.repeat(mask[:, np.newaxis], len(tes), axis=1)) + betas = compute_least_squares(utils.unmask(catd, mask), + mmix, + np.repeat(mask[:, np.newaxis], len(tes), axis=1)) betas = betas[mask, ...] n_voxels, n_echos, n_components = betas.shape mu = catd.mean(axis=-1, dtype=float) diff --git a/tedana/stats.py b/tedana/stats.py index e7de40ab2..390e2b312 100644 --- a/tedana/stats.py +++ b/tedana/stats.py @@ -7,19 +7,21 @@ from scipy import stats from tedana import utils +from tedana.due import due LGR = logging.getLogger(__name__) RepLGR = logging.getLogger('REPORT') RefLGR = logging.getLogger('REFERENCES') -@due.dcite(references.T2Z_TRANSFORM, - description='Introduces T-to-Z transform.') -@due.dcite(references.T2Z_IMPLEMENTATION, - description='Python implementation of T-to-Z transform.') - +# @due.dcite(references.T2Z_TRANSFORM, +# description='Introduces T-to-Z transform.') +# @due.dcite(references.T2Z_IMPLEMENTATION, +# description='Python implementation of T-to-Z transform.') def t_to_z(t_values, dof): """ From Vanessa Sochat's TtoZ package. 
+ Copyright (c) 2015 Vanessa Sochat + MIT Licensed """ # check if t_values is np.array, and convert if required @@ -53,6 +55,7 @@ def t_to_z(t_values, dof): out[t_values != 0] = z_values return out + def getfbounds(n_echos): """ Gets F-statistic boundaries based on number of echos @@ -96,12 +99,14 @@ def computefeats2(data, mmix, mask=None, normalize=True): Data in component space """ if data.ndim != 2: - raise ValueError('Parameter data should be 2d, not {0}d'.format(data.ndim)) + raise ValueError('Parameter data should be 2d, not ' + '{0}d'.format(data.ndim)) elif mmix.ndim not in [2]: raise ValueError('Parameter mmix should be 2d, not ' '{0}d'.format(mmix.ndim)) elif (mask is not None) and (mask.ndim != 1): - raise ValueError('Parameter mask should be 1d, not {0}d'.format(mask.ndim)) + raise ValueError('Parameter mask should be 1d, not ' + '{0}d'.format(mask.ndim)) elif (mask is not None) and (data.shape[0] != mask.shape[0]): raise ValueError('First dimensions (number of samples) of data ({0}) ' 'and mask ({1}) do not match.'.format(data.shape[0], @@ -119,7 +124,8 @@ def computefeats2(data, mmix, mask=None, normalize=True): # get betas and z-values of `data`~`mmix` # mmix is normalized internally - data_R, data_Z = get_coeffs(data_vn, mmix, mask=None, add_const=False, compute_zvalues=True) + _, data_Z = compute_least_squares(data_vn, mmix, mask=None, add_const=False, + compute_zvalues=True) if data_Z.ndim == 1: data_Z = np.atleast_2d(data_Z).T @@ -134,7 +140,7 @@ def computefeats2(data, mmix, mask=None, normalize=True): return data_Z -def get_coeffs(data, X, mask=None, add_const=False, compute_zvalues=True, min_df=1): +def compute_least_squares(data, X, mask=None, add_const=False, compute_zvalues=False, min_df=1): """ Performs least-squares fit of `X` against `data` @@ -162,21 +168,23 @@ def get_coeffs(data, X, mask=None, add_const=False, compute_zvalues=True, min_df """ if data.ndim not in [2, 3]: - raise ValueError('Parameter data should be 2d or 3d, not {0}d'.format(data.ndim)) + raise ValueError('Parameter data should be 2d or 3d, not ' + '{0}d'.format(data.ndim)) elif X.ndim not in [2]: raise ValueError('Parameter X should be 2d, not {0}d'.format(X.ndim)) elif data.shape[-1] != X.shape[0]: - raise ValueError('Last dimension (dimension {0}) of data ({1}) does not ' - 'match first dimension of ' - 'X ({2})'.format(data.ndim, data.shape[-1], X.shape[0])) + raise ValueError('Last dimension (dimension {0}) of data ({1}) does ' + 'not match first dimension of X ' + '({2})'.format(data.ndim, data.shape[-1], X.shape[0])) # mask data and flip (time x samples) if mask is not None: if mask.ndim not in [1, 2]: - raise ValueError('Parameter data should be 1d or 2d, not {0}d'.format(mask.ndim)) + raise ValueError('Parameter data should be 1d or 2d, not ' + '{0}d'.format(mask.ndim)) elif data.shape[0] != mask.shape[0]: - raise ValueError('First dimensions of data ({0}) and mask ({1}) do not ' - 'match'.format(data.shape[0], mask.shape[0])) + raise ValueError('First dimensions of data ({0}) and mask ({1}) do' + ' not match'.format(data.shape[0], mask.shape[0])) mdata = data[mask, :].T else: mdata = data.T @@ -191,24 +199,25 @@ def get_coeffs(data, X, mask=None, add_const=False, compute_zvalues=True, min_df X = np.column_stack([X, np.ones((len(X), 1))]) # least squares estimation - betas = np.dot(np.linalg.pinv(X),mdata) + betas = np.dot(np.linalg.pinv(X), mdata) if compute_zvalues: # compute t-values of betas (estimates) and then convert to z-values # first compute number of degrees of freedom df 
= mdata.shape[0] - X.shape[1] if df == 0: - LGR.error('ERROR: No degrees of freedom left in least squares calculation. Stopping!!') - else: - elif df <= min_df: - LGR.warning('Number of degrees of freedom in least-square estimation is less than {}'.format(min_df+1)) - # compute residual sum of squares (RSS) - RSS = np.sum(np.power(mdata - np.dot(X, betas.T),2),axis=0)/df - RSS = RSS[:,np.newaxis] - C = np.diag(np.linalg.pinv(np.dot(X.T,X))) - C = C[:,np.newaxis] - std_betas = np.sqrt(np.dot(RSS,C.T)) - z_values = t_to_z(betas / std_betas,df) + LGR.error('ERROR: No degrees of freedom left in least squares ' + 'calculation. Stopping!!') + elif df <= min_df: + LGR.warning('Number of degrees of freedom in least-square ' + 'estimation is less than {}'.format(min_df + 1)) + # compute residual sum of squares (RSS) + RSS = np.sum(np.power(mdata - np.dot(X, betas.T), 2), axis=0) / df + RSS = RSS[:, np.newaxis] + C = np.diag(np.linalg.pinv(np.dot(X.T, X))) + C = C[:, np.newaxis] + std_betas = np.sqrt(np.dot(RSS, C.T)) + z_values = t_to_z(betas / std_betas, df) if add_const: # drop beta for intercept, if specified betas = betas[:, :-1] @@ -224,5 +233,3 @@ def get_coeffs(data, X, mask=None, add_const=False, compute_zvalues=True, min_df return betas, z_values else: return betas - - diff --git a/tedana/tests/test_stats.py b/tedana/tests/test_stats.py index bbf28a95d..3450ea273 100644 --- a/tedana/tests/test_stats.py +++ b/tedana/tests/test_stats.py @@ -6,7 +6,7 @@ import random from tedana.stats import computefeats2 -from tedana.stats import get_coeffs +from tedana.stats import compute_least_squares from tedana.stats import getfbounds @@ -58,7 +58,7 @@ def test_smoke_computefeats2(): assert computefeats2(data, mmix, normalize=False) is not None -def test_get_coeffs(): +def test_compute_least_squares(): """ Check least squares coefficients. """ @@ -69,26 +69,26 @@ def test_get_coeffs(): X = np.arange(0, 40)[:, np.newaxis] mask = np.array([True, False]) - betas = get_coeffs(data, X, mask=None, add_const=False) + betas = compute_least_squares(data, X, mask=None, add_const=False) betas = np.squeeze(betas) assert np.allclose(betas, np.array([5., 5.])) - betas = get_coeffs(data, X, mask=None, add_const=True) + betas = compute_least_squares(data, X, mask=None, add_const=True) betas = np.squeeze(betas) assert np.allclose(betas, np.array([5., 5.])) - betas = get_coeffs(data, X, mask=mask, add_const=False) + betas = compute_least_squares(data, X, mask=mask, add_const=False) betas = np.squeeze(betas) assert np.allclose(betas, np.array([5, 0])) - betas = get_coeffs(data, X, mask=mask, add_const=True) + betas = compute_least_squares(data, X, mask=mask, add_const=True) betas = np.squeeze(betas) assert np.allclose(betas, np.array([5, 0])) -def test_break_get_coeffs(): +def test_break_compute_least_squares(): """ - Ensure that get_coeffs fails when input data do not have the right + Ensure that compute_least_squares fails when input data do not have the right shapes. 
""" n_samples, n_echos, n_vols, n_comps = 10000, 5, 100, 50 @@ -98,41 +98,41 @@ def test_break_get_coeffs(): data = np.empty((n_samples)) with pytest.raises(ValueError): - get_coeffs(data, X, mask, add_const=False) + compute_least_squares(data, X, mask, add_const=False) data = np.empty((n_samples, n_vols)) X = np.empty((n_vols)) with pytest.raises(ValueError): - get_coeffs(data, X, mask, add_const=False) + compute_least_squares(data, X, mask, add_const=False) data = np.empty((n_samples, n_echos, n_vols + 1)) X = np.empty((n_vols, n_comps)) with pytest.raises(ValueError): - get_coeffs(data, X, mask, add_const=False) + compute_least_squares(data, X, mask, add_const=False) data = np.empty((n_samples, n_echos, n_vols)) mask = np.empty((n_samples, n_echos, n_vols)) with pytest.raises(ValueError): - get_coeffs(data, X, mask, add_const=False) + compute_least_squares(data, X, mask, add_const=False) mask = np.empty((n_samples + 1, n_echos)) with pytest.raises(ValueError): - get_coeffs(data, X, mask, add_const=False) + compute_least_squares(data, X, mask, add_const=False) -def test_smoke_get_coeffs(): +def test_smoke_compute_least_squares(): """ - Ensure that get_coeffs returns outputs with different inputs and optional paramters + Ensure that compute_least_squares returns outputs with different inputs and optional paramters """ n_samples, _, n_times, n_components = 100, 5, 20, 6 data_2d = np.random.random((n_samples, n_times)) x = np.random.random((n_times, n_components)) mask = np.random.randint(2, size=n_samples) - assert get_coeffs(data_2d, x) is not None - # assert get_coeffs(data_3d, x) is not None TODO: submit an issue for the bug - assert get_coeffs(data_2d, x, mask=mask) is not None - assert get_coeffs(data_2d, x, add_const=True) is not None + assert compute_least_squares(data_2d, x) is not None + # assert compute_least_squares(data_3d, x) is not None TODO: submit an issue for the bug + assert compute_least_squares(data_2d, x, mask=mask) is not None + assert compute_least_squares(data_2d, x, add_const=True) is not None def test_getfbounds(): diff --git a/tedana/viz.py b/tedana/viz.py index 92c664872..42e484006 100644 --- a/tedana/viz.py +++ b/tedana/viz.py @@ -9,7 +9,7 @@ matplotlib.use('AGG') import matplotlib.pyplot as plt -from tedana import metrics +from tedana.stats import compute_least_squares from tedana.utils import get_spectrum LGR = logging.getLogger(__name__) @@ -79,7 +79,7 @@ def write_comp_figs(ts, mask, comptable, mmix, ref_img, out_dir, LGR.warning('Provided colormap is not recognized, proceeding with default') png_cmap = 'coolwarm' # regenerate the beta images - ts_B = metrics.get_coeffs(ts, mmix, mask) + ts_B = compute_least_squares(ts, mmix, mask) ts_B = ts_B.reshape(ref_img.shape[:3] + ts_B.shape[1:]) # trim edges from ts_B array ts_B = trim_edge_zeros(ts_B) From 94cc77edfd101fef857386be8805a764fea38018 Mon Sep 17 00:00:00 2001 From: smoia Date: Wed, 27 Nov 2019 15:27:57 +0100 Subject: [PATCH 05/19] Changed orientation of "beta" matrix in RSS --- tedana/stats.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tedana/stats.py b/tedana/stats.py index 390e2b312..18110862f 100644 --- a/tedana/stats.py +++ b/tedana/stats.py @@ -199,7 +199,7 @@ def compute_least_squares(data, X, mask=None, add_const=False, compute_zvalues=F X = np.column_stack([X, np.ones((len(X), 1))]) # least squares estimation - betas = np.dot(np.linalg.pinv(X), mdata) + betas = np.dot(np.linalg.pinv(X), mdata).T if compute_zvalues: # compute t-values of betas (estimates) and then convert 
to z-values

From b3baa52fc1fe53cee658bb00e7978e2b9aebc02a Mon Sep 17 00:00:00 2001
From: smoia
Date: Wed, 27 Nov 2019 15:28:23 +0100
Subject: [PATCH 06/19] Renamed compute_least_squares to get_ls_coeffs

---
 docs/api.rst                |  2 +-
 tedana/io.py                |  8 ++++----
 tedana/metrics/__init__.py  |  4 ++--
 tedana/metrics/kundu_fit.py |  6 +++---
 tedana/stats.py             |  4 ++--
 tedana/tests/test_stats.py  | 38 ++++++++++++++++++-------------------
 tedana/viz.py               |  4 ++--
 7 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/docs/api.rst b/docs/api.rst
index 6e71e07c8..c22dac02b 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -177,7 +177,7 @@ API
    :toctree: generated/
    :template: function.rst

-   tedana.stats.compute_least_squares
+   tedana.stats.get_ls_coeffs
    tedana.stats.computefeats2
    tedana.stats.getfbounds

diff --git a/tedana/io.py b/tedana/io.py
index bba0d97e6..10b3cc6e3 100644
--- a/tedana/io.py
+++ b/tedana/io.py
@@ -13,7 +13,7 @@
 from nilearn.image import new_img_like

 from tedana import utils
-from tedana.stats import computefeats2, compute_least_squares
+from tedana.stats import computefeats2, get_ls_coeffs

 LGR = logging.getLogger(__name__)
 RepLGR = logging.getLogger('REPORT')
@@ -47,7 +47,7 @@ def split_ts(data, mmix, mask, comptable):
     """
     acc = comptable[comptable.classification == 'accepted'].index.values

-    cbetas = compute_least_squares(data - data.mean(axis=-1, keepdims=True),
+    cbetas = get_ls_coeffs(data - data.mean(axis=-1, keepdims=True),
                                    mmix, mask)
     betas = cbetas[mask]
     if len(acc) != 0:
         hikts = utils.unmask(betas[:, acc].dot(mmix.T[acc, :]), mask)
@@ -104,7 +104,7 @@ def write_split_ts(data, mmix, mask, comptable, ref_img, suffix=''):
     dmdata = mdata.T - mdata.T.mean(axis=0)

     # get variance explained by retained components
-    betas = compute_least_squares(dmdata.T, mmix, mask=None)
+    betas = get_ls_coeffs(dmdata.T, mmix, mask=None)
     varexpl = (1 - ((dmdata.T - betas.dot(mmix.T))**2.).sum() /
                (dmdata**2.).sum()) * 100
     LGR.info('Variance explained by ICA decomposition: {:.02f}%'.format(varexpl))
@@ -223,7 +223,7 @@ def writeresults(ts, mask, comptable, mmix, n_vols, ref_img):

     write_split_ts(ts, mmix, mask, comptable, ref_img, suffix='OC')

-    ts_B = compute_least_squares(ts, mmix, mask)
+    ts_B = get_ls_coeffs(ts, mmix, mask)
     fout = filewrite(ts_B, 'betas_OC', ref_img)
     LGR.info('Writing full ICA coefficient feature set: {}'.format(op.abspath(fout)))

diff --git a/tedana/metrics/__init__.py b/tedana/metrics/__init__.py
index ff5a0b1ff..b2e69bfa1 100644
--- a/tedana/metrics/__init__.py
+++ b/tedana/metrics/__init__.py
@@ -2,8 +2,8 @@
 # ex: set sts=4 ts=4 sw=4 et:

 from .kundu_fit import (
-    dependence_metrics, kundu_metrics, compute_least_squares, computefeats2
+    dependence_metrics, kundu_metrics, get_ls_coeffs, computefeats2
 )

 __all__ = [
-    'dependence_metrics', 'kundu_metrics', 'compute_least_squares', 'computefeats2']
+    'dependence_metrics', 'kundu_metrics', 'get_ls_coeffs', 'computefeats2']

diff --git a/tedana/metrics/kundu_fit.py b/tedana/metrics/kundu_fit.py
index 0c128d8dc..b122b8a59 100644
--- a/tedana/metrics/kundu_fit.py
+++ b/tedana/metrics/kundu_fit.py
@@ -9,7 +9,7 @@
 from scipy import stats

 from tedana import io, utils
-from tedana.stats import getfbounds, computefeats2, compute_least_squares
+from tedana.stats import getfbounds, computefeats2, get_ls_coeffs

 LGR = logging.getLogger(__name__)

@@ -107,7 +107,7 @@ def dependence_metrics(catd, tsoc, mmix, t2s, tes, ref_img,
     WTS = computefeats2(tsoc, mmixN, mask=None, normalize=False)

     # compute PSC dataset - shouldn't have to refit data
-    tsoc_B = compute_least_squares(tsoc_dm, mmix, mask=None)
+    tsoc_B =
get_ls_coeffs(tsoc_dm, mmix, mask=None) del tsoc_dm tsoc_Babs = np.abs(tsoc_B) PSC = tsoc_B / tsoc.mean(axis=-1, keepdims=True) * 100 @@ -124,7 +124,7 @@ def dependence_metrics(catd, tsoc, mmix, t2s, tes, ref_img, totvar_norm = (WTS**2).sum() # compute Betas and means over TEs for TE-dependence analysis - betas = compute_least_squares(utils.unmask(catd, mask), + betas = get_ls_coeffs(utils.unmask(catd, mask), mmix, np.repeat(mask[:, np.newaxis], len(tes), axis=1)) betas = betas[mask, ...] diff --git a/tedana/stats.py b/tedana/stats.py index 18110862f..c94a1461d 100644 --- a/tedana/stats.py +++ b/tedana/stats.py @@ -124,7 +124,7 @@ def computefeats2(data, mmix, mask=None, normalize=True): # get betas and z-values of `data`~`mmix` # mmix is normalized internally - _, data_Z = compute_least_squares(data_vn, mmix, mask=None, add_const=False, + _, data_Z = get_ls_coeffs(data_vn, mmix, mask=None, add_const=False, compute_zvalues=True) if data_Z.ndim == 1: data_Z = np.atleast_2d(data_Z).T @@ -140,7 +140,7 @@ def computefeats2(data, mmix, mask=None, normalize=True): return data_Z -def compute_least_squares(data, X, mask=None, add_const=False, compute_zvalues=False, min_df=1): +def get_ls_coeffs(data, X, mask=None, add_const=False, compute_zvalues=False, min_df=1): """ Performs least-squares fit of `X` against `data` diff --git a/tedana/tests/test_stats.py b/tedana/tests/test_stats.py index 3450ea273..b96ecd972 100644 --- a/tedana/tests/test_stats.py +++ b/tedana/tests/test_stats.py @@ -6,7 +6,7 @@ import random from tedana.stats import computefeats2 -from tedana.stats import compute_least_squares +from tedana.stats import get_ls_coeffs from tedana.stats import getfbounds @@ -58,7 +58,7 @@ def test_smoke_computefeats2(): assert computefeats2(data, mmix, normalize=False) is not None -def test_compute_least_squares(): +def test_get_ls_coeffs(): """ Check least squares coefficients. """ @@ -69,26 +69,26 @@ def test_compute_least_squares(): X = np.arange(0, 40)[:, np.newaxis] mask = np.array([True, False]) - betas = compute_least_squares(data, X, mask=None, add_const=False) + betas = get_ls_coeffs(data, X, mask=None, add_const=False) betas = np.squeeze(betas) assert np.allclose(betas, np.array([5., 5.])) - betas = compute_least_squares(data, X, mask=None, add_const=True) + betas = get_ls_coeffs(data, X, mask=None, add_const=True) betas = np.squeeze(betas) assert np.allclose(betas, np.array([5., 5.])) - betas = compute_least_squares(data, X, mask=mask, add_const=False) + betas = get_ls_coeffs(data, X, mask=mask, add_const=False) betas = np.squeeze(betas) assert np.allclose(betas, np.array([5, 0])) - betas = compute_least_squares(data, X, mask=mask, add_const=True) + betas = get_ls_coeffs(data, X, mask=mask, add_const=True) betas = np.squeeze(betas) assert np.allclose(betas, np.array([5, 0])) -def test_break_compute_least_squares(): +def test_break_get_ls_coeffs(): """ - Ensure that compute_least_squares fails when input data do not have the right + Ensure that get_ls_coeffs fails when input data do not have the right shapes. 
""" n_samples, n_echos, n_vols, n_comps = 10000, 5, 100, 50 @@ -98,41 +98,41 @@ def test_break_compute_least_squares(): data = np.empty((n_samples)) with pytest.raises(ValueError): - compute_least_squares(data, X, mask, add_const=False) + get_ls_coeffs(data, X, mask, add_const=False) data = np.empty((n_samples, n_vols)) X = np.empty((n_vols)) with pytest.raises(ValueError): - compute_least_squares(data, X, mask, add_const=False) + get_ls_coeffs(data, X, mask, add_const=False) data = np.empty((n_samples, n_echos, n_vols + 1)) X = np.empty((n_vols, n_comps)) with pytest.raises(ValueError): - compute_least_squares(data, X, mask, add_const=False) + get_ls_coeffs(data, X, mask, add_const=False) data = np.empty((n_samples, n_echos, n_vols)) mask = np.empty((n_samples, n_echos, n_vols)) with pytest.raises(ValueError): - compute_least_squares(data, X, mask, add_const=False) + get_ls_coeffs(data, X, mask, add_const=False) mask = np.empty((n_samples + 1, n_echos)) with pytest.raises(ValueError): - compute_least_squares(data, X, mask, add_const=False) + get_ls_coeffs(data, X, mask, add_const=False) -def test_smoke_compute_least_squares(): +def test_smoke_get_ls_coeffs(): """ - Ensure that compute_least_squares returns outputs with different inputs and optional paramters + Ensure that get_ls_coeffs returns outputs with different inputs and optional paramters """ n_samples, _, n_times, n_components = 100, 5, 20, 6 data_2d = np.random.random((n_samples, n_times)) x = np.random.random((n_times, n_components)) mask = np.random.randint(2, size=n_samples) - assert compute_least_squares(data_2d, x) is not None - # assert compute_least_squares(data_3d, x) is not None TODO: submit an issue for the bug - assert compute_least_squares(data_2d, x, mask=mask) is not None - assert compute_least_squares(data_2d, x, add_const=True) is not None + assert get_ls_coeffs(data_2d, x) is not None + # assert get_ls_coeffs(data_3d, x) is not None TODO: submit an issue for the bug + assert get_ls_coeffs(data_2d, x, mask=mask) is not None + assert get_ls_coeffs(data_2d, x, add_const=True) is not None def test_getfbounds(): diff --git a/tedana/viz.py b/tedana/viz.py index 42e484006..82516f2e5 100644 --- a/tedana/viz.py +++ b/tedana/viz.py @@ -9,7 +9,7 @@ matplotlib.use('AGG') import matplotlib.pyplot as plt -from tedana.stats import compute_least_squares +from tedana.stats import get_ls_coeffs from tedana.utils import get_spectrum LGR = logging.getLogger(__name__) @@ -79,7 +79,7 @@ def write_comp_figs(ts, mask, comptable, mmix, ref_img, out_dir, LGR.warning('Provided colormap is not recognized, proceeding with default') png_cmap = 'coolwarm' # regenerate the beta images - ts_B = compute_least_squares(ts, mmix, mask) + ts_B = get_ls_coeffs(ts, mmix, mask) ts_B = ts_B.reshape(ref_img.shape[:3] + ts_B.shape[1:]) # trim edges from ts_B array ts_B = trim_edge_zeros(ts_B) From 487416dd8ecff33c6704c77b9befa965d34353f6 Mon Sep 17 00:00:00 2001 From: smoia Date: Wed, 27 Nov 2019 16:08:24 +0100 Subject: [PATCH 07/19] Lintering and Removing part of code that is not needed anymore due to normalisation of z_score happening inside get_ls_coeffs --- tedana/io.py | 2 +- tedana/metrics/kundu_fit.py | 4 ++-- tedana/stats.py | 10 +--------- 3 files changed, 4 insertions(+), 12 deletions(-) diff --git a/tedana/io.py b/tedana/io.py index 10b3cc6e3..9a65a28fd 100644 --- a/tedana/io.py +++ b/tedana/io.py @@ -48,7 +48,7 @@ def split_ts(data, mmix, mask, comptable): acc = comptable[comptable.classification == 'accepted'].index.values cbetas = 
get_ls_coeffs(data - data.mean(axis=-1, keepdims=True), - mmix, mask) + mmix, mask) betas = cbetas[mask] if len(acc) != 0: hikts = utils.unmask(betas[:, acc].dot(mmix.T[acc, :]), mask) diff --git a/tedana/metrics/kundu_fit.py b/tedana/metrics/kundu_fit.py index b122b8a59..73fd51b0c 100644 --- a/tedana/metrics/kundu_fit.py +++ b/tedana/metrics/kundu_fit.py @@ -125,8 +125,8 @@ def dependence_metrics(catd, tsoc, mmix, t2s, tes, ref_img, # compute Betas and means over TEs for TE-dependence analysis betas = get_ls_coeffs(utils.unmask(catd, mask), - mmix, - np.repeat(mask[:, np.newaxis], len(tes), axis=1)) + mmix, + np.repeat(mask[:, np.newaxis], len(tes), axis=1)) betas = betas[mask, ...] n_voxels, n_echos, n_components = betas.shape mu = catd.mean(axis=-1, dtype=float) diff --git a/tedana/stats.py b/tedana/stats.py index c94a1461d..8a8a8dab6 100644 --- a/tedana/stats.py +++ b/tedana/stats.py @@ -125,18 +125,10 @@ def computefeats2(data, mmix, mask=None, normalize=True): # get betas and z-values of `data`~`mmix` # mmix is normalized internally _, data_Z = get_ls_coeffs(data_vn, mmix, mask=None, add_const=False, - compute_zvalues=True) + compute_zvalues=True) if data_Z.ndim == 1: data_Z = np.atleast_2d(data_Z).T - # normalize data (only division by std) - if normalize: - # minus mean and divided by std - data_Zm = stats.zscore(data_Z, axis=0) - # adding back the mean - data_Z = data_Zm + (data_Z.mean(axis=0, keepdims=True) / - data_Z.std(axis=0, keepdims=True)) - return data_Z From b96c3b2bfabfa41d0ba76b97e89e5a0e77036452 Mon Sep 17 00:00:00 2001 From: smoia Date: Wed, 27 Nov 2019 17:08:08 +0100 Subject: [PATCH 08/19] Improved comments on what's going on in get_ls_coeffs --- tedana/stats.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/tedana/stats.py b/tedana/stats.py index 8a8a8dab6..af9107cc7 100644 --- a/tedana/stats.py +++ b/tedana/stats.py @@ -190,7 +190,8 @@ def get_ls_coeffs(data, X, mask=None, add_const=False, compute_zvalues=False, mi if add_const: # add intercept, if specified X = np.column_stack([X, np.ones((len(X), 1))]) - # least squares estimation + # least squares estimation: beta = (X^T * X)^(-1) * X^T * mdata + # betas is transposed due to backward compatibility with rest of code. 
betas = np.dot(np.linalg.pinv(X), mdata).T

     if compute_zvalues:
@@ -203,12 +204,17 @@ def get_ls_coeffs(data, X, mask=None, add_const=False, compute_zvalues=False, mi
         elif df <= min_df:
             LGR.warning('Number of degrees of freedom in least-square '
                         'estimation is less than {}'.format(min_df + 1))
-        # compute residual sum of squares (RSS)
-        RSS = np.sum(np.power(mdata - np.dot(X, betas.T), 2), axis=0) / df
-        RSS = RSS[:, np.newaxis]
+        # compute sigma:
+        # RSS = sum{[mdata - (X * betas)]^2}
+        # sigma = RSS / Degrees_of_Freedom
+        sigma = np.sum(np.power(mdata - np.dot(X, betas.T), 2), axis=0) / df
+        sigma = sigma[:, np.newaxis]
+        # Compute std of betas:
+        # C = (X^T * X)_ii^(-1)
+        # std(betas) = sqrt(sigma * C)
         C = np.diag(np.linalg.pinv(np.dot(X.T, X)))
         C = C[:, np.newaxis]
-        std_betas = np.sqrt(np.dot(RSS, C.T))
+        std_betas = np.sqrt(np.dot(sigma, C.T))
         z_values = t_to_z(betas / std_betas, df)

     if add_const:
         # drop beta for intercept, if specified

From 706161490a182da45cce0cebfdf79e44a1746691 Mon Sep 17 00:00:00 2001
From: smoia
Date: Thu, 28 Nov 2019 00:20:43 +0100
Subject: [PATCH 09/19] Corrected use of "due" in stats.py

---
 tedana/stats.py | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/tedana/stats.py b/tedana/stats.py
index af9107cc7..2628d9f5d 100644
--- a/tedana/stats.py
+++ b/tedana/stats.py
@@ -7,16 +7,32 @@
 from scipy import stats

 from tedana import utils
-from tedana.due import due
+from tedana.due import due, BibTeX, Doi

 LGR = logging.getLogger(__name__)
 RepLGR = logging.getLogger('REPORT')
 RefLGR = logging.getLogger('REFERENCES')

-# @due.dcite(references.T2Z_TRANSFORM,
-#            description='Introduces T-to-Z transform.')
-# @due.dcite(references.T2Z_IMPLEMENTATION,
-#            description='Python implementation of T-to-Z transform.')
+T2Z_TRANSFORM = BibTeX("""
+    @article{hughett2007accurate,
+      title={Accurate Computation of the F-to-z and t-to-z Transforms
+             for Large Arguments},
+      author={Hughett, Paul and others},
+      journal={Journal of Statistical Software},
+      volume={23},
+      number={1},
+      pages={1--5},
+      year={2007},
+      publisher={Foundation for Open Access Statistics}
+    }
+    """)
+T2Z_IMPLEMENTATION = Doi('10.5281/zenodo.32508')
+
+
+@due.dcite(references.T2Z_TRANSFORM,
+           description='Introduces T-to-Z transform.')
+@due.dcite(references.T2Z_IMPLEMENTATION,
+           description='Python implementation of T-to-Z transform.')
 def t_to_z(t_values, dof):
     """
     From Vanessa Sochat's TtoZ package.
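A note on the estimator that patches 01-09 converge on: it is ordinary least squares followed by a per-regressor t-to-z conversion. The standalone sketch below reproduces the same steps for a single time series: OLS betas via the pseudo-inverse, residual variance RSS/df, standard errors from the diagonal of (X^T X)^-1, and the tail-by-tail transform used in t_to_z(). It assumes only numpy and scipy; the names ols_zvalues, y and X are illustrative and are not part of tedana's API.

    import numpy as np
    from scipy import stats

    def ols_zvalues(y, X):
        # OLS betas via the pseudo-inverse, as in get_ls_coeffs
        betas = np.dot(np.linalg.pinv(X), y)
        df = X.shape[0] - X.shape[1]         # degrees of freedom
        resid = y - np.dot(X, betas)
        sigma = np.sum(resid ** 2) / df      # residual variance, RSS / df
        # var(beta_j) = sigma * [(X^T X)^-1]_jj, the same quantity as std_betas
        C = np.diag(np.linalg.pinv(np.dot(X.T, X)))
        t_values = betas / np.sqrt(sigma * C)
        # two-tailed t-to-z, one tail at a time, as in t_to_z()
        z_values = np.zeros_like(t_values)
        k1, k2 = t_values <= 0, t_values > 0
        z_values[k1] = stats.norm.ppf(stats.t.cdf(t_values[k1], df=df))
        z_values[k2] = -stats.norm.ppf(stats.t.cdf(-t_values[k2], df=df))
        return betas, z_values

    rng = np.random.default_rng(42)
    X = np.column_stack([rng.standard_normal(100), np.ones(100)])
    y = np.dot(X, [2.0, 0.5]) + rng.standard_normal(100)
    betas, z_values = ols_zvalues(y, X)      # betas come out near [2.0, 0.5]

get_ls_coeffs itself vectorises the same computation over a (time x voxels) data matrix, which is why sigma and C are kept as column vectors and combined with an outer product there rather than elementwise as here.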
From 4820c19ed75befcf2ad1ba705ca2700aeac7d5ee Mon Sep 17 00:00:00 2001 From: smoia Date: Thu, 28 Nov 2019 00:21:57 +0100 Subject: [PATCH 10/19] Corrected use of duecredit --- tedana/stats.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tedana/stats.py b/tedana/stats.py index 2628d9f5d..a282ba8ee 100644 --- a/tedana/stats.py +++ b/tedana/stats.py @@ -29,9 +29,9 @@ T2Z_IMPLEMENTATION = Doi('10.5281/zenodo.32508') -@due.dcite(references.T2Z_TRANSFORM, +@due.dcite(T2Z_TRANSFORM, description='Introduces T-to-Z transform.') -@due.dcite(references.T2Z_IMPLEMENTATION, +@due.dcite(T2Z_IMPLEMENTATION, description='Python implementation of T-to-Z transform.') def t_to_z(t_values, dof): """ From 25b5ea3550ca9caf23adf963a1542bb4056c5bac Mon Sep 17 00:00:00 2001 From: CesarCaballeroGaudes Date: Fri, 29 Nov 2019 17:00:31 +0100 Subject: [PATCH 11/19] compute OLS with np.linalg.lstsq instead of np.linalg.pinv --- tedana/stats.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tedana/stats.py b/tedana/stats.py index a282ba8ee..aaeffae3e 100644 --- a/tedana/stats.py +++ b/tedana/stats.py @@ -208,7 +208,7 @@ def get_ls_coeffs(data, X, mask=None, add_const=False, compute_zvalues=False, mi # least squares estimation: beta = (X^T * X)^(-1) * X^T * mdata # betas is transposed due to backward compatibility with rest of code. - betas = np.dot(np.linalg.pinv(X), mdata).T + betas = np.linalg.lstsq(X, mdata, rcond=None)[0].T if compute_zvalues: # compute t-values of betas (estimates) and then convert to z-values From 8b03c2b1020d442f9f155fb3aba80b47b71c120b Mon Sep 17 00:00:00 2001 From: smoia Date: Mon, 2 Dec 2019 12:28:47 +0100 Subject: [PATCH 12/19] Removed unused parameter from computefeats2 --- tedana/stats.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tedana/stats.py b/tedana/stats.py index aaeffae3e..b9203acec 100644 --- a/tedana/stats.py +++ b/tedana/stats.py @@ -93,7 +93,7 @@ def getfbounds(n_echos): return f05, f025, f01 -def computefeats2(data, mmix, mask=None, normalize=True): +def computefeats2(data, mmix, mask=None): """ Converts `data` to component space using `mmix` From 88d049d6afd9d176b7b88823e9352a36689d9faa Mon Sep 17 00:00:00 2001 From: smoia Date: Mon, 2 Dec 2019 13:49:05 +0100 Subject: [PATCH 13/19] Removed unused parameter --- tedana/tests/test_stats.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tedana/tests/test_stats.py b/tedana/tests/test_stats.py index b96ecd972..066f155ae 100644 --- a/tedana/tests/test_stats.py +++ b/tedana/tests/test_stats.py @@ -22,26 +22,26 @@ def test_break_computefeats2(): data = np.empty((n_samples)) with pytest.raises(ValueError): - computefeats2(data, mmix, mask, normalize=True) + computefeats2(data, mmix, mask) data = np.empty((n_samples, n_vols)) mmix = np.empty((n_vols)) with pytest.raises(ValueError): - computefeats2(data, mmix, mask, normalize=True) + computefeats2(data, mmix, mask) mmix = np.empty((n_vols, n_comps)) mask = np.empty((n_samples, n_vols)) with pytest.raises(ValueError): - computefeats2(data, mmix, mask, normalize=True) + computefeats2(data, mmix, mask) mask = np.empty((n_samples + 1)) with pytest.raises(ValueError): - computefeats2(data, mmix, mask, normalize=True) + computefeats2(data, mmix, mask) data.shape[1] != mmix.shape[0] mask = np.empty((n_samples)) mmix = np.empty((n_vols + 1, n_comps)) with pytest.raises(ValueError): - computefeats2(data, mmix, mask, normalize=True) + computefeats2(data, mmix, mask) def test_smoke_computefeats2(): @@ 
-55,7 +55,7 @@ def test_smoke_computefeats2(): assert computefeats2(data, mmix) is not None assert computefeats2(data, mmix, mask=mask) is not None - assert computefeats2(data, mmix, normalize=False) is not None + assert computefeats2(data, mmix) is not None def test_get_ls_coeffs(): From adc285080417fdc3289c5a46cd9f892e71a3e7d9 Mon Sep 17 00:00:00 2001 From: smoia Date: Sun, 8 Dec 2019 22:44:30 +0100 Subject: [PATCH 14/19] Removed unused parameter --- tedana/metrics/kundu_fit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tedana/metrics/kundu_fit.py b/tedana/metrics/kundu_fit.py index 73fd51b0c..f95ced45f 100644 --- a/tedana/metrics/kundu_fit.py +++ b/tedana/metrics/kundu_fit.py @@ -104,7 +104,7 @@ def dependence_metrics(catd, tsoc, mmix, t2s, tes, ref_img, # compute un-normalized weight dataset (features) if mmixN is None: mmixN = mmix - WTS = computefeats2(tsoc, mmixN, mask=None, normalize=False) + WTS = computefeats2(tsoc, mmixN, mask=None) # compute PSC dataset - shouldn't have to refit data tsoc_B = get_ls_coeffs(tsoc_dm, mmix, mask=None) From f8dfc8b19a73f5659265c4697cf77a039d3bc670 Mon Sep 17 00:00:00 2001 From: smoia Date: Sun, 8 Dec 2019 22:44:30 +0100 Subject: [PATCH 15/19] Removed unused parameter --- tedana/metrics/kundu_fit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tedana/metrics/kundu_fit.py b/tedana/metrics/kundu_fit.py index 73fd51b0c..f95ced45f 100644 --- a/tedana/metrics/kundu_fit.py +++ b/tedana/metrics/kundu_fit.py @@ -104,7 +104,7 @@ def dependence_metrics(catd, tsoc, mmix, t2s, tes, ref_img, # compute un-normalized weight dataset (features) if mmixN is None: mmixN = mmix - WTS = computefeats2(tsoc, mmixN, mask=None, normalize=False) + WTS = computefeats2(tsoc, mmixN, mask=None) # compute PSC dataset - shouldn't have to refit data tsoc_B = get_ls_coeffs(tsoc_dm, mmix, mask=None) From c6796005c783bf57d5024478e730bbfecaef8c80 Mon Sep 17 00:00:00 2001 From: smoia Date: Wed, 11 Dec 2019 22:52:39 +0100 Subject: [PATCH 16/19] Added limits to betas and z_scores to avoid breaking code due to INFs. --- tedana/stats.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tedana/stats.py b/tedana/stats.py index b9203acec..2a82a5519 100644 --- a/tedana/stats.py +++ b/tedana/stats.py @@ -232,6 +232,9 @@ def get_ls_coeffs(data, X, mask=None, add_const=False, compute_zvalues=False, mi C = C[:, np.newaxis] std_betas = np.sqrt(np.dot(sigma, C.T)) z_values = t_to_z(betas / std_betas, df) + z_values = np.nan_to_num(z_values, posinf=30, neginf=-30) + + betas = np.nan_to_num(betas, posinf=30, neginf=-30) if add_const: # drop beta for intercept, if specified betas = betas[:, :-1] From ae582b6407c5973c6471bbabb5369e12908128d8 Mon Sep 17 00:00:00 2001 From: smoia Date: Wed, 11 Dec 2019 23:36:59 +0100 Subject: [PATCH 17/19] Modified nan_to_num to clip in order to limit arrays, removed limit on betas. 
---
 tedana/stats.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tedana/stats.py b/tedana/stats.py
index 2a82a5519..0aa319c8c 100644
--- a/tedana/stats.py
+++ b/tedana/stats.py
@@ -232,9 +232,7 @@ def get_ls_coeffs(data, X, mask=None, add_const=False, compute_zvalues=False, mi
         C = C[:, np.newaxis]
         std_betas = np.sqrt(np.dot(sigma, C.T))
         z_values = t_to_z(betas / std_betas, df)
-        z_values = np.nan_to_num(z_values, posinf=30, neginf=-30)
-
-        betas = np.nan_to_num(betas, posinf=30, neginf=-30)
+        z_values = np.clip(z_values, -40, 40)

     if add_const:
         # drop beta for intercept, if specified
         betas = betas[:, :-1]

From d8bd35596fc31105a27fe1223b940043896abfde Mon Sep 17 00:00:00 2001
From: Stefano Moia
Date: Fri, 15 Jan 2021 12:57:46 +0100
Subject: [PATCH 18/19] Align indented line and keep the linter happy.

---
 tedana/metrics/kundu_fit.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tedana/metrics/kundu_fit.py b/tedana/metrics/kundu_fit.py
index 7a54274f7..55dc52326 100644
--- a/tedana/metrics/kundu_fit.py
+++ b/tedana/metrics/kundu_fit.py
@@ -122,9 +122,9 @@ def dependence_metrics(catd, tsoc, mmix, adaptive_mask, tes, ref_img,

     # compute Betas and means over TEs for TE-dependence analysis
     betas = get_ls_coeffs(utils.unmask(catd, mask),
-                        mmix_corrected,
-                        np.repeat(mask[:, np.newaxis], len(tes), axis=1),
-                        add_const=True)
+                          mmix_corrected,
+                          np.repeat(mask[:, np.newaxis], len(tes), axis=1),
+                          add_const=True)
     betas = betas[mask, ...]
     n_voxels, n_echos, n_components = betas.shape
     mu = catd.mean(axis=-1, dtype=float)

From 8cfa3ca5f3391aa250cd076f187a62e0245ea055 Mon Sep 17 00:00:00 2001
From: Stefano Moia
Date: Mon, 15 Feb 2021 18:20:35 +0100
Subject: [PATCH 19/19] Rename computefeats2 as get_ls_zvalues

---
 docs/api.rst                |  2 +-
 tedana/decomposition/pca.py |  4 ++--
 tedana/io.py                |  4 ++--
 tedana/metrics/kundu_fit.py |  4 ++--
 tedana/stats.py             |  2 +-
 tedana/tests/test_stats.py  | 26 +++++++++++++-------------
 tedana/workflows/tedana.py  |  6 +++---
 7 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/docs/api.rst b/docs/api.rst
index c300bfb54..ad9f0e267 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -182,7 +182,7 @@ API
    :template: function.rst

    tedana.stats.get_ls_coeffs
-   tedana.stats.computefeats2
+   tedana.stats.get_ls_zvalues
    tedana.stats.getfbounds

diff --git a/tedana/decomposition/pca.py b/tedana/decomposition/pca.py
index 643cb0bba..aadc874d4 100644
--- a/tedana/decomposition/pca.py
+++ b/tedana/decomposition/pca.py
@@ -12,7 +12,7 @@
 from tedana import metrics, utils, io
 from tedana.decomposition import ma_pca
-from tedana.stats import computefeats2
+from tedana.stats import get_ls_zvalues
 from tedana.selection import kundu_tedpca

 LGR = logging.getLogger(__name__)
@@ -249,7 +249,7 @@ def tedpca(data_cat, data_oc, combmode, mask, adaptive_mask, t2sG,
     # Finally write component spatial maps in 4D files, where the spatial maps
     # will be divided by their standard deviation (option normalize=True)
     comp_ts_z = stats.zscore(comp_ts, axis=0)
-    comp_maps = utils.unmask(computefeats2(data_oc, comp_ts_z, mask), mask)
+    comp_maps = utils.unmask(get_ls_zvalues(data_oc, comp_ts_z, mask), mask)
     io.filewrite(comp_maps, op.join(out_dir, 'pca_components.nii.gz'), ref_img)

     # Select components using decision tree

diff --git a/tedana/io.py b/tedana/io.py
index 39cd4d52f..1d587fe8f 100644
--- a/tedana/io.py
+++ b/tedana/io.py
@@ -13,7 +13,7 @@
 from nilearn.image import new_img_like

 from tedana import utils
-from tedana.stats import computefeats2, get_ls_coeffs
+from
tedana.stats import get_ls_zvalues, get_ls_coeffs LGR = logging.getLogger(__name__) RepLGR = logging.getLogger('REPORT') @@ -169,7 +169,7 @@ def writefeats(data, mmix, mask, ref_img, out_dir='.', suffix=''): """ # write feature versions of components - feats = utils.unmask(computefeats2(data, mmix, mask), mask) + feats = utils.unmask(get_ls_zvalues(data, mmix, mask), mask) fname = filewrite(feats, op.join(out_dir, 'feats_{0}'.format(suffix)), ref_img) return fname diff --git a/tedana/metrics/kundu_fit.py b/tedana/metrics/kundu_fit.py index 541304a01..03b4fa8f6 100644 --- a/tedana/metrics/kundu_fit.py +++ b/tedana/metrics/kundu_fit.py @@ -9,7 +9,7 @@ from scipy import stats from tedana import io, utils -from tedana.stats import getfbounds, computefeats2, get_ls_coeffs +from tedana.stats import getfbounds, get_ls_zvalues, get_ls_coeffs LGR = logging.getLogger(__name__) @@ -109,7 +109,7 @@ def dependence_metrics(catd, tsoc, mmix, adaptive_mask, tes, ref_img, # compute un-normalized weight dataset (features) if mmixN is None: mmixN = mmix - WTS = computefeats2(tsoc, mmixN, mask=None) + WTS = get_ls_zvalues(tsoc, mmixN, mask=None) # compute PSC dataset - shouldn't have to refit data tsoc_B = get_ls_coeffs(tsoc_dm, mmix, mask=None, add_const=False) diff --git a/tedana/stats.py b/tedana/stats.py index 0aa319c8c..ad1b46327 100644 --- a/tedana/stats.py +++ b/tedana/stats.py @@ -93,7 +93,7 @@ def getfbounds(n_echos): return f05, f025, f01 -def computefeats2(data, mmix, mask=None): +def get_ls_zvalues(data, mmix, mask=None): """ Converts `data` to component space using `mmix` diff --git a/tedana/tests/test_stats.py b/tedana/tests/test_stats.py index 066f155ae..bec32d9b7 100644 --- a/tedana/tests/test_stats.py +++ b/tedana/tests/test_stats.py @@ -5,14 +5,14 @@ import pytest import random -from tedana.stats import computefeats2 +from tedana.stats import get_ls_zvalues from tedana.stats import get_ls_coeffs from tedana.stats import getfbounds -def test_break_computefeats2(): +def test_break_get_ls_zvalues(): """ - Ensure that computefeats2 fails when input data do not have the right + Ensure that get_ls_zvalues fails when input data do not have the right shapes. 
""" n_samples, n_vols, n_comps = 10000, 100, 50 @@ -22,40 +22,40 @@ def test_break_computefeats2(): data = np.empty((n_samples)) with pytest.raises(ValueError): - computefeats2(data, mmix, mask) + get_ls_zvalues(data, mmix, mask) data = np.empty((n_samples, n_vols)) mmix = np.empty((n_vols)) with pytest.raises(ValueError): - computefeats2(data, mmix, mask) + get_ls_zvalues(data, mmix, mask) mmix = np.empty((n_vols, n_comps)) mask = np.empty((n_samples, n_vols)) with pytest.raises(ValueError): - computefeats2(data, mmix, mask) + get_ls_zvalues(data, mmix, mask) mask = np.empty((n_samples + 1)) with pytest.raises(ValueError): - computefeats2(data, mmix, mask) + get_ls_zvalues(data, mmix, mask) data.shape[1] != mmix.shape[0] mask = np.empty((n_samples)) mmix = np.empty((n_vols + 1, n_comps)) with pytest.raises(ValueError): - computefeats2(data, mmix, mask) + get_ls_zvalues(data, mmix, mask) -def test_smoke_computefeats2(): +def test_smoke_get_ls_zvalues(): """ - Ensures that computefeats2 works with random inputs and different optional parameters + Ensures that get_ls_zvalues works with random inputs and different optional parameters """ n_samples, n_times, n_components = 100, 20, 6 data = np.random.random((n_samples, n_times)) mmix = np.random.random((n_times, n_components)) mask = np.random.randint(2, size=n_samples) - assert computefeats2(data, mmix) is not None - assert computefeats2(data, mmix, mask=mask) is not None - assert computefeats2(data, mmix) is not None + assert get_ls_zvalues(data, mmix) is not None + assert get_ls_zvalues(data, mmix, mask=mask) is not None + assert get_ls_zvalues(data, mmix) is not None def test_get_ls_coeffs(): diff --git a/tedana/workflows/tedana.py b/tedana/workflows/tedana.py index 0b4d7c15f..61af4628f 100644 --- a/tedana/workflows/tedana.py +++ b/tedana/workflows/tedana.py @@ -19,7 +19,7 @@ from tedana import (decay, combine, decomposition, io, metrics, reporting, selection, utils) import tedana.gscontrol as gsc -from tedana.stats import computefeats2 +from tedana.stats import get_ls_zvalues from tedana.workflows.parser_utils import is_valid_file, check_tedpca_value, ContextFilter LGR = logging.getLogger(__name__) @@ -558,7 +558,7 @@ def tedana_workflow(data, tes, out_dir='.', mask=None, for comp in comptable.index.values] mixing_df = pd.DataFrame(data=mmix, columns=comp_names) mixing_df.to_csv(op.join(out_dir, 'ica_mixing.tsv'), sep='\t', index=False) - betas_oc = utils.unmask(computefeats2(data_oc, mmix, mask), mask) + betas_oc = utils.unmask(get_ls_zvalues(data_oc, mmix, mask), mask) io.filewrite(betas_oc, op.join(out_dir, 'ica_components.nii.gz'), ref_img) @@ -578,7 +578,7 @@ def tedana_workflow(data, tes, out_dir='.', mask=None, comptable = io.load_comptable(ctab) if manacc is not None: comptable = selection.manual_selection(comptable, acc=manacc) - betas_oc = utils.unmask(computefeats2(data_oc, mmix, mask), mask) + betas_oc = utils.unmask(get_ls_zvalues(data_oc, mmix, mask), mask) io.filewrite(betas_oc, op.join(out_dir, 'ica_components.nii.gz'), ref_img)