From 0180093952d2353601535dc4a569a833f4c9c2d8 Mon Sep 17 00:00:00 2001 From: Anna Schaar Date: Thu, 7 Sep 2023 16:34:14 +0200 Subject: [PATCH 1/5] check changelog --- ncem/tl/__init__.py | 19 +++++++++++------ ncem/tl/fit/backend/ablation.py | 3 +-- ncem/tl/fit/backend/design_matrix.py | 4 +++- ncem/tl/fit/backend/linear_model.py | 32 ++++++++++++++++++---------- ncem/tl/fit/backend/spline_model.py | 28 +++++++++++++++--------- ncem/tl/fit/backend/testing.py | 18 ++++++++-------- ncem/tl/fit/glm/__init__.py | 19 +++++++++++------ 7 files changed, 76 insertions(+), 47 deletions(-) diff --git a/ncem/tl/__init__.py b/ncem/tl/__init__.py index 5ce7e0b3..058189bd 100644 --- a/ncem/tl/__init__.py +++ b/ncem/tl/__init__.py @@ -1,10 +1,15 @@ -from ncem.tl.fit.backend.linear_model import (differential_ncem, - differential_ncem_deconvoluted, - linear_ncem, - linear_ncem_deconvoluted) +from ncem.tl.fit.backend.linear_model import ( + differential_ncem, + differential_ncem_deconvoluted, + linear_ncem, + linear_ncem_deconvoluted, +) from ncem.tl.fit.backend.spline_model import ( - get_spline_interpolation, spline_differential_ncem, - spline_differential_ncem_deconvoluted, spline_linear_ncem, - spline_linear_ncem_deconvoluted) + get_spline_interpolation, + spline_differential_ncem, + spline_differential_ncem_deconvoluted, + spline_linear_ncem, + spline_linear_ncem_deconvoluted, +) from . import fit diff --git a/ncem/tl/fit/backend/ablation.py b/ncem/tl/fit/backend/ablation.py index 047216cf..5f79a4d0 100644 --- a/ncem/tl/fit/backend/ablation.py +++ b/ncem/tl/fit/backend/ablation.py @@ -7,8 +7,7 @@ import squidpy as sq from scipy.stats import linregress -from ncem.tl.fit.constants import (OBS_KEY_SPLIT, OBSM_KEY_DMAT, - UNS_KEY_ABLATION, VARM_KEY_PARAMS) +from ncem.tl.fit.constants import OBS_KEY_SPLIT, OBSM_KEY_DMAT, UNS_KEY_ABLATION, VARM_KEY_PARAMS from ncem.tl.fit.glm import linear_ncem diff --git a/ncem/tl/fit/backend/design_matrix.py b/ncem/tl/fit/backend/design_matrix.py index 810d104e..e080c747 100644 --- a/ncem/tl/fit/backend/design_matrix.py +++ b/ncem/tl/fit/backend/design_matrix.py @@ -296,7 +296,9 @@ def get_dmats_from_deconvoluted( obs_full = pd.concat([obs, obs_index_type_x, obs_niche], axis=1) dmats[x] = patsy.dmatrix(formulas[x], obs_full) # ensure that column names start with index type name - dmat_columns = [col if col.startswith(PREFIX_INDEX) else PREFIX_INDEX+x+col for col in dmats[x].design_info.column_names] + dmat_columns = [ + col if col.startswith(PREFIX_INDEX) else PREFIX_INDEX + x + col for col in dmats[x].design_info.column_names + ] dmats[x] = pd.DataFrame(np.asarray(dmats[x]), index=obs.index, columns=dmat_columns) return dmats diff --git a/ncem/tl/fit/backend/linear_model.py b/ncem/tl/fit/backend/linear_model.py index 274d78e0..1fa40c68 100644 --- a/ncem/tl/fit/backend/linear_model.py +++ b/ncem/tl/fit/backend/linear_model.py @@ -5,20 +5,30 @@ import pandas as pd from ncem.tl.fit.backend.design_matrix import ( - extend_formula_differential_ncem, extend_formula_ncem, - get_binary_sample_annotation_conditions, get_dmat_from_obs, - get_dmats_from_deconvoluted, get_obs_niche_from_graph) + extend_formula_differential_ncem, + extend_formula_ncem, + get_binary_sample_annotation_conditions, + get_dmat_from_obs, + get_dmats_from_deconvoluted, + get_obs_niche_from_graph, +) from ncem.tl.fit.backend.ols_fit import ols_fit from ncem.tl.fit.backend.testing import test_deconvoluted, test_standard from ncem.tl.fit.backend.utils import write_uns -from ncem.tl.fit.constants import (OBSM_KEY_DMAT, OBSM_KEY_DMAT_NICHE, - UNS_KEY_CELL_TYPES, UNS_KEY_CONDITIONS, - UNS_KEY_PER_INDEX, VARM_KEY_FDR_PVALS, - VARM_KEY_FDR_PVALS_DIFFERENTIAL, - VARM_KEY_PARAMS, VARM_KEY_PVALS, - VARM_KEY_PVALS_DIFFERENTIAL, - VARM_KEY_TESTED_PARAMS, - VARM_KEY_TESTED_PARAMS_DIFFERENTIAL) +from ncem.tl.fit.constants import ( + OBSM_KEY_DMAT, + OBSM_KEY_DMAT_NICHE, + UNS_KEY_CELL_TYPES, + UNS_KEY_CONDITIONS, + UNS_KEY_PER_INDEX, + VARM_KEY_FDR_PVALS, + VARM_KEY_FDR_PVALS_DIFFERENTIAL, + VARM_KEY_PARAMS, + VARM_KEY_PVALS, + VARM_KEY_PVALS_DIFFERENTIAL, + VARM_KEY_TESTED_PARAMS, + VARM_KEY_TESTED_PARAMS_DIFFERENTIAL, +) def _validate_formula(formula: str, auto_keys: List[str] = []): diff --git a/ncem/tl/fit/backend/spline_model.py b/ncem/tl/fit/backend/spline_model.py index 9d0ed3c0..1d46599d 100644 --- a/ncem/tl/fit/backend/spline_model.py +++ b/ncem/tl/fit/backend/spline_model.py @@ -5,18 +5,26 @@ import pandas as pd import patsy -from ncem.tl.fit.backend.linear_model import (differential_ncem, - differential_ncem_deconvoluted, - linear_ncem, - linear_ncem_deconvoluted) +from ncem.tl.fit.backend.linear_model import ( + differential_ncem, + differential_ncem_deconvoluted, + linear_ncem, + linear_ncem_deconvoluted, +) from ncem.tl.fit.backend.testing import test_deconvoluted, test_standard from ncem.tl.fit.backend.utils import read_uns, write_uns -from ncem.tl.fit.constants import (PREFIX_INDEX, UNS_KEY_CELL_TYPES, - UNS_KEY_PER_INDEX, UNS_KEY_SPLINE_COEFS, - UNS_KEY_SPLINE_DF, UNS_KEY_SPLINE_FAMILY, - UNS_KEY_SPLINE_KEY_1D, - VARM_KEY_FDR_PVALS_SPLINE, VARM_KEY_PARAMS, - VARM_KEY_PVALS_SPLINE) +from ncem.tl.fit.constants import ( + PREFIX_INDEX, + UNS_KEY_CELL_TYPES, + UNS_KEY_PER_INDEX, + UNS_KEY_SPLINE_COEFS, + UNS_KEY_SPLINE_DF, + UNS_KEY_SPLINE_FAMILY, + UNS_KEY_SPLINE_KEY_1D, + VARM_KEY_FDR_PVALS_SPLINE, + VARM_KEY_PARAMS, + VARM_KEY_PVALS_SPLINE, +) def get_spline_basis(df: int, key_1d_coord: str, obs: pd.DataFrame, spline_family: str): diff --git a/ncem/tl/fit/backend/testing.py b/ncem/tl/fit/backend/testing.py index be75966f..ebbfd291 100644 --- a/ncem/tl/fit/backend/testing.py +++ b/ncem/tl/fit/backend/testing.py @@ -127,7 +127,7 @@ def test_deconvoluted( for k, y in coef_to_test.items(): if np.all([z in parameter_names for z in y]): params_idx = np.sort([parameter_names.index(z) for z in y]) - #theta_mle = params.values[:, params_idx] + # theta_mle = params.values[:, params_idx] for idx, coef in enumerate(y): theta_mle = params.values[:, idx] theta_sd = fisher_inv[:, idx, idx] @@ -135,16 +135,16 @@ def test_deconvoluted( theta_sd = np.sqrt(theta_sd) theta_sd = np.expand_dims(theta_sd, axis=0) theta_mle = np.expand_dims(theta_mle, axis=0) - #pvals[x] = wald_test(theta_mle=theta_mle, theta_sd=theta_sd, theta0=0) - #fisher_inv_subset = fisher_inv[:, idx, :][:, :, idx] + # pvals[x] = wald_test(theta_mle=theta_mle, theta_sd=theta_sd, theta0=0) + # fisher_inv_subset = fisher_inv[:, idx, :][:, :, idx] assert coef not in pvals.keys() pvals[coef] = wald_test(theta_mle=theta_mle, theta_sd=theta_sd, theta0=0) qvals[coef] = correct(pvals[coef].flatten()).reshape(pvals[coef].shape) tested_coefficients[coef] = theta_mle - #pvals[k] = wald_test_chisq(theta_mle=theta_mle.T, theta_covar=fisher_inv_subset) - #if len(idx) == 1: + # pvals[k] = wald_test_chisq(theta_mle=theta_mle.T, theta_covar=fisher_inv_subset) + # if len(idx) == 1: # tested_coefficients[k] = theta_mle[:, 0] - #else: + # else: # tested_coefficients[k] = np.zeros_like(theta_mle[:, 0]) + np.nan else: for y in coef_to_test: @@ -163,9 +163,9 @@ def test_deconvoluted( qvals_arr = np.concatenate(list(qvals.values()), axis=0).T adata.varm[key_pval] = pd.DataFrame(pvals_arr, index=adata.var_names, columns=list(pvals.keys())) adata.varm[key_fdr_pval] = pd.DataFrame(qvals_arr, index=adata.var_names, columns=list(pvals.keys())) - #pvals_flat = np.hstack(list(pvals.values())) - #qvals_flat = np.hstack(list(qvals.values())) - #qvals_flat = correct(pvals_flat) + # pvals_flat = np.hstack(list(pvals.values())) + # qvals_flat = np.hstack(list(qvals.values())) + # qvals_flat = correct(pvals_flat) # qvals = qvals_flat.reshape((-1, len(test_keys))) # Write results to object: if key_coef is not None: diff --git a/ncem/tl/fit/glm/__init__.py b/ncem/tl/fit/glm/__init__.py index e0ce886f..b66e7cef 100644 --- a/ncem/tl/fit/glm/__init__.py +++ b/ncem/tl/fit/glm/__init__.py @@ -1,8 +1,13 @@ -from ncem.tl.fit.backend.linear_model import (differential_ncem, - differential_ncem_deconvoluted, - linear_ncem, - linear_ncem_deconvoluted) +from ncem.tl.fit.backend.linear_model import ( + differential_ncem, + differential_ncem_deconvoluted, + linear_ncem, + linear_ncem_deconvoluted, +) from ncem.tl.fit.backend.spline_model import ( - get_spline_interpolation, spline_differential_ncem, - spline_differential_ncem_deconvoluted, spline_linear_ncem, - spline_linear_ncem_deconvoluted) + get_spline_interpolation, + spline_differential_ncem, + spline_differential_ncem_deconvoluted, + spline_linear_ncem, + spline_linear_ncem_deconvoluted, +) From 24d6c4070db8ebac350d19c1ed39515c4bc784e1 Mon Sep 17 00:00:00 2001 From: Anna Schaar Date: Thu, 7 Sep 2023 17:17:07 +0200 Subject: [PATCH 2/5] inspect changelog --- ncem/api/train/__init__.py | 18 +++++++++++++----- ncem/estimators/__init__.py | 6 ++---- ncem/estimators/base_estimator.py | 23 +++++++++++++++-------- ncem/interpretation/__init__.py | 14 ++++++++------ ncem/models/layers/__init__.py | 24 ++++++++++++------------ ncem/models/model_cvae.py | 3 +-- ncem/models/model_cvae_ncem.py | 3 +-- ncem/models/model_ed_single_ncem.py | 5 +---- ncem/models/model_interactions.py | 3 +-- ncem/train/__init__.py | 15 ++++++++++----- ncem/train/train_model.py | 14 ++++++++++---- ncem/unit_test/test_dataloader_local.py | 10 +++++++--- ncem/unit_test/test_estimator_local.py | 3 +-- ncem/unit_test/tools/fit/test_glm.py | 24 +++++++++++++----------- ncem/unit_test/tools/fit/test_spline.py | 24 ++++++++++-------------- 15 files changed, 105 insertions(+), 84 deletions(-) diff --git a/ncem/api/train/__init__.py b/ncem/api/train/__init__.py index daff07c2..846acf99 100644 --- a/ncem/api/train/__init__.py +++ b/ncem/api/train/__init__.py @@ -1,9 +1,17 @@ """Initializes a train object in api.""" import numpy as np -from ncem.estimators import (Estimator, EstimatorCVAE, EstimatorCVAEncem, - EstimatorDeconvolution, EstimatorED, - EstimatorEDncem, EstimatorEdNcemNeighborhood, - EstimatorGraph, EstimatorInteractions, - EstimatorLinear, EstimatorNoGraph) +from ncem.estimators import ( + Estimator, + EstimatorCVAE, + EstimatorCVAEncem, + EstimatorDeconvolution, + EstimatorED, + EstimatorEDncem, + EstimatorEdNcemNeighborhood, + EstimatorGraph, + EstimatorInteractions, + EstimatorLinear, + EstimatorNoGraph, +) from ncem.models import BetaScheduler diff --git a/ncem/estimators/__init__.py b/ncem/estimators/__init__.py index 103a99a8..5b88ec7a 100644 --- a/ncem/estimators/__init__.py +++ b/ncem/estimators/__init__.py @@ -1,12 +1,10 @@ """Importing estimator classes.""" -from ncem.estimators.base_estimator import (Estimator, EstimatorGraph, - EstimatorNoGraph) +from ncem.estimators.base_estimator import Estimator, EstimatorGraph, EstimatorNoGraph from ncem.estimators.base_estimator_neighbors import EstimatorNeighborhood from ncem.estimators.estimator_cvae import EstimatorCVAE from ncem.estimators.estimator_cvae_ncem import EstimatorCVAEncem from ncem.estimators.estimator_deconvolution import EstimatorDeconvolution from ncem.estimators.estimator_ed import EstimatorED -from ncem.estimators.estimator_ed_ncem import (EstimatorEDncem, - EstimatorEdNcemNeighborhood) +from ncem.estimators.estimator_ed_ncem import EstimatorEDncem, EstimatorEdNcemNeighborhood from ncem.estimators.estimator_interactions import EstimatorInteractions from ncem.estimators.estimator_linear import EstimatorLinear diff --git a/ncem/estimators/base_estimator.py b/ncem/estimators/base_estimator.py index ad803e98..6acb33c2 100644 --- a/ncem/estimators/base_estimator.py +++ b/ncem/estimators/base_estimator.py @@ -7,12 +7,20 @@ import tensorflow as tf from ncem.utils.losses import GaussianLoss, KLLoss, NegBinLoss -from ncem.utils.metrics import (custom_kl, custom_mae, custom_mean_sd, - custom_mse, custom_mse_scaled, - gaussian_reconstruction_loss, logp1_custom_mse, - logp1_r_squared, logp1_r_squared_linreg, - nb_reconstruction_loss, r_squared, - r_squared_linreg) +from ncem.utils.metrics import ( + custom_kl, + custom_mae, + custom_mean_sd, + custom_mse, + custom_mse_scaled, + gaussian_reconstruction_loss, + logp1_custom_mse, + logp1_r_squared, + logp1_r_squared_linreg, + nb_reconstruction_loss, + r_squared, + r_squared_linreg, +) def transfer_layers(model1, model2): @@ -212,8 +220,7 @@ def _load_data( elif data_origin.startswith("cell2location_lymphnode"): self.targeted_assay = False - from ncem.data import \ - DataLoaderCell2locationLymphnode as DataLoader + from ncem.data import DataLoaderCell2locationLymphnode as DataLoader self.undefined_node_types = None diff --git a/ncem/interpretation/__init__.py b/ncem/interpretation/__init__.py index f33467e3..fd3279aa 100644 --- a/ncem/interpretation/__init__.py +++ b/ncem/interpretation/__init__.py @@ -1,7 +1,9 @@ """Importing interpretation for different model classes.""" -from ncem.interpretation.interpreter import (InterpreterCVAEncem, - InterpreterDeconvolution, - InterpreterEDncem, - InterpreterGraph, - InterpreterInteraction, - InterpreterLinear) +from ncem.interpretation.interpreter import ( + InterpreterCVAEncem, + InterpreterDeconvolution, + InterpreterEDncem, + InterpreterGraph, + InterpreterInteraction, + InterpreterLinear, +) diff --git a/ncem/models/layers/__init__.py b/ncem/models/layers/__init__.py index 1f78d310..ee99c92a 100644 --- a/ncem/models/layers/__init__.py +++ b/ncem/models/layers/__init__.py @@ -1,15 +1,15 @@ """Importing custom layers for different model classes.""" from ncem.models.layers.gnn_layers import GCNLayer, MaxLayer -from ncem.models.layers.layer_stacks_lvm import (CondDecoder, CondEncoder, - Decoder, Encoder, - SamplingPrior) -from ncem.models.layers.output_layers import (GaussianConstDispOutput, - GaussianOutput, - LinearConstDispOutput, - LinearOutput, - NegBinConstDispOutput, - NegBinOutput, - NegBinSharedDispOutput, get_out) +from ncem.models.layers.layer_stacks_lvm import CondDecoder, CondEncoder, Decoder, Encoder, SamplingPrior +from ncem.models.layers.output_layers import ( + GaussianConstDispOutput, + GaussianOutput, + LinearConstDispOutput, + LinearOutput, + NegBinConstDispOutput, + NegBinOutput, + NegBinSharedDispOutput, + get_out, +) from ncem.models.layers.preproc_input import DenseInteractions, PreprocInput -from ncem.models.layers.single_gnn_layers import (SingleGatLayer, - SingleLrGatLayer) +from ncem.models.layers.single_gnn_layers import SingleGatLayer, SingleLrGatLayer diff --git a/ncem/models/model_cvae.py b/ncem/models/model_cvae.py index b8d8e816..59ec37aa 100644 --- a/ncem/models/model_cvae.py +++ b/ncem/models/model_cvae.py @@ -1,8 +1,7 @@ import numpy as np import tensorflow as tf -from ncem.models.layers import (Decoder, Encoder, PreprocInput, SamplingPrior, - get_out) +from ncem.models.layers import Decoder, Encoder, PreprocInput, SamplingPrior, get_out class ModelCVAE: diff --git a/ncem/models/model_cvae_ncem.py b/ncem/models/model_cvae_ncem.py index cee709c3..75c50b5c 100644 --- a/ncem/models/model_cvae_ncem.py +++ b/ncem/models/model_cvae_ncem.py @@ -3,8 +3,7 @@ import numpy as np import tensorflow as tf -from ncem.models.layers import (CondDecoder, CondEncoder, GCNLayer, MaxLayer, - PreprocInput, SamplingPrior, get_out) +from ncem.models.layers import CondDecoder, CondEncoder, GCNLayer, MaxLayer, PreprocInput, SamplingPrior, get_out class ModelCVAEncem: diff --git a/ncem/models/model_ed_single_ncem.py b/ncem/models/model_ed_single_ncem.py index 0332ccc9..39a4636e 100644 --- a/ncem/models/model_ed_single_ncem.py +++ b/ncem/models/model_ed_single_ncem.py @@ -1,10 +1,7 @@ import tensorflow as tf from ncem.models.layers import Decoder, get_out -from ncem.models.layers.single_gnn_layers import (SingleGatLayer, - SingleGcnLayer, - SingleLrGatLayer, - SingleMaxLayer) +from ncem.models.layers.single_gnn_layers import SingleGatLayer, SingleGcnLayer, SingleLrGatLayer, SingleMaxLayer class ModelEd2Ncem: diff --git a/ncem/models/model_interactions.py b/ncem/models/model_interactions.py index 2504a09c..13260618 100644 --- a/ncem/models/model_interactions.py +++ b/ncem/models/model_interactions.py @@ -2,8 +2,7 @@ import tensorflow as tf -from ncem.models.layers import (DenseInteractions, LinearConstDispOutput, - LinearOutput) +from ncem.models.layers import DenseInteractions, LinearConstDispOutput, LinearOutput class ModelInteractions: diff --git a/ncem/train/__init__.py b/ncem/train/__init__.py index 9dab3b31..fb9c705a 100644 --- a/ncem/train/__init__.py +++ b/ncem/train/__init__.py @@ -1,7 +1,12 @@ """Importing GridSearchContrainer and TrainModel classes.""" from ncem.train.summaries import GridSearchContainer -from ncem.train.train_model import (TrainModelCVAE, TrainModelCVAEncem, - TrainModelED, TrainModelEDncem, - TrainModelEdSingleNcem, - TrainModelInteractions, TrainModelLinear, - TrainModelLinearDeconvolution) +from ncem.train.train_model import ( + TrainModelCVAE, + TrainModelCVAEncem, + TrainModelED, + TrainModelEDncem, + TrainModelEdSingleNcem, + TrainModelInteractions, + TrainModelLinear, + TrainModelLinearDeconvolution, +) diff --git a/ncem/train/train_model.py b/ncem/train/train_model.py index 8ae4bc15..73861e88 100644 --- a/ncem/train/train_model.py +++ b/ncem/train/train_model.py @@ -2,10 +2,16 @@ import pickle from typing import Union -from ncem.estimators import (EstimatorCVAE, EstimatorCVAEncem, - EstimatorDeconvolution, EstimatorED, - EstimatorEDncem, EstimatorEdNcemNeighborhood, - EstimatorInteractions, EstimatorLinear) +from ncem.estimators import ( + EstimatorCVAE, + EstimatorCVAEncem, + EstimatorDeconvolution, + EstimatorED, + EstimatorEDncem, + EstimatorEdNcemNeighborhood, + EstimatorInteractions, + EstimatorLinear, +) class TrainModel: diff --git a/ncem/unit_test/test_dataloader_local.py b/ncem/unit_test/test_dataloader_local.py index 289b6d0a..987f5f19 100644 --- a/ncem/unit_test/test_dataloader_local.py +++ b/ncem/unit_test/test_dataloader_local.py @@ -1,8 +1,12 @@ import pytest -from ncem.unit_test.directories import (DATA_PATH_HARTMANN, DATA_PATH_JAROSCH, - DATA_PATH_LU, DATA_PATH_SCHUERCH, - DATA_PATH_ZHANG) +from ncem.unit_test.directories import ( + DATA_PATH_HARTMANN, + DATA_PATH_JAROSCH, + DATA_PATH_LU, + DATA_PATH_SCHUERCH, + DATA_PATH_ZHANG, +) class HelperTestDataLoader: diff --git a/ncem/unit_test/test_estimator_local.py b/ncem/unit_test/test_estimator_local.py index b08bbcdb..e761a0e4 100644 --- a/ncem/unit_test/test_estimator_local.py +++ b/ncem/unit_test/test_estimator_local.py @@ -4,8 +4,7 @@ import ncem.api as ncem from ncem.estimators import Estimator -from ncem.unit_test.directories import (DATA_PATH_DESTVI, DATA_PATH_HARTMANN, - DATA_PATH_LU, DATA_PATH_ZHANG) +from ncem.unit_test.directories import DATA_PATH_DESTVI, DATA_PATH_HARTMANN, DATA_PATH_LU, DATA_PATH_ZHANG class HelperTestEstimator: diff --git a/ncem/unit_test/tools/fit/test_glm.py b/ncem/unit_test/tools/fit/test_glm.py index 2f30095c..ed9933e9 100644 --- a/ncem/unit_test/tools/fit/test_glm.py +++ b/ncem/unit_test/tools/fit/test_glm.py @@ -1,17 +1,19 @@ import numpy as np import pytest -from ncem.tl.fit.constants import (PREFIX_INDEX, PREFIX_NEIGHBOR, - VARM_KEY_FDR_PVALS, - VARM_KEY_FDR_PVALS_DIFFERENTIAL, - VARM_KEY_PARAMS, VARM_KEY_PVALS, - VARM_KEY_PVALS_DIFFERENTIAL, - VARM_KEY_TESTED_PARAMS, - VARM_KEY_TESTED_PARAMS_DIFFERENTIAL) -from ncem.tl.fit.glm import (differential_ncem, differential_ncem_deconvoluted, - linear_ncem, linear_ncem_deconvoluted) -from ncem.unit_test.data_for_tests import (KEY_ADJACENCY, KEY_BATCH, KEY_COND, - KEY_DECONV, KEY_TYPE, get_adata) +from ncem.tl.fit.constants import ( + PREFIX_INDEX, + PREFIX_NEIGHBOR, + VARM_KEY_FDR_PVALS, + VARM_KEY_FDR_PVALS_DIFFERENTIAL, + VARM_KEY_PARAMS, + VARM_KEY_PVALS, + VARM_KEY_PVALS_DIFFERENTIAL, + VARM_KEY_TESTED_PARAMS, + VARM_KEY_TESTED_PARAMS_DIFFERENTIAL, +) +from ncem.tl.fit.glm import differential_ncem, differential_ncem_deconvoluted, linear_ncem, linear_ncem_deconvoluted +from ncem.unit_test.data_for_tests import KEY_ADJACENCY, KEY_BATCH, KEY_COND, KEY_DECONV, KEY_TYPE, get_adata def _assert_slot_keys(adata, differential: bool): diff --git a/ncem/unit_test/tools/fit/test_spline.py b/ncem/unit_test/tools/fit/test_spline.py index 941ce865..970df460 100644 --- a/ncem/unit_test/tools/fit/test_spline.py +++ b/ncem/unit_test/tools/fit/test_spline.py @@ -4,20 +4,16 @@ import pytest from ncem.tl.fit.backend.utils import read_uns -from ncem.tl.fit.constants import (UNS_KEY_CELL_TYPES, - VARM_KEY_FDR_PVALS_SPLINE, - VARM_KEY_PVALS_SPLINE) -from ncem.tl.fit.glm import (get_spline_interpolation, - spline_differential_ncem, - spline_differential_ncem_deconvoluted, - spline_linear_ncem, - spline_linear_ncem_deconvoluted) -from ncem.unit_test.data_for_tests import (KEY_1D, KEY_ADJACENCY, KEY_BATCH, - KEY_COND, KEY_DECONV, KEY_TYPE, - get_adata_1d) -from ncem.unit_test.tools.fit.test_glm import (_assert_slot_dimension, - _assert_slot_domain, - _assert_slot_keys) +from ncem.tl.fit.constants import UNS_KEY_CELL_TYPES, VARM_KEY_FDR_PVALS_SPLINE, VARM_KEY_PVALS_SPLINE +from ncem.tl.fit.glm import ( + get_spline_interpolation, + spline_differential_ncem, + spline_differential_ncem_deconvoluted, + spline_linear_ncem, + spline_linear_ncem_deconvoluted, +) +from ncem.unit_test.data_for_tests import KEY_1D, KEY_ADJACENCY, KEY_BATCH, KEY_COND, KEY_DECONV, KEY_TYPE, get_adata_1d +from ncem.unit_test.tools.fit.test_glm import _assert_slot_dimension, _assert_slot_domain, _assert_slot_keys HYPERPARAMS_SPLINE = {"df": 3, "spline_family": "cr", "key_1d_coord": KEY_1D} From 4137a39af396d01f7163761fddb9868749a20ce5 Mon Sep 17 00:00:00 2001 From: Anna Schaar Date: Fri, 8 Sep 2023 10:00:45 +0200 Subject: [PATCH 3/5] fix issue in column name saving --- ncem/tl/fit/backend/design_matrix.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ncem/tl/fit/backend/design_matrix.py b/ncem/tl/fit/backend/design_matrix.py index e080c747..fe98e507 100644 --- a/ncem/tl/fit/backend/design_matrix.py +++ b/ncem/tl/fit/backend/design_matrix.py @@ -247,8 +247,12 @@ def get_dmat_from_obs(obs: pd.DataFrame, obs_niche: pd.DataFrame, formula: str, obs_niche.columns = [PREFIX_NEIGHBOR + x for x in obs_niche.columns] # Merge sample annotation: obs_full = pd.concat([obs, obs_index_type, obs_niche], axis=1) + columns_names = formula.replace('~0+','').split('+') + dmat = patsy.dmatrix(formula, obs_full) - dmat = pd.DataFrame(np.asarray(dmat), index=obs.index, columns=dmat.design_info.column_names) + # Simplify names, this is necessary for patsy to accept these as terms later. + column_names = [f"{x.split('[')[0]}{x.split('[')[1].split(']')[-1]}" for x in dmat.design_info.column_names] + dmat = pd.DataFrame(np.asarray(dmat), index=obs.index, columns=column_names) return dmat From 9c23d032945c9fc19bcdaca1227106afe5a9d2a8 Mon Sep 17 00:00:00 2001 From: Anna Schaar Date: Sun, 10 Sep 2023 13:43:37 +0200 Subject: [PATCH 4/5] adjust naming convention for deconvoluted model --- ncem/tl/fit/backend/design_matrix.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ncem/tl/fit/backend/design_matrix.py b/ncem/tl/fit/backend/design_matrix.py index fe98e507..e9f4c1f8 100644 --- a/ncem/tl/fit/backend/design_matrix.py +++ b/ncem/tl/fit/backend/design_matrix.py @@ -303,6 +303,7 @@ def get_dmats_from_deconvoluted( dmat_columns = [ col if col.startswith(PREFIX_INDEX) else PREFIX_INDEX + x + col for col in dmats[x].design_info.column_names ] + dmat_columns = [f"{x.split('[')[0]}{x.split('[')[1].split(']')[-1]}" for x in dmat_columns] dmats[x] = pd.DataFrame(np.asarray(dmats[x]), index=obs.index, columns=dmat_columns) return dmats From d03a0792b9ab6ea62d2715b57ce50fcf1a84967c Mon Sep 17 00:00:00 2001 From: Anna Schaar Date: Fri, 27 Oct 2023 13:00:33 +0200 Subject: [PATCH 5/5] remove patsy dependency --- ncem/tl/fit/backend/design_matrix.py | 23 +++++++++-------------- ncem/tl/fit/backend/linear_model.py | 1 - ncem/tl/fit/backend/testing.py | 1 + pyproject.toml | 3 ++- 4 files changed, 12 insertions(+), 16 deletions(-) diff --git a/ncem/tl/fit/backend/design_matrix.py b/ncem/tl/fit/backend/design_matrix.py index e9f4c1f8..f1b182f4 100644 --- a/ncem/tl/fit/backend/design_matrix.py +++ b/ncem/tl/fit/backend/design_matrix.py @@ -4,6 +4,7 @@ import numpy as np import pandas as pd import patsy +from formulaic import Formula from ncem.tl.fit.constants import PREFIX_INDEX, PREFIX_NEIGHBOR @@ -248,11 +249,9 @@ def get_dmat_from_obs(obs: pd.DataFrame, obs_niche: pd.DataFrame, formula: str, # Merge sample annotation: obs_full = pd.concat([obs, obs_index_type, obs_niche], axis=1) columns_names = formula.replace('~0+','').split('+') - - dmat = patsy.dmatrix(formula, obs_full) - # Simplify names, this is necessary for patsy to accept these as terms later. - column_names = [f"{x.split('[')[0]}{x.split('[')[1].split(']')[-1]}" for x in dmat.design_info.column_names] - dmat = pd.DataFrame(np.asarray(dmat), index=obs.index, columns=column_names) + + dmat = Formula(formula).get_model_matrix(obs_full) + dmat = pd.DataFrame(np.asarray(dmat), index=obs.index, columns=dmat.columns) return dmat @@ -298,13 +297,8 @@ def get_dmats_from_deconvoluted( obs_index_type_x.index = obs.index # Merge sample annotation: obs_full = pd.concat([obs, obs_index_type_x, obs_niche], axis=1) - dmats[x] = patsy.dmatrix(formulas[x], obs_full) - # ensure that column names start with index type name - dmat_columns = [ - col if col.startswith(PREFIX_INDEX) else PREFIX_INDEX + x + col for col in dmats[x].design_info.column_names - ] - dmat_columns = [f"{x.split('[')[0]}{x.split('[')[1].split(']')[-1]}" for x in dmat_columns] - dmats[x] = pd.DataFrame(np.asarray(dmats[x]), index=obs.index, columns=dmat_columns) + dmats[x] = Formula(formulas[x]).get_model_matrix(obs_full) + dmats[x] = pd.DataFrame(np.asarray(dmats[x]), index=obs.index, columns=dmats[x].columns) return dmats @@ -353,7 +347,8 @@ def get_dmat_global_from_deconvoluted(obs: pd.DataFrame, deconv: pd.DataFrame, f obs_index_type[x].index = obs.index # Merge sample annotation: obs_full = pd.concat([obs, obs_index_type_x, obs_niche], axis=1) - dmat_x = patsy.dmatrix(formula, obs_full) - dmats.append(pd.DataFrame(np.asarray(dmat_x), index=obs.index, columns=dmat_x.design_info.column_names)) + + dmat_x = Formula(formula).get_model_matrix(obs_full) + dmats.append(pd.DataFrame(np.asarray(dmat_x), index=obs.index, columns=dmat_x.columns)) dmat = pd.concat(dmats, axis=0) return dmat diff --git a/ncem/tl/fit/backend/linear_model.py b/ncem/tl/fit/backend/linear_model.py index 1fa40c68..6d9aa9f6 100644 --- a/ncem/tl/fit/backend/linear_model.py +++ b/ncem/tl/fit/backend/linear_model.py @@ -274,7 +274,6 @@ def linear_ncem_deconvoluted( ) dmats = get_dmats_from_deconvoluted(deconv=adata.obsm[key_deconvolution], formulas=formulas, obs=adata.obs) for k, v in dmats.items(): - print(k) dmat_key = f"{OBSM_KEY_DMAT}_{k}" adata.obsm[dmat_key] = v params = ols_fit(x_=adata.obsm[dmat_key].values, y_=adata.layers[k]) diff --git a/ncem/tl/fit/backend/testing.py b/ncem/tl/fit/backend/testing.py index ebbfd291..9cfda723 100644 --- a/ncem/tl/fit/backend/testing.py +++ b/ncem/tl/fit/backend/testing.py @@ -150,6 +150,7 @@ def test_deconvoluted( for y in coef_to_test: if y in parameter_names: idx = parameter_names.index(y) + print(idx) theta_mle = params.values[:, idx] theta_sd = fisher_inv[:, idx, idx] theta_sd = np.nextafter(0, np.inf, out=theta_sd, where=theta_sd < np.nextafter(0, np.inf)) diff --git a/pyproject.toml b/pyproject.toml index 6d4b703b..e96e04c5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,10 +22,11 @@ python = ">=3.8,<=3.10" click = "^8.0.0" rich = "^10.1.0" PyYAML = "^5.4.1" -Jinja2 = ">=2.11.3,<4.0.0" +Jinja2 = ">=2.11.3,<=4.0.0" scanpy = "^1.9.3" squidpy = "^1.2.3" patsy = "^0.5.1" +formulaic = "=0.6.6" scipy = "=1.9.1" seaborn = "^0.12.2" matplotlib = "^3.7.1"