Skip to content

Commit

Permalink
add multivariate arg [skip ci]
Browse files Browse the repository at this point in the history
  • Loading branch information
bschilder committed Mar 8, 2024
1 parent cdb539b commit d5194b5
Show file tree
Hide file tree
Showing 10 changed files with 420 additions and 91 deletions.
4 changes: 3 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ SystemRequirements: Python (>= 3.7.0)
biocViews:
Software
Imports:
Rdimtools,
scKirby,
utils,
methods,
Expand Down Expand Up @@ -85,7 +86,8 @@ Remotes:
github::neurogenomics/HPOExplorer,
github::neurogenomics/MultiEWCE,
github::RajLabMSSM/echotabix,
github::RajLabMSSM/downloadR
github::RajLabMSSM/downloadR,
github::satijalab/seurat-wrappers
RoxygenNote: 7.3.1
VignetteBuilder: knitr
Config/testthat/edition: 3
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ export(phenomix_merge)
export(phenomix_query)
export(phenomix_query_batched)
export(plot_enrichment)
export(plot_hpo_pseudotime)
export(plot_reduction)
export(plot_trait_cor)
export(plot_variancePartition)
Expand Down
38 changes: 28 additions & 10 deletions R/iterate_lm.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,18 @@
#' @param ymat gene x celltype matrix.
#' @param test_method Association testing method to use.
#' @param correction_method Multiple-testing correction
#' method to be passed to \code{stats::p.adjust}.
#' method to be passed to \link[stats]{p.adjust}.
#' @param qvalue_thresh q-value threshold to use when report
#' significant results summary.
#' @param quantize A named list where the values of "x" and "y" indicaite the
#' number of quantiles to bin the respective "xmat" and "ymat" datasets into.
#' @param quantize A named list where the values of "x" and "y" indicate the
#' number of quantiles to bin the respective \code{xmat} and
#' \code{ymat} datasets into.
#' @param multivariate If \code{TRUE}, runs tests with each column in
#' \code{xmat} as a multivariate predictor in a single model
#' (one model per column in \code{ymat}). If \code{FALSE},
#' runs tests with each column in \code{xmat} as a univariate predictor in
#' separate models (one model per \code{xmat} column, for each column in
#' \code{ymat}).
#' @param ... Additional parameters passed to the statistical test function.
#' @inheritParams set_cores
#'
#' @export
Expand All @@ -29,14 +36,16 @@
#' workers = 1)
iterate_lm <- function(xmat,
ymat,
test_method = c("glm","anova"),
test_method = c("glm","anova","lm.ridge","rlm"),
multivariate = FALSE,
correction_method = "BH",
qvalue_thresh = .05,
quantize = list(x=NULL,
y=NULL),
progressbar = TRUE,
workers = NULL,
verbose=TRUE) {
verbose = TRUE,
...) {
p <- q <- NULL;
test_method <- match.arg(test_method)
data.table::setDTthreads(threads = 1)
Expand Down Expand Up @@ -71,25 +80,34 @@ iterate_lm <- function(xmat,
lm_res <- iterate_lm_long(xmat = xmat,
ymat = ymat,
cores = cores,
method = test_method)
method = test_method,
multivariate = multivariate,
...)
### Multiple-testing correction
if(test_method=="anova"){
lm_res[,q:=stats::p.adjust(p = p, method = correction_method)]
} else {
grep_terms <- if(isTRUE(multivariate)) "^x[:]" else "^x$"
model_p <- model_q <- term <- xvar <- NULL;
lm_res|>data.table::setnames("p.value","p")
model_res <- lm_res[term %in% c("(Intercept)")]|>
data.table:::setnames(c("p","estimate","statistic"),
c("model_p","model_estimate","model_statistic"))
model_res[,model_q:=stats::p.adjust(p = model_p,
method = correction_method)]
lm_res <- merge(lm_res[grepl("^x[:]",term)] ,
lm_res <- merge(lm_res[grepl(grep_terms,term)] ,
model_res[,c("model_id","model_p","model_q",
"model_estimate","model_statistic")],
by="model_id")
lm_res[,xvar:=gsub("^x\\:xvar","",term)]|>
data.table::setcolorder("xvar",3)
lm_res[,q:=ifelse(model_q<0.05,p,min(1,model_q+p))]
if(isTRUE(multivariate)){
lm_res[,xvar:=gsub("^x\\:xvar","",term)]|>
data.table::setcolorder("xvar",3)
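## by=.I is EXTREMELY important! Otherwise min() is evaluated over the
## whole column at once, and the adjusted q-values collapse to the
## minimum across ALL rows instead of being computed row by row.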
lm_res[,q:=ifelse(model_q<0.05,p,min(1,model_q+p)), by=.I]
} else {
lm_res[,q:=stats::p.adjust(p = p, method = correction_method)]
}
}
#### Report ####
messager(formatC(nrow(lm_res[q<qvalue_thresh,]), big.mark = ","),
Expand Down
46 changes: 34 additions & 12 deletions R/iterate_lm_long.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
iterate_lm_long <- function(xmat,
ymat,
cores,
method){
x <- y <- NULL;
method,
multivariate,
...){
x <- y <- xvar <- NULL;
progressbar <- cores$params$progressbar
add_model_id <- function(res,i){
mid <- gsub("file",paste0("model",i,"_"),basename(tempfile()))
Expand Down Expand Up @@ -30,29 +32,49 @@ iterate_lm_long <- function(xmat,
}
## Run tests: glm
if(method=="glm"){
messager("Method: glm",v=!progressbar)
mod <- stats::glm(data = dt,
formula = y~x*xvar)
res <- broom::tidy(mod) |>
data.table::data.table()
add_model_id(res,i)
if(isTRUE(multivariate)){
messager("Method: glm (multivariate)",v=!progressbar)
mod <- stats::glm(data = dt,
formula = y~x*xvar,
...)
res <- broom::tidy(mod) |>
data.table::data.table()
add_model_id(res,i)
} else {
res <- lapply(stats::setNames(unique(dt$xvar),
unique(dt$xvar)),
function(xv){
messager("Method: glm (univariate)",v=!progressbar)
mod <- stats::glm(data = dt[xvar==xv],
formula = y~x,
...)
res <- broom::tidy(mod) |>
data.table::data.table()
add_model_id(res,i)
}) |>
data.table::rbindlist(idcol = "xvar",
fill = TRUE)
}
} else if(method=="anova") {
## Run tests: ANOVA
messager("Method: ANOVA",v=!progressbar)
messager("Method: ANOVA",v=!progressbar)
res <- dt |>
rstatix::group_by(xvar) |>
rstatix::anova_test(formula = y ~ x) |>
rstatix::anova_test(formula = y ~ x,
...) |>
data.table::data.table()
} else if(method=="lm.ridge"){
### Ridge regression ####
mod <- MASS::lm.ridge(formula= y~x+xvar,
data=dt)
data=dt,
...)
res <- broom::tidy(mod) |>
data.table::data.table()
add_model_id(res,i)
} else if(method=="rlm"){
mod <- MASS::rlm(formula= y~x+xvar,
data=dt)
data=dt,
...)
res <- broom::tidy(mod) |>
data.table::data.table()
add_model_id(res,i)
Expand Down
104 changes: 104 additions & 0 deletions R/plot_hpo_pseudotime.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#' Plot HPO pseudotime
#'
#' Plot the Human Phenotype Ontology (HPO) in genomic latent space,
#' then compute pseudotime trajectories between a subset of phenotypes that
#' are symptoms of a given disease (or set of diseases).
#' @param obj \link{Seurat} object of HPO terms generated by \link{prepare_hpo}.
#' @param disease_ids One or more disease IDs found within \code{dt_genes}.
#' @inheritParams prepare_hpo
#' @inheritParams monocle3::learn_graph
#' @inheritDotParams monocle3::plot_cells -cds
#' @returns A named list with two elements: \code{data}, the cell_data_set
#' converted from \code{obj} with the principal graph learned on the
#' disease-associated phenotypes attached, and \code{plot}, the object
#' returned by \link[monocle3]{plot_cells}.
#' @export
plot_hpo_pseudotime <- function(obj,
                                dt_genes = HPOExplorer::load_phenotype_to_genes(1),
                                disease_ids = dt_genes$disease_id[1:3],
                                learn_graph_control = list(prune_graph = FALSE),
                                ...) {
  ## Both packages are only needed by this function. requireNamespace()
  ## returns FALSE (it does not error) when a package is missing, so check
  ## the result explicitly and fail early with a clear message.
  for (pkg in c("monocle3", "SeuratWrappers")) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      stopper(paste0("Package '", pkg, "' must be installed to use ",
                     "plot_hpo_pseudotime()."))
    }
  }
  ## Avoid R CMD check notes about data.table non-standard evaluation.
  gene_symbol <- disease_id <- NULL

  #### Map phenotypes to the requested diseases ####
  ## Uses gene overlap: one row per (hpo_id, disease_id) pair, with the
  ## number of unique genes linking them.
  p2d <- dt_genes[disease_id %in% disease_ids,
                  list(count = data.table::uniqueN(gene_symbol)),
                  by = c("hpo_id", "disease_id")]
  if (nrow(p2d) == 0) {
    stopper("No hpo_ids found for the given disease_ids.")
  }

  #### Keep only phenotypes present as samples in obj ####
  hpo_ids <- intersect(unique(p2d$hpo_id),
                       colnames(obj))
  if (length(hpo_ids) == 0) {
    stopper("No hpo_ids overlapping with samples (colnames) in obj.")
  }

  #### Learn the trajectory on the disease-associated subset ####
  cds <- SeuratWrappers::as.cell_data_set(obj)
  ## k is set to the largest cluster index found in the full object's
  ## "seurat_clusters" metadata column.
  ## NOTE(review): assumes obj has already been clustered so that
  ## "seurat_clusters" exists -- confirm against prepare_hpo.
  cds_sub <- monocle3::cluster_cells(
    cds = cds[, hpo_ids],
    k = max(as.integer(cds@colData$seurat_clusters))
  )
  cds_sub <- monocle3::learn_graph(cds_sub,
                                   learn_graph_control = learn_graph_control)
  ## Every phenotype in the subset is supplied as a root cell.
  cds_sub <- monocle3::order_cells(cds_sub, root_cells = colnames(cds_sub))

  #### Transfer the learned graph onto the full object and plot ####
  monocle3::principal_graph(cds) <- monocle3::principal_graph(cds_sub)
  monocle3::principal_graph_aux(cds) <- monocle3::principal_graph_aux(cds_sub)
  ## Forward user-supplied plotting arguments (previously dropped).
  plt <- monocle3::plot_cells(cds, ...)

  list(data = cds,
       plot = plt)
}
Loading

0 comments on commit d5194b5

Please sign in to comment.