Skip to content

Commit

Permalink
add functions from writing thesis
Browse files Browse the repository at this point in the history
  • Loading branch information
bschilder committed Oct 2, 2024
1 parent eebb060 commit ddba7e2
Show file tree
Hide file tree
Showing 50 changed files with 570,931 additions and 141 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/rworkflows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ jobs:
run_pkgdown: ${{ true }}
has_runit: ${{ false }}
has_latex: ${{ false }}
GITHUB_TOKEN: ${{ secrets.PAT_GITHUB }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run_docker: ${{ false }}
docker_user: bschilder
docker_org: neurogenomicslab
Expand Down
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ Suggests:
arrow,
ggsankey,
Hmisc,
proxy
proxy,
rnndescent
Remotes:
github::neurogenomics/scNLP,
github::neurogenomics/scKirby,
Expand All @@ -94,6 +95,6 @@ Remotes:
github::RajLabMSSM/downloadR,
github::satijalab/seurat-wrappers,
github::davidsjoberg/ggsankey
RoxygenNote: 7.3.1
RoxygenNote: 7.3.2
VignetteBuilder: knitr
Config/testthat/edition: 3
9 changes: 9 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
export(add_cluster_colors)
export(add_mixingmetric)
export(adjust_zstat)
export(clip_outliers)
export(dt_to_granges)
export(find_neighbors)
export(generate_metadata)
Expand Down Expand Up @@ -34,6 +35,7 @@ export(plot_enrichment)
export(plot_factors_sankey)
export(plot_feature_density)
export(plot_highlights)
export(plot_ontological_similarity)
export(plot_ontological_velocity)
export(plot_preservation_histo)
export(plot_pseudotime)
Expand All @@ -46,17 +48,23 @@ export(prepare_opentargets)
export(prepare_primekg)
export(regress_gene_info)
export(run_autoencoder)
export(run_cor)
export(run_dag_enrich)
export(run_gprofiler)
export(run_imputation)
export(run_integration)
export(run_knn_overlap)
export(run_lda)
export(run_mofa2)
export(run_ontological_similarity)
export(run_pca)
export(run_preservation)
export(run_pseudotime_subtypes)
export(run_sparsesvd)
export(run_umap)
export(run_variancePartition)
export(scale_color_nightlight)
export(seurat_to_ontological_similarity)
export(theme_nightlight)
import(GenomicFiles)
import(HPOExplorer)
Expand Down Expand Up @@ -94,6 +102,7 @@ importFrom(data.table,rbindlist)
importFrom(data.table,setkey)
importFrom(data.table,uniqueN)
importFrom(downloadR,downloader)
importFrom(dplyr,"%>%")
importFrom(dplyr,arrange)
importFrom(dplyr,desc)
importFrom(dplyr,mutate)
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
* Remove all ontology-related functions (now within `HPOExplorer`)
* Explore `bigstatsr`/`bigreadr` a bit.

## Bug fixes

* Fix GH token in rworkflows yml

# phenomix 0.99.5

## New features
Expand Down
56 changes: 56 additions & 0 deletions R/clip_outliers.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#' Clip outliers
#'
#' Clip outliers from a ggplot object using Mahalanobis distance.
#'
#' For each plot, the \code{x}/\code{y} coordinates of the first layer are
#' extracted with \code{ggplot2::ggplot_build}, scored with
#' \code{rstatix::mahalanobis_distance}, and the axis limits are then set
#' (via \code{ggplot2::lims}) to the range spanned by the non-outlier points.
#' @param gg A ggplot or patchwork object.
#' @param max_dist Maximum Mahalanobis distance to consider as a non-outlier.
#'  If \code{NULL} (default), the \code{is.outlier} flag returned by
#'  \code{rstatix::mahalanobis_distance} is used as-is.
#'  If \code{FALSE}, \code{gg} is returned unchanged.
#' @inheritParams patchwork::plot_layout
#' @inheritDotParams patchwork::wrap_plots
#' @return A ggplot or patchwork object with outliers clipped.
#' @export
#' @examples
#' gg <- ggplot2::ggplot(mtcars,ggplot2::aes(x=mpg,y=disp))+ggplot2::geom_point()
#' clip_outliers(gg)
clip_outliers <- function(gg,
                          max_dist=NULL,
                          guides = "collect",
                          axes="collect",
                          axis_titles="collect",
                          ...){

  ## max_dist=FALSE is the explicit "do nothing" switch.
  if (isFALSE(max_dist)) return(gg)

  ## Clip a single ggplot to the bounding box of its non-outlier points.
  clip_outliers_i <- function(gg, max_dist) {
    ## Avoid R CMD check notes for data.table non-standard evaluation.
    is.outlier <- mahal.dist <- NULL

    ## Only the first layer's computed coordinates are considered.
    df <- ggplot2::ggplot_build(gg)$data[[1]][, c("x", "y")]
    ## Coerce explicitly so that `:=` below always operates on a valid
    ## data.table, whatever class rstatix hands back.
    outlier_md <- data.table::as.data.table(
      rstatix::mahalanobis_distance(data.table::data.table(df))
    )
    ## Override the default outlier flag with the user-supplied threshold.
    if (!is.null(max_dist)) {
      outlier_md[, is.outlier := mahal.dist > max_dist]
    }
    messager(formatC(sum(outlier_md$is.outlier), big.mark = ","),
             "outliers identified.")

    ## Compute the non-outlier subset once and reuse it.
    inliers <- outlier_md[is.outlier == FALSE, ]
    if (nrow(inliers) == 0) {
      ## Nothing to derive limits from: min()/max() on empty vectors would
      ## yield Inf/-Inf limits, so leave the plot untouched instead.
      messager("No non-outlier points remain; returning plot unclipped.")
      return(gg)
    }
    messager("max_dist for non-outlier:",
             max(inliers$mahal.dist))
    gg +
      ggplot2::lims(x = range(inliers$x),
                    y = range(inliers$y))
  }

  if (methods::is(gg, "patchwork")) {
    ## Clip every sub-plot independently, then reassemble the patchwork.
    ## seq_len() (rather than seq()) stays empty when length(gg) is 0.
    clipped <- lapply(seq_len(length(gg)), function(i) {
      clip_outliers_i(gg[[i]], max_dist)
    })
    patchwork::wrap_plots(clipped, ...) +
      patchwork::plot_layout(guides = guides,
                             axes = axes,
                             axis_titles = axis_titles)
  } else {
    clip_outliers_i(gg, max_dist)
  }
}
10 changes: 8 additions & 2 deletions R/get_variance_explained.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,14 @@ get_variance_explained <- function(obj,
dims=NULL
){
dr <- obj[[reduction]]
mat <- Seurat::GetAssayData(obj, layer = layer)
total_variance <- sum(matrixStats::rowVars(mat))
total_variance <- if(!is.null(obj@misc$total_variance[[layer]])){
obj@misc$total_variance[[layer]]
} else {
messager("Computing total variance.")
sum(matrixStats::rowVars(
Seurat::GetAssayData(obj, layer = layer)
))
}
## EigenValues
if(length(dr@stdev)==0){
messager("Computing stdev for reduction.")
Expand Down
3 changes: 2 additions & 1 deletion R/replace_char_fun.R → R/map_id_sep.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@ map_id_sep <- function(nms,
"_"=":")){
if(length(replace_char)>0){
for(r in names(replace_char)){
nms <- gsub(r,replace_char[[r]],nms, fixed = TRUE)
nms <- sub(r,replace_char[[r]],nms, fixed = TRUE)
}
}

return(nms)
}
5 changes: 3 additions & 2 deletions R/map_xref.R
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
map_xref <- function(dat,
prefix="MONDO",
new_col=paste0(tolower(prefix),"_id"),
xrefs_col="dbXRefs",
verbose=TRUE){
dbXRefs <- id <- NULL
id <- NULL
messager("Adding xref column:",new_col,v=verbose)
dat[grepl(paste0("^",prefix),id),(new_col):=id]
dat[get(new_col)=="NA",(new_col):=NA]
if(sum(is.na(unlist(dat[[new_col]])))>0) {
dat[is.na(get(new_col)),(new_col):=sapply(dbXRefs,function(x){
dat[is.na(get(new_col)),(new_col):=sapply(get(xrefs_col),function(x){
r <- grep(paste0("^",prefix),unlist(x),value = TRUE)
if(length(r)==0) NA else unlist(r)
})]
Expand Down
8 changes: 4 additions & 4 deletions R/plot_factors_sankey.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ plot_factors_sankey <- function(factor.traits,
p_adjust_all <- factor_num <- NULL;

X.traits <- data.table::dcast.data.table(factor.traits,
formula = factor ~ name,
value.var = "p_adjust_all",
formula = factor ~ trait_name,
value.var = "q",
fun.aggregate = mean,
fill=1)|>
KGExplorer::dt_to_matrix()
Expand Down Expand Up @@ -56,7 +56,7 @@ plot_factors_sankey <- function(factor.traits,
stats::dist()|>
stats::hclust()
}
factor.annot <- merge(factor.traits[p_adjust_all<q_threshold,],
factor.annot <- merge(factor.traits[q<q_threshold,],
factor.celltypes[q<q_threshold & mean_q<q_threshold,],
by=c("factor","factor_num"), all.x = TRUE,
allow.cartesian = TRUE)
Expand All @@ -69,7 +69,7 @@ plot_factors_sankey <- function(factor.traits,
is.finite(logFC)]
}
dt <- ggsankey::make_long(factor.annot,
cl_name, factor_num, name,
cl_name, factor_num, trait_name,
value = value_var)
dt$value <- 1-dt$value
lvls <- c(stringr::str_split(hc.factors$labels[hc.factors$order],
Expand Down
3 changes: 1 addition & 2 deletions R/plot_feature_density.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,7 @@ plot_feature_density <- function(obj,
...)
### Add theme ####
plts <- lapply(plts,function(p){
p + theme_nightlight() +
scale_color_nightlight()
p + theme_nightlight()
})
if(!is.null(joint_title)){
plts[[length(plts)]] <- plts[[length(plts)]] +
Expand Down
79 changes: 79 additions & 0 deletions R/plot_ontological_similarity.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#' Plot ontological similarity
#'
#' Plot ontological similarity of cells to a set of terms in an ontology.
#'
#' Terms are taken from the \code{top_markers} element of the
#' \code{run_ontological_similarity} output, falling back to
#' \code{ancestors} when no markers are available. Up to \code{top_n} terms
#' are kept and, when at least two remain, ordered by hierarchical clustering
#' of their rows in the \code{sim} matrix before being plotted with
#' \code{Seurat::FeaturePlot}.
#' @param obj Input object forwarded to \code{run_ontological_similarity}.
#'  Only used when \code{run_ontological_similarity_out} is \code{NULL}.
#' @param id_col Forwarded to \code{run_ontological_similarity}.
#' @param ont Ontology object. Used to derive the default \code{ancestors}
#'  and to map term IDs via \code{KGExplorer::map_ontology_terms}.
#' @param run_ontological_similarity_out Pre-computed output of
#'  \code{run_ontological_similarity}. The elements \code{top_markers},
#'  \code{sim} and \code{obj_sim} are read from it.
#'  If \code{NULL}, the analysis is run anew.
#' @param ancestors Ontology terms forwarded to
#'  \code{run_ontological_similarity}; also used as the fallback set of terms
#'  to plot when no top markers are identified.
#' @param group_var Forwarded to \code{run_ontological_similarity}.
#' @param top_n Maximum number of terms to plot.
#' @param min.cutoff Passed to \code{Seurat::FeaturePlot}.
#' @param show_plot Print the plot.
#' @inheritDotParams Seurat::FeaturePlot
#' @return A named list with the elements \code{plot},
#'  \code{run_ontological_similarity}, \code{hclust}
#'  (\code{NULL} when fewer than two terms were available to cluster) and
#'  \code{term_map}.
#' @export
plot_ontological_similarity <- function(obj,
                                        id_col,
                                        ont,
                                        ### Use pre-computed DOSA results
                                        run_ontological_similarity_out=NULL,
                                        ### Args for running new DOSA
                                        ancestors=unique(ont@elementMetadata$ancestor),
                                        group_var="seurat_clusters",
                                        top_n=9,
                                        min.cutoff = "q90",
                                        show_plot=TRUE,
                                        ...){
  if (!is.null(run_ontological_similarity_out)) {
    messager("Using pre-computed DOSA results.")
    out <- run_ontological_similarity_out
  } else {
    out <- run_ontological_similarity(
      obj = obj,
      id_col = id_col,
      ont = ont,
      ancestors = ancestors,
      group_var = group_var)
  }
  #### Plot most differentially similar ancestors ####
  terms_to_use <- if (length(out$top_markers) == 0) {
    messager("No term identified from DOSA. Using ancestors instead.")
    intersect(ancestors, colnames(out$sim))
  } else {
    unique(out$top_markers)
  }
  term_map <- KGExplorer::map_ontology_terms(ont = ont,
                                             keep_order = FALSE,
                                             terms = terms_to_use) |>
    utils::head(top_n)
  term_map <- term_map[!is.na(names(term_map))]
  if (length(term_map) == 0) {
    ## Fail early with a clear message rather than deep inside hclust().
    stop("No ontology terms available to plot.", call. = FALSE)
  }
  ### Order ancestors by their clustered similarity
  if (length(term_map) >= 2) {
    ## drop=FALSE keeps a 2D object regardless of how many rows are selected.
    d <- stats::dist(out$sim[names(term_map), , drop = FALSE])
    hc <- stats::hclust(d)
    term_map <- term_map[hc$labels[hc$order]]
  } else {
    ## hclust() requires at least 2 observations; nothing to reorder.
    hc <- NULL
  }

  fo <- Seurat::FeaturePlot(out$obj_sim,
                            order = TRUE,
                            min.cutoff = min.cutoff,
                            features = names(term_map),
                            ...)
  ## NOTE(review): FeaturePlot may hand back a bare ggplot (rather than a
  ## patchwork or list) when only one panel is produced; in that case
  ## `fo[[i]]` would not be a plot, so use `fo` itself. TODO confirm.
  single_plot <- inherits(fo, "ggplot") && !inherits(fo, "patchwork")
  fig_ontmarkers <- lapply(seq_along(term_map), function(i) {
    panel <- if (single_plot) fo else fo[[i]]
    fo_i <- panel +
      ggplot2::labs(title = NULL,
                    subtitle = paste(term_map[[i]],
                                     names(term_map)[[i]], sep = "\n"),
                    color = "Ontological\nsimilarity"
      ) +
      phenomix::theme_nightlight() +
      phenomix::scale_color_nightlight()
    ## Keep the legend on the first panel only.
    if (i > 1) {
      fo_i <- fo_i + ggplot2::theme(legend.position = "none")
    }
    fo_i
  }) |> patchwork::wrap_plots() +
    patchwork::plot_layout(axes = "collect",
                           axis_titles = "collect",
                           guides = "collect"
    )

  if (isTRUE(show_plot)) methods::show(fig_ontmarkers)
  #### Return ####
  return(
    list(plot = fig_ontmarkers,
         run_ontological_similarity = out,
         hclust = hc,
         term_map = term_map
    )
  )
}
13 changes: 12 additions & 1 deletion R/plot_pseudotime.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@
#' Set to \code{NULL} to include all.
#' @param map_root_cells Attempt to map root cells onto equivalent IDs
#' across different ontologies.
#' @param return_cds Return the \code{CellDataSet} object.
#' @inheritParams prepare_hpo
#' @inheritParams monocle3::learn_graph
#' @inheritParams monocle3::order_cells
#' @inheritParams KGExplorer::cache_save
#' @inheritDotParams monocle3::plot_cells
#' @export
#' @examples
Expand All @@ -31,6 +33,7 @@ plot_pseudotime <- function(obj,
prune_graph=FALSE,
nn.cores=KGExplorer::set_cores()$workers),
use_partition=TRUE,
return_cds=FALSE,
merge_trajectories = TRUE,
symptom_color="red",
bg_colors=c("#5000ff","white"),
Expand All @@ -51,6 +54,9 @@ plot_pseudotime <- function(obj,
20*2.5
),
show_plot=TRUE,
save_path=NULL,
height=8,
width=9,
...
){
# obj=readRDS("../thesis/inst/pages/chapter2/data/hpo_ot_integratedV5.rds"); id_col="hp_id";
Expand Down Expand Up @@ -80,6 +86,7 @@ plot_pseudotime <- function(obj,
map_root_cells=map_root_cells,
learn_graph_control = learn_graph_control,
use_partition=use_partition,
return_cds=return_cds,
color_by_symptoms=color_by_symptoms,
color_cells_by=color_cells_by,
point_alpha=point_alpha,
Expand Down Expand Up @@ -120,6 +127,7 @@ plot_pseudotime <- function(obj,
map_root_cells=map_root_cells,
learn_graph_control = learn_graph_control,
use_partition=use_partition,
return_cds=return_cds,
title = stringr::str_wrap(title,
width = title_width),
subtitle=subtitle,
Expand All @@ -139,7 +147,10 @@ plot_pseudotime <- function(obj,
axes = "collect",
axis_titles = "collect")
if(isTRUE(show_plot)) methods::show(out[["plot"]] )
}
}
KGExplorer::plot_save(plt = out[["plot"]],
save_path = save_path,
height = height, width = width)
return(out)

# pseudo_dt <- t(cds@principal_graph_aux$UMAP$pr_graph_cell_proj_dist)|>`colnames<-`(c("umap1","umap2"))
Expand Down
Loading

0 comments on commit ddba7e2

Please sign in to comment.