diff --git a/.Rhistory b/.Rhistory index e0ec83a..f97a208 100644 --- a/.Rhistory +++ b/.Rhistory @@ -1,363 +1,3 @@ -ysidehistogram.args = list(fill="blue", color="white"), -point.args = list(alpha=.1)) + -ggplot2::geom_density_2d_filled(alpha=.7) + -ggplot2::scale_fill_manual(values = c(ggplot2::alpha("white",0), -ggplot2::alpha(pals::gnuplot(8)[-1],.5) -)) -# plots[["all"]] <- -res2 |> -ggstatsplot::ggscatterstats(x="p_HumanCellLandscape", -y="p_DescartesHuman", -xsidehistogram.args = list(fill="red", color="white"), -ysidehistogram.args = list(fill="blue", color="white"), -point.args = list(alpha=.1)) + -ggplot2::geom_density_2d_filled(alpha=.7) + -ggplot2::scale_fill_manual(values = c(ggplot2::alpha("white",0), -ggplot2::alpha(pals::gnuplot(10)[-1],.5) -)) -make_density_plot <- function(res, -x="p_HumanCellLandscape", -y="p_DescartesHuman"){ -res |> -ggstatsplot::ggscatterstats(x=X, -y=y, -xsidehistogram.args = list(fill="red", color="white"), -ysidehistogram.args = list(fill="blue", color="white"), -point.args = list(alpha=.1)) + -ggplot2::geom_density_2d_filled(alpha=.7) + -ggplot2::scale_fill_manual(values = c(ggplot2::alpha("white",0), -ggplot2::alpha(pals::gnuplot(8)[-1],.5) -)) -} -plots[["all"]] <- make_density_plot(res2) -make_density_plot <- function(res, -x="p_HumanCellLandscape", -y="p_DescartesHuman"){ -res |> -ggstatsplot::ggscatterstats(x=x, -y=y, -xsidehistogram.args = list(fill="red", color="white"), -ysidehistogram.args = list(fill="blue", color="white"), -point.args = list(alpha=.1)) + -ggplot2::geom_density_2d_filled(alpha=.7) + -ggplot2::scale_fill_manual(values = c(ggplot2::alpha("white",0), -ggplot2::alpha(pals::gnuplot(8)[-1],.5) -)) -} -plots[["all"]] <- make_density_plot(res2) -make_density_plot <- function(res, -x="p_HumanCellLandscape", -y="p_DescartesHuman"){ -res |> -ggstatsplot::ggscatterstats(x=x, -y=y, -xsidehistogram.args = list(fill="red", color="white"), -ysidehistogram.args = list(fill="blue", color="white"), -point.args = list(alpha=.1)) + -ggplot2::geom_density_2d_filled(alpha=.7) + -ggplot2::scale_fill_manual(values = c(ggplot2::alpha("white",0), -ggplot2::alpha(pals::gnuplot(8)[-1],.5) -)) -} -plots[["all"]] <- make_density_plot(res2) -res |> -ggstatsplot::ggscatterstats(x=get(), -y=y, -xsidehistogram.args = list(fill="red", color="white"), -ysidehistogram.args = list(fill="blue", color="white"), -point.args = list(alpha=.1)) + -ggplot2::geom_density_2d_filled(alpha=.7) + -ggplot2::scale_fill_manual(values = c(ggplot2::alpha("white",0), -ggplot2::alpha(pals::gnuplot(8)[-1],.5) -)) -make_density_plot <- function(res, -x="p_HumanCellLandscape", -y="p_DescartesHuman"){ -res |> -ggstatsplot::ggscatterstats(x=get(), -y=y, -xsidehistogram.args = list(fill="red", color="white"), -ysidehistogram.args = list(fill="blue", color="white"), -point.args = list(alpha=.1)) + -ggplot2::geom_density_2d_filled(alpha=.7) + -ggplot2::scale_fill_manual(values = c(ggplot2::alpha("white",0), -ggplot2::alpha(pals::gnuplot(8)[-1],.5) -)) -} -make_density_plot <- function(res, -x="p_HumanCellLandscape", -y="p_DescartesHuman"){ -res |> -ggstatsplot::ggscatterstats(x=get(), -y=y, -xsidehistogram.args = list(fill="red", color="white"), -ysidehistogram.args = list(fill="blue", color="white"), -point.args = list(alpha=.1)) + -ggplot2::geom_density_2d_filled(alpha=.7) + -ggplot2::scale_fill_manual(values = c(ggplot2::alpha("white",0), -ggplot2::alpha(pals::gnuplot(8)[-1],.5) -)) -} -plots[["all"]] <- make_density_plot(res2) -make_density_plot <- function(res, -x="p_HumanCellLandscape", -y="p_DescartesHuman"){ -res |> -ggstatsplot::ggscatterstats(x=get(x), -y=y, -xsidehistogram.args = list(fill="red", color="white"), -ysidehistogram.args = list(fill="blue", color="white"), -point.args = list(alpha=.1)) + -ggplot2::geom_density_2d_filled(alpha=.7) + -ggplot2::scale_fill_manual(values = c(ggplot2::alpha("white",0), -ggplot2::alpha(pals::gnuplot(8)[-1],.5) -)) -} -plots[["all"]] <- make_density_plot(res2) -res |> -ggstatsplot::ggscatterstats(x=!!ggplot2::sym(x), -y=y, -xsidehistogram.args = list(fill="red", color="white"), -ysidehistogram.args = list(fill="blue", color="white"), -point.args = list(alpha=.1)) + -ggplot2::geom_density_2d_filled(alpha=.7) + -ggplot2::scale_fill_manual(values = c(ggplot2::alpha("white",0), -ggplot2::alpha(pals::gnuplot(8)[-1],.5) -)) -make_density_plot <- function(res, -x="p_HumanCellLandscape", -y="p_DescartesHuman"){ -res |> -ggstatsplot::ggscatterstats(x=!!ggplot2::sym(x), -y=y, -xsidehistogram.args = list(fill="red", color="white"), -ysidehistogram.args = list(fill="blue", color="white"), -point.args = list(alpha=.1)) + -ggplot2::geom_density_2d_filled(alpha=.7) + -ggplot2::scale_fill_manual(values = c(ggplot2::alpha("white",0), -ggplot2::alpha(pals::gnuplot(8)[-1],.5) -)) -} -plots[["all"]] <- make_density_plot(res2) -make_density_plot <- function(res, -x="p_HumanCellLandscape", -y="p_DescartesHuman"){ -res |> -ggstatsplot::ggscatterstats(x=!!ggplot2::sym(x), -y=!!ggplot2::sym(y), -xsidehistogram.args = list(fill="red", color="white"), -ysidehistogram.args = list(fill="blue", color="white"), -point.args = list(alpha=.1)) + -ggplot2::geom_density_2d_filled(alpha=.7) + -ggplot2::scale_fill_manual(values = c(ggplot2::alpha("white",0), -ggplot2::alpha(pals::gnuplot(8)[-1],.5) -)) -} -plots[["all"]] <- make_density_plot(res2) -plots[["all"]] -make_density_plot <- function(res, -x="p_HumanCellLandscape", -y="p_DescartesHuman"){ -res |> -ggstatsplot::ggscatterstats(x=!!ggplot2::sym(x), -y=!!ggplot2::sym(y), -xsidehistogram.args = list(fill="magenta", color="white"), -ysidehistogram.args = list(fill="blue", color="white"), -point.args = list(alpha=.1)) + -ggplot2::geom_density_2d_filled(alpha=.7) + -ggplot2::scale_fill_manual(values = c(ggplot2::alpha("white",0), -ggplot2::alpha(pals::gnuplot(8)[-1],.5) -)) -} -plots[["all"]] <- make_density_plot(res2) -plots[["significant"]] <- make_density_plot(res_sig) -plots[["significant"]] -make_density_plot <- function(res, -x="p_HumanCellLandscape", -y="p_DescartesHuman"){ -res |> -ggstatsplot::ggscatterstats(x=!!ggplot2::sym(x), -y=!!ggplot2::sym(y), -xsidehistogram.args = list(fill="magenta", color="white"), -ysidehistogram.args = list(fill="blue", color="white"), -point.args = list(alpha=.1)) + -ggplot2::geom_density_2d_filled(alpha=.7) + -ggplot2::scale_fill_manual(values = c(ggplot2::alpha("white",0), -ggplot2::alpha(pals::gnuplot(15)[-1],.5) -)) -} -plots[["all"]] <- make_density_plot(res2) -plots[["significant"]] <- make_density_plot(res_sig) -plots[["all"]] -plots[["significant"]] -plots[["significant"]] + -ggplot2::scale_x_log10() + -ggplot2::scale_y_log10() -make_density_plot <- function(res, -x="p_HumanCellLandscape", -y="p_DescartesHuman", -log_vars=FALSE){ -p<- res |> -ggstatsplot::ggscatterstats(x=!!ggplot2::sym(x), -y=!!ggplot2::sym(y), -xsidehistogram.args = list(fill="magenta", color="white"), -ysidehistogram.args = list(fill="blue", color="white"), -point.args = list(alpha=.1)) + -ggplot2::geom_density_2d_filled(alpha=.7) + -ggplot2::scale_fill_manual(values = c(ggplot2::alpha("white",0), -ggplot2::alpha(pals::gnuplot(15)[-1],.5) -)) -if(log_vars){ -p <- p+ -ggplot2::scale_x_log10() + -ggplot2::scale_y_log10() -} -return(p) -} -add_logfc(results) -res2 <- results|> -# subset(stage=="Fetus") |> -# subset(q<0.05)|> -data.table::dcast.data.table( -formula = hpo_id+cl_name ~ ctd, -fun.aggregate = mean, -drop = TRUE, -value.var = c("p","q","logFC","estimate")) -value.var <- intersect(c("p","q","logFC","estimate"), -names(results)) -res2 <- results|> -# subset(stage=="Fetus") |> -# subset(q<0.05)|> -data.table::dcast.data.table( -formula = hpo_id+cl_name ~ ctd, -fun.aggregate = mean, -drop = TRUE, -value.var = value.var) -res2 <- res2[complete.cases(res2)][,test_id:=.I] -### All results -message(length(unique(res2$cl_name))," comparable celltypes.") -message(length(unique(res2$hpo_id))," comparable phenotypes.") -### Significant results in both CTDs -res_sig <- res2[q_HumanCellLandscape<.05 & q_DescartesHuman<.05] -message(length(unique(res_sig$cl_name))," comparable celltypes (FDR<0.05).") -message(length(unique(res_sig$hpo_id))," comparable phenotypes (FDR<0.05).") -plots <- list() -make_density_plot <- function(res, -x="p_HumanCellLandscape", -y="p_DescartesHuman", -log_vars=FALSE){ -p<- res |> -ggstatsplot::ggscatterstats(x=!!ggplot2::sym(x), -y=!!ggplot2::sym(y), -xsidehistogram.args = list(fill="magenta", color="white"), -ysidehistogram.args = list(fill="blue", color="white"), -point.args = list(alpha=.1)) + -ggplot2::geom_density_2d_filled(alpha=.7) + -ggplot2::scale_fill_manual(values = c(ggplot2::alpha("white",0), -ggplot2::alpha(pals::gnuplot(15)[-1],.5) -)) -if(log_vars){ -p <- p+ -ggplot2::scale_x_log10() + -ggplot2::scale_y_log10() -} -return(p) -} -plots[["all"]] <- make_density_plot(res2) -plots[["significant"]] <- make_density_plot(res_sig, -x="logFC_HumanCellLandscape", -y="logFC_DescartesHuman", -log_vars=TRUE) -plots[["significant"]] <- make_density_plot(res_sig, -x="logFC_HumanCellLandscape", -y="logFC_DescartesHuman", -log_vars=F) -plots[["significant"]] -plots[["logFC.all"]] <- make_density_plot(res2, -x="logFC_HumanCellLandscape", -y="logFC_DescartesHuman") -plots[["logFC.all"]] -plots[["logFC.all"]] <- make_density_plot(res2[!is.na(logFC)], -x="logFC_HumanCellLandscape", -y="logFC_DescartesHuman") -res2 <- res2[complete.cases(res2)][,test_id:=.I] -make_density_plot <- function(res, -x="p_HumanCellLandscape", -y="p_DescartesHuman", -log_vars=FALSE){ -res <- res[!is.na(get(x)) & !is.na(get(y))] -p<- res |> -ggstatsplot::ggscatterstats(x=!!ggplot2::sym(x), -y=!!ggplot2::sym(y), -xsidehistogram.args = list(fill="magenta", color="white"), -ysidehistogram.args = list(fill="blue", color="white"), -point.args = list(alpha=.1)) + -ggplot2::geom_density_2d_filled(alpha=.7) + -ggplot2::scale_fill_manual(values = c(ggplot2::alpha("white",0), -ggplot2::alpha(pals::gnuplot(15)[-1],.5) -)) -if(log_vars){ -p <- p+ -ggplot2::scale_x_log10() + -ggplot2::scale_y_log10() -} -return(p) -} -plots[["logFC.all"]] <- make_density_plot(res2, -x="logFC_HumanCellLandscape", -y="logFC_DescartesHuman") -plots[["logFC.all"]] -plots[["logFC.significant"]] <- make_density_plot(res_sig, -x="logFC_HumanCellLandscape", -y="logFC_DescartesHuman") -plots[["logFC.significant"]] -unique(results$ctd)[2] -ctd_select=unique(results$ctd)[2] -results=load_example_results()[ctd==ctd_select] -results -results <- map_celltype(results) -add_logfc(results) -value.var <- intersect(c("p","q","logFC","estimate"), -names(results)) -comparison_var="stage" -unique(results$stage) -filters=list("stage"=c("Fetus","Adult")) -results <- KGExplorer::filter_dt(results, filters = filters) -comparison_var -res2 <- results |> -data.table::dcast.data.table( -formula = as.formula(paste0("hpo_id+cl_name~",comparison_var)), -fun.aggregate = mean, -drop = TRUE, -value.var = value.var) -res2 <- res2[complete.cases(res2)][,test_id:=.I] -### All results -message(length(unique(res2$cl_name))," comparable celltypes.") -message(length(unique(res2$hpo_id))," comparable phenotypes.") -### Significant results in both CTDs -res_sig <- RES_HCL[q_Adult<.05 & q_Fetus<.05] -message(length(unique(res_sig$cell_ontology_mapped))," comparable celltypes (FDR<0.05).") -message(length(unique(res_sig$cl_name))," comparable celltypes (FDR<0.05).") -message(length(unique(res_sig$hpo_id))," comparable phenotypes (FDR<0.05).") -plots <- list() -filters[[comparison_var]] -filters[[comparison_var]][1] -x_var=filters[[comparison_var]][1] -y_var=filters[[comparison_var]][2] -data_stats <- lapply(plots, get_ggstatsplot_stats) -source("~/Desktop/Rare Disease Celltyping/MSTExplorer/R/get_ggstatsplot_stats.R") -data_stats <- lapply(plots, get_ggstatsplot_stats) -data_stats -plots -source("~/Desktop/Rare Disease Celltyping/MSTExplorer/R/plot_density_cor.R") -### Significant results in both CTDs -res_sig <- res2[q_Adult<.05 & q_Fetus<.05] -celltype_var="cl_name" -comparison_var="stage" -celltype_var="cl_name" -group_values <- unique(results[[comparison_var]]) -group_values -### Significant results in both CTDs res_sig <- res2[get(paste0("q_",group_values[2])) +unique()) +top_targets <- prioritise_targets_out$top_targets +#### Filter by status #### +if(!is.null(keep_status)){ +dat_sub <- dat_sub[HIGHEST_STATUS %in% keep_status,] +} +if(!is.null(remove_status)){ +dat_sub <- dat_sub[!HIGHEST_STATUS %in% remove_status,] +} +dat_sub[,failed:=HIGHEST_STATUS %in% failed_status] +#### Filter to only those in top_targets #### +dat_sub2 <- (merge( +dat_sub[failed==FALSE], +top_targets, +allow.cartesian = allow.cartesian, +by.x = "GENENAME3", +by.y = "gene_symbol")[,c("GENENAME2","TARGETID","TARGNAME", +"INDICATI","DRUGID","DRUGNAME", +"HIGHEST_STATUS", +"disease_name","disease_id","hpo_name", +"CellType","ontLvl")] |> +unique()) +#### Remove results that can't be linked to specific genes ##### +dat_sub <- ttdi$merged[!is.na(TARGETID) & +!is.na(GENENAME2) & +GENENAME2!="",] +#### Filter by drug type #### +if(!is.null(drug_types)){ +dat_sub <- dat_sub[ +grepl(paste(drug_types,collapse = "|"),DRUGNAME,ignore.case = TRUE) | +grepl(paste(drug_types,collapse = "|"),DRUGTYPE,ignore.case = TRUE),] +} +#### Filter by status #### +if(!is.null(keep_status)){ +dat_sub <- dat_sub[HIGHEST_STATUS %in% keep_status,] +} +if(!is.null(remove_status)){ +dat_sub <- dat_sub[!HIGHEST_STATUS %in% remove_status,] +} +dat_sub[,failed:=HIGHEST_STATUS %in% failed_status] +#### Filter to only those in top_targets #### +dat_sub2 <- (merge( +dat_sub[failed==FALSE], +top_targets, +allow.cartesian = allow.cartesian, +by.x = "GENENAME3", +by.y = "gene_symbol")[,c("GENENAME2","TARGETID","TARGNAME", +"INDICATI","DRUGID","DRUGNAME", +"HIGHEST_STATUS", +"disease_name","disease_id","hpo_name", +"CellType","ontLvl")] |> +unique()) +allow.cartesian +allow.cartesian=T +#### Filter to only those in top_targets #### +dat_sub2 <- (merge( +dat_sub[failed==FALSE], +top_targets, +allow.cartesian = allow.cartesian, +by.x = "GENENAME3", +by.y = "gene_symbol")[,c("GENENAME2","TARGETID","TARGNAME", +"INDICATI","DRUGID","DRUGNAME", +"HIGHEST_STATUS", +"disease_name","disease_id","hpo_name", +"CellType","ontLvl")] |> +unique()) +#### Count proportion of drugs that our analyses captured #### +pct_captured <- length(unique(dat_sub2$DRUGID)) / +length(unique(dat_sub$DRUGID))*100 +# length(unique(paste0(dat_sub2$DRUGID,dat_sub2$INDICATI, +# dat_sub2$GENENAME2))) +dat_sub[,prioritised:=(DRUGID %in% dat_sub2$DRUGID)] +#### Plot #### +plt <- plot_ttd(dat_sub = dat_sub, +failed_status = failed_status) +#### Show #### +if(isTRUE(show_plot)) methods::show(plt) +#### Hypergeometric test #### +stats::pyhypergeo::phypergeo( +#### Hypergeometric test #### +stats::phypergeo( +k = length(unique(dat_sub2$DRUGID)), +K = length(unique(dat_sub$DRUGID)), +n = length(unique(dat_sub$DRUGID)), +N = nrow(dat_sub) +) +#### Hypergeometric test #### +stats::phyper( +k = length(unique(dat_sub2$DRUGID)), +K = length(unique(dat_sub$DRUGID)), +n = length(unique(dat_sub$DRUGID)), +N = nrow(dat_sub) +) +?stats::phyper +dat_sub +#### Hypergeometric test #### +stats::phyper( +q = length(unique(dat_sub2$DRUGID)), +m = length(unique(dat_sub$DRUGID)), +n = length(unique(dat_sub$DRUGID)), +k = nrow(dat_sub) +) +#### Plot #### +plt <- plot_ttd(dat_sub = dat_sub, +failed_status = failed_status) +is(plt) +plt +#### Hypergeometric test #### +fail <- dat_sub[HIGHEST_STATUS %in% failed_status,drop=FALSE] +notfail <- dat_sub[!HIGHEST_STATUS %in% failed_status,drop=FALSE] +nrow(notfail) +phyper(nrow(notfail)-1, nrow(notfail), (nrow(notfail)+nrow(top_targets))-nrow(notfail), nrow(top_targets),lower.tail= FALSE) +notfail +overlap <- nrow(notfail[prioritised==TRUE]) +group2 <- nrow(notfail) +total <- nrow(notfail)+nrow(top_targets) +group1 <- nrow(top_targets) +## Test for over-representation (enrichment) +# phyper(Overlap-1, group2, Total-group2, group1,lower.tail= FALSE) +notfail <- dat_sub[!HIGHEST_STATUS %in% failed_status,drop=FALSE] +overlap <- nrow(notfail[prioritised==TRUE]) +group2 <- nrow(notfail) +total <- nrow(notfail)+nrow(top_targets) +group1 <- nrow(top_targets) +phyper(overlap-1, +group2, +total-group2, +group1, +lower.tail= FALSE) +overlap +overlap <- nrow(notfail[prioritised==TRUE]) +group2 <- nrow(notfail) +total <- nrow(notfail)+nrow(notfail) +group1 <- nrow(top_targets) +total <- nrow(dat_sub)+nrow(top_targets) +group1 <- nrow(top_targets) +phyper(overlap-1, +group2, +total-group2, +group1, +lower.tail= FALSE) +## Test for under-representation (depletion) +phyper(overlap, group2, total-group2, group1, lower.tail= TRUE) +overlap <- nrow(notfail[prioritised==TRUE]) +group2 <- nrow(notfail) +total <- nrow(notfail)+nrow(top_targets) +group1 <- nrow(top_targets) +stats::phyper(overlap-1, +group2, +total-group2, +group1, +lower.tail= FALSE) +## Test for under-representation (depletion) +overlap <- nrow(fail[prioritised==TRUE]) +group2 <- nrow(fail) +total <- nrow(fail)+nrow(top_targets) +group1 <- nrow(top_targets) +stats::phyper(overlap, +group2, +total-group2, +group1, +lower.tail= TRUE) +total <- nrow(fail)#+nrow(top_targets) +group1 <- nrow(top_targets) +stats::phyper(overlap, +group2, +total-group2, +group1, +lower.tail= TRUE) +group2 <- nrow(fail) +total <- nrow(fail)+nrow(top_targets) +group1 <- nrow(top_targets) +stats::phyper(overlap, +group2, +total-group2, +group1, +lower.tail= TRUE) +p2g <- HPOExplorer::load_phenotype_to_genes() +unique(p2g$gene_symbol) +length(unique(p2g$gene_symbol)) +## Test for over-representation (enrichment) +# phyper(Overlap-1, group2, Total-group2, group1,lower.tail= FALSE) +overlap <- nrow(notfail[prioritised==TRUE]) +group2 <- nrow(notfail) +total <- length(unique(p2g$gene_symbol)) +group1 <- nrow(top_targets) +nonfailed_enrichment <- stats::phyper(overlap-1, +group2, +total-group2, +group1, +lower.tail= FALSE) +nonfailed_enrichment +total +overlap +group2 +total +str(notfail) +## Test for over-representation (enrichment) +# phyper(Overlap-1, group2, Total-group2, group1,lower.tail= FALSE) +overlap <- data.table::uniqueN(notfail[prioritised==TRUE]$GENENAME3) +group2 <- data.table::uniqueN(notfail$GENENAME3) +total <- length(unique(p2g$gene_symbol)) +overlap +group2 +total <- data.table::uniqueN(p2g$gene_symbol) +group1 <- data.table::uniqueN(top_targets$gene_symbol) +nonfailed_enrichment <- stats::phyper(overlap-1, +group2, +total-group2, +group1, +lower.tail= FALSE) +nonfailed_enrichment +group1 +total <- data.table::uniqueN(c(p2g$gene_symbol,notfail$GENENAME3)) +group1 <- data.table::uniqueN(top_targets$gene_symbol) +nonfailed_enrichment <- stats::phyper(overlap-1, +group2, +total-group2, +group1, +lower.tail= FALSE) +nonfailed_enrichment +c(p2g$gene_symbol,notfail$GENENAME3) +total <- data.table::uniqueN(c(p2g$gene_symbol,notfail$GENENAME3)) +group1 <- data.table::uniqueN(top_targets$gene_symbol) +nonfailed_enrichment <- stats::phyper(overlap-1, +group2, +total-group2, +group1, +lower.tail= FALSE) +nonfailed_enrichment +#### Hypergeometric test #### +dat_sub[,failed:=HIGHEST_STATUS %in% failed_status] +fail <- dat_sub[failed==TRUE,drop=FALSE] +notfail <- dat_sub[failed==TRUE,drop=FALSE] +notfail +## Test for under-representation (depletion) +overlap <- data.table::uniqueN(fail[prioritised==TRUE]$GENENAME3) +group2 <- data.table::uniqueN(fail$GENENAME3) +total <- data.table::uniqueN(c(p2g$gene_symbol,fail$GENENAME3)) +group1 <- data.table::uniqueN(top_targets$gene_symbol) +failed_depletion <- stats::phyper(overlap, +group2, +total-group2, +group1, +lower.tail= TRUE) +failed_depletion +nonfailed_enrichment +source("~/Desktop/Rare Disease Celltyping/MSTExplorer/R/ttd_hypergeo.R") +devtools::check_man() +library(MSTExplorer) +library(MSTExplorer) +devoptera::args2vars(reassign = T) +hpo_ids <- HPOExplorer::map_phenotypes(terms = phenotypes, +to = "id") +res <- data.table::copy(results) +phenotypes <- c("Generalized neonatal hypotonia", +"Scrotal hypospadias", +"Increased circulating progesterone") +# diseases_include <- "OMIM:176270" +genes_include <- c("MAGEL2","HERC2") +genes_exclude <- c("SNORD115-1") +res +#### Add diseases #### +res <- HPOExplorer::add_disease(phenos = res, +allow.cartesian = TRUE) +res +old_cols <- names(res) +res <- add_driver_genes(res, +phenotype_to_genes=phenotype_to_genes, +metric = "specificity") +#### Filter diseases #### +if(!is.null(diseases_include)){ +res <- res[disease_id %in% diseases_include,] +} +if(!is.null(diseases_exclude)){ +res <- res[!disease_id %in% diseases_exclude,] +} +#### Add cell types #### +res <- MSTExplorer::map_celltype(results = res) +old_cols <- names(res) +res <- add_driver_genes(res, +phenotype_to_genes=phenotype_to_genes, +metric = "specificity") +ont=KGExplorer::get_ontology("uberon",method = "github") +KGExplorer::filter_ontology(ont=ont, keep_descendants = "UBERON:0002038") +ont=ontologyIndex::get_OBO("https://github.com/obophenotype/uberon/releases/download/v2024-03-22/uberon-base.obo") +ont +ont$children[["UBERON:0002038"]] +ont$children +ont$children["UBERON:0002038"] +ont=ontologyIndex::get_OBO("https://github.com/obophenotype/uberon/releases/download/v2024-03-22/uberon-simple.obo") +ont$children["UBERON:0002038"] +ont=ontologyIndex::get_OBO("https://github.com/obophenotype/uberon/releases/download/v2024-03-22/uberon.obo") +ont=ontologyIndex::get_OBO("https://github.com/obophenotype/uberon/releases/download/v2024-03-22/uberon.obo", propagate_relationships = T) +ont +ont$children["UBERON:0002038"] +ont=ontologyIndex::get_OBO("https://github.com/obophenotype/uberon/releases/download/v2024-03-22/uberon.obo", merge_equivalent_terms = T) +ont=simona::import_ontology("http://purl.obolibrary.org/obo/uberon/releases/2024-03-22/uberon.owl") +ont=simona::import_ontology("http://purl.obolibrary.org/obo/uberon/releases/2024-03-22/uberon.owl") +ont=simona::import_ontology("http://purl.obolibrary.org/obo/uberon/releases/2024-03-22/uberon.owl") +ont@lt_children +which(ont@terms=="UBERON:0002038") +ont@lt_children[which(ont@terms=="UBERON:0002038")] diff --git a/R/0docs.R b/R/0docs.R index 2e60d80..0665294 100644 --- a/R/0docs.R +++ b/R/0docs.R @@ -19,6 +19,8 @@ #' @param width Width of the saved plot. #' @param heights Passed to \link[patchwork]{wrap_plots}. #' @param subtitle_size Size of the plot subtitle. +#' @param phenotype_to_genes Phenotype to gene mapping from +#' \link[HPOExplorer]{load_phenotype_to_genes}. #' @family plot_ #' @returns R object. #' @name plot_ diff --git a/R/plot_ontology_levels.R b/R/plot_ontology_levels.R index f406f56..6e03fd7 100644 --- a/R/plot_ontology_levels.R +++ b/R/plot_ontology_levels.R @@ -13,6 +13,7 @@ #' @param log_vars Logical vector indicating which variables to log-transform. #' @param sig_vars Logical vector indicating which variables to only plot #' for significant results. +#' @param return_data Return the full long data used in the plots. #' @inheritParams plot_ #' @inheritParams ggpubr::stat_cor #' @inheritParams prioritise_targets @@ -57,7 +58,8 @@ plot_ontology_levels <- function(results = load_example_results(), height=7, width=length(x_vars)*5.75, smooth.line.args=list(method = "loess", - se = FALSE) + se = FALSE), + return_data=TRUE ){ requireNamespace("ggplot2") @@ -252,6 +254,7 @@ plot_ontology_levels <- function(results = load_example_results(), height = height, width = width) } + if(isFALSE(return_data)) r2 <- NULL return(list(data=r2, data_stats=data_stats, plot=plts2)) diff --git a/R/plot_report.R b/R/plot_report.R index 086d5ea..a2b8494 100644 --- a/R/plot_report.R +++ b/R/plot_report.R @@ -5,8 +5,6 @@ #' @param rep_dt Report table. #' @param annot HPO annotations. #' @param remove_cols Columns to remove from \code{rep_dt}. -#' @param phenotype_to_genes Phenotype to gene mapping from -#' \link[HPOExplorer]{load_phenotype_to_genes}. #' @inheritParams plot_ #' @inheritParams ggnetwork_plot_full #' @inheritDotParams ggplot2::ggsave diff --git a/R/prioritise_targets.R b/R/prioritise_targets.R index 45a8d05..990bb90 100644 --- a/R/prioritise_targets.R +++ b/R/prioritise_targets.R @@ -209,6 +209,10 @@ prioritise_targets <- function(#### Input data #### Severity_score <- cl_name <- cl_id <- Severity_score_max <- info_content <- NULL; + force(results) + force(ctd_list) + force(hpo) + force(phenotype_to_genes) t1 <- Sys.time() messager("Prioritising gene targets.",v=verbose) #### Add logFC #### diff --git a/R/ttd_check.R b/R/ttd_check.R index ef5333c..3135d09 100644 --- a/R/ttd_check.R +++ b/R/ttd_check.R @@ -34,18 +34,9 @@ ttd_check <- function(top_targets, show_plot = TRUE, save_path = NULL, height=NULL, - width=NULL){ - # top_targets <- prioritise_targets( - # keep_deaths = NULL, - # keep_tiers = NULL, - # severity_threshold_max = NULL, - # severity_threshold = NULL, - # pheno_frequency_threshold = NULL, - # keep_onsets = NULL, - # keep_ont_levels = seq(4), - # keep_celltypes = NULL, - # top_n = 2, - # group_vars = c("hpo_id","disease_id"))$top_targets + width=NULL, + phenotype_to_genes=HPOExplorer::load_phenotype_to_genes()){ + # top_targets <- prioritise_targets()$top_targets # drug_types <- c("Gene therapy" # "Antisense drug", # "Antisense oligonucleotide", @@ -109,6 +100,14 @@ ttd_check <- function(top_targets, # length(unique(paste0(dat_sub2$DRUGID,dat_sub2$INDICATI, # dat_sub2$GENENAME2))) dat_sub[,prioritised:=(DRUGID %in% dat_sub2$DRUGID)] + #### Hypergeometric test #### + dat_sub[,failed:=HIGHEST_STATUS %in% failed_status] + fail <- dat_sub[failed==TRUE,drop=FALSE] + notfail <- dat_sub[failed==FALSE,drop=FALSE] + ttd_hypergeo_out <- ttd_hypergeo(fail=fail, + notfail=notfail, + top_targets=top_targets, + p2g=phenotype_to_genes) #### Plot #### plt <- plot_ttd(dat_sub = dat_sub, failed_status = failed_status) @@ -124,6 +123,7 @@ ttd_check <- function(top_targets, list(data=dat_sub, data_overlap=dat_sub2, pct_captured, - plot=plt) + plot=plt, + ttd_hypergeo_out=ttd_hypergeo_out) ) } diff --git a/R/ttd_hypergeo.R b/R/ttd_hypergeo.R new file mode 100644 index 0000000..63b39b0 --- /dev/null +++ b/R/ttd_hypergeo.R @@ -0,0 +1,37 @@ +ttd_hypergeo <- function(fail, + notfail, + top_targets, + p2g=HPOExplorer::load_phenotype_to_genes()){ + # https://seqqc.wordpress.com/2019/07/25/how-to-use-phyper-in-r/ + + ## Test for over-representation (enrichment) + # phyper(Overlap-1, group2, Total-group2, group1,lower.tail= FALSE) + overlap <- data.table::uniqueN(notfail[prioritised==TRUE]$GENENAME3) + group2 <- data.table::uniqueN(notfail$GENENAME3) + total <- data.table::uniqueN(c(p2g$gene_symbol,notfail$GENENAME3)) + group1 <- data.table::uniqueN(top_targets$gene_symbol) + nonfailed_enrichment <- stats::phyper(overlap-1, + group2, + total-group2, + group1, + lower.tail= FALSE) + messager("Non-failed gene targets enrichment p-value:",nonfailed_enrichment) + ## Test for under-representation (depletion) + # phyper(Overlap, group2, Total-group2, group1, lower.tail= TRUE) + overlap <- data.table::uniqueN(fail[prioritised==TRUE]$GENENAME3) + group2 <- data.table::uniqueN(fail$GENENAME3) + total <- data.table::uniqueN(c(p2g$gene_symbol,fail$GENENAME3)) + group1 <- data.table::uniqueN(top_targets$gene_symbol) + failed_depletion <- stats::phyper(overlap, + group2, + total-group2, + group1, + lower.tail= TRUE) + messager("Failed gene targets depletion p-value:",failed_depletion) + return( + list( + nonfailed_enrichment=nonfailed_enrichment, + failed_depletion=failed_depletion + ) + ) +} diff --git a/man/plot_.Rd b/man/plot_.Rd index d7877e2..1188dfa 100644 --- a/man/plot_.Rd +++ b/man/plot_.Rd @@ -36,6 +36,9 @@ Set to \code{NULL} to not save the plot.} \item{heights}{Passed to \link[patchwork]{wrap_plots}.} \item{subtitle_size}{Size of the plot subtitle.} + +\item{phenotype_to_genes}{Phenotype to gene mapping from +\link[HPOExplorer]{load_phenotype_to_genes}.} } \value{ R object. diff --git a/man/predict_celltypes.Rd b/man/predict_celltypes.Rd index 061ad46..6e90a03 100644 --- a/man/predict_celltypes.Rd +++ b/man/predict_celltypes.Rd @@ -51,9 +51,8 @@ genes in the include, default, and exclude lists.} \link[MSTExplorer]{gen_results} and merged together with \link[MSTExplorer]{merge_results}} -\item{phenotype_to_genes}{Output of -\link[HPOExplorer]{load_phenotype_to_genes} mapping phenotypes -to gene annotations.} +\item{phenotype_to_genes}{Phenotype to gene mapping from +\link[HPOExplorer]{load_phenotype_to_genes}.} \item{agg_var}{The variable(s) to aggregate \code{results} by.} diff --git a/man/ttd_check.Rd b/man/ttd_check.Rd index 22c8cca..f372fd3 100644 --- a/man/ttd_check.Rd +++ b/man/ttd_check.Rd @@ -14,7 +14,8 @@ ttd_check( show_plot = TRUE, save_path = NULL, height = NULL, - width = NULL + width = NULL, + phenotype_to_genes = HPOExplorer::load_phenotype_to_genes() ) } \arguments{ @@ -40,6 +41,9 @@ Set to \code{NULL} to not save the plot.} \item{height}{Height of the saved plot.} \item{width}{Width of the saved plot.} + +\item{phenotype_to_genes}{Phenotype to gene mapping from +\link[HPOExplorer]{load_phenotype_to_genes}.} } \description{ Identify the overlap between your prioritised list of gene therapy targets