diff --git a/experiments/assessing_cluster_clonality/sandbox/WBC_analysis.R b/experiments/assessing_cluster_clonality/sandbox/WBC_analysis.R new file mode 100644 index 0000000..b97f017 --- /dev/null +++ b/experiments/assessing_cluster_clonality/sandbox/WBC_analysis.R @@ -0,0 +1,76 @@ +library(tidyverse) +data <- read_tsv("~/Downloads/splittingSummary_full_final.tsv") +data <- data %>% mutate(WBC = n_wbcs > 0) + +data <- data %>% mutate(impact_mutations = high_impact_mutations + medium_impact_mutations) + + +View(data) +filtered_data <- data %>% + filter(str_detect(`Sample Name`, "Br|Pr|LM2")) + + +fit <- glm(as.factor(Oligoclonal) ~ n_cells + `Sample Name` + WBC, data = data, family = binomial(link = "logit")) +summary(fit) +#### Not signficant + + +fit2 <- glm(as.factor(Oligoclonal) ~ n_cells + `Sample Name` + WBC, data = filtered_data, family = binomial(link = "logit")) +summary(fit2) +#### Not signficant + + +fit3 <- glm(n_wbcs ~ n_cells + `Sample Name` + Oligoclonal, data = filtered_data, family = poisson(link = "log")) +summary(fit3) + +fit4 <- glm(n_wbcs ~ n_cells + `Sample Name` + Oligoclonal, data = filtered_data, family = poisson(link = "log")) +summary(fit4) +### Not significant + + +fit6 <- glm(high_impact_mutations ~ n_cells + `Sample Name` + Oligoclonal + WBC, data = filtered_data, family = poisson(link = "log")) +summary(fit6) +### Not significant + + +fit7 <- glm(as.factor(Oligoclonal) ~ n_cells + `Sample Name` + impact_mutations + Oligoclonal + WBC, data = filtered_data, family = binomial(link = "logit")) +summary(fit7) + + +fit5 <- glm(impact_mutations ~ n_cells + `Sample Name` + Oligoclonal + WBC, data = filtered_data, family = poisson(link = "log")) +summary(fit5) +### significant effect of WBC presence on impact mutations + +filtered_data %>% + ggplot(aes(x = WBC, y = impact_mutations, group = WBC)) + + geom_boxplot() + + theme( + axis.text.x = element_text(angle = 45, hjust = 1, size = 18), + axis.title.x = element_text(size = 20), + axis.title.y = element_text(size = 20), + axis.text.y = element_text(size = 18) + ) + + ylab("# Mutations with functional impact") + + + +fit8 <- glm(impact_mutations ~ n_cells + `Sample Name` + Oligoclonal + n_wbcs, data = filtered_data, family = poisson(link = "log")) +summary(fit8) + + +filtered_data %>% + ggplot(aes(x = n_wbcs, y = impact_mutations, group = n_wbcs)) + + geom_boxplot() + + theme( + axis.text.x = element_text(angle = 45, hjust = 1, size = 18), + axis.title.x = element_text(size = 20), + axis.title.y = element_text(size = 20), + axis.text.y = element_text(size = 18) + ) + + ylab("# Mutations with functional impact") + + + +fit9 <- glm(n_cells ~ `Sample Name` + Oligoclonal, data = filtered_data, family = poisson(link = "log")) +summary(fit9) +### Not significant diff --git a/experiments/assessing_cluster_clonality/sandbox/assessment_of_oligoclonals.R b/experiments/assessing_cluster_clonality/sandbox/assessment_of_oligoclonals.R new file mode 100644 index 0000000..6fbc7b1 --- /dev/null +++ b/experiments/assessing_cluster_clonality/sandbox/assessment_of_oligoclonals.R @@ -0,0 +1,142 @@ +input_folder <- "/Users/jgawron/Documents/projects/CTC_backup/input_folder" + +simulation_input_folder <- "/Users/jgawron/Documents/projects/CTC_backup/simulations/simulations2" +tree_name <- "Br16_AC" +n_sampling_events <- 100 + +source("functions.R") + + +tree_names <- c("Br11", "Br16_AC", "Br16_B", "Br16_C", "Br23", "Br26", "Br30", "Br37", "Br38", "Br39", "Br44", "Br45", "Br46", "Br53", "Br57", "Br61", "Br7", "Brx50", "LM2", "Lu2", "Lu7", "Ov8", "Pr6", "Pr9") + + +mean_splitting_scores_mono <- vector() +for (tree_name in tree_names) { + # input <- load_data(input_folder, tree_name) + + # description_data <- + # read_delim( + # file.path( + # input$directory, + ### input$sampleName, + # paste0(input$sampleName, "_samples_nodeDescription.tsv") + # ), + # delim = "\t", + ### col_names = FALSE, + # quote = "none" + # ) + # colnames(description_data) <- + # c("sample_name", "total_number_cells", "tumor_cells", "WBCs", "description") + + + folders <- list.dirs(path = simulation_input_folder, recursive = FALSE) + matching_folders <- folders[grepl(tree_name, basename(folders))] + + for (idx1 in 1:length(matching_folders)) { + simulation_instance <- basename(matching_folders[idx1]) + input_simulated <- load_data(simulation_input_folder, simulation_instance) + sample_description_simulated <- input_simulated$sample_description + for (color in c( + "orchid", "orchid1", "orchid2", + "orchid3", "orchid4", "darkorchid", + "darkorchid1", "darkorchid2", "darkorchid3", + "darkorchid4", "purple", "purple1", + "purple2", "purple3", "purple4" + )) { + distance_simulated <- + computeClusterSplits( + input_simulated$sample_description, input_simulated$postSampling, + simulation_instance, input_simulated$nCells, input_simulated$nMutations, + input_simulated$nClusters, input_simulated$alleleCount, + input_simulated$mutatedReadCounts, input_simulated$totalReadCounts, + nMutationSamplingEvents = n_sampling_events, + nTreeSamplingEvents = n_sampling_events, + cellPairSelection = c(color) + ) + plot( + ggplot( + data.frame(x = distance_simulated$aggregatedBranchingProbabilities), aes(x = x) + ) + + geom_histogram(binwidth = 0.01) + ) + + mean_splitting_scores_mono <- c(mean_splitting_scores_mono, mean(distance_simulated$aggregatedBranchingProbabilities)) + } + } +} + + + +involved_cell_indices <- sub(paste0(".*", tree_name, "_"), "", simulation_instance) %>% + strsplit("_") %>% + unlist() +involved_cell_indices <- as.numeric(involved_cell_indices) + +involved_single_tumor_cells <- description_data$sample_name[involved_cell_indices] + +pairs <- combn(involved_single_tumor_cells, 2, simplify = FALSE) + +distance_separate <- + computeClusterSplits(input$sample_description, input$postSampling, + treeName, input$nCells, input$nMutations, + input$nClusters, input$alleleCount, + input$mutatedReadCounts, input$totalReadCounts, + nMutationSamplingEvents = n_sampling_events, + nTreeSamplingEvents = n_sampling_events, + cellPairSelection = pairs + ) + +plot( + ggplot( + data.frame(x = distance_separate$aggregatedBranchingProbabilities), aes(x = x) + ) + + geom_histogram(binwidth = 0.01) +) +mean(distance_separate$aggregatedBranchingProbabilities) + + + + +######### + +mean_splitting_scores_mono <- mean_splitting_scores_mono[!is.na(mean_splitting_scores_mono)] + +data.frame(splitting_probs = mean_splitting_scores_mono, Monoclonal = TRUE) %>% + ggplot(aes(y = splitting_probs)) + + geom_boxplot() + + theme_minimal() + +load("~/Documents/projects/CTC_backup/simulations/simulation3/mean_branching_probs_oligo.RData") +load("~/Documents/projects/CTC_backup/simulations/simulation3/mean_branching_probs_mono.RData") +load("~/Documents/projects/CTC_backup/simulations/simulation3/deviance_splitting_score.RData") + +splitting_probs <- data.frame(splitting_probs = mean_splitting_scores_mono, Oligoclonal = "Monoclonal") +splitting_probs2 <- data.frame(splitting_probs = mean_branching_probs_oligo, Oligoclonal = "Oligoclonal") +splitting_probs_single_cells <- data.frame(splitting_probs = -deviance_splitting_score + mean_branching_probs_oligo, Oligoclonal = "Genetically distinct single cells") + +splitting_probs <- rbind(splitting_probs, splitting_probs2, splitting_probs_single_cells) +splitting_probs <- splitting_probs %>% mutate(Oligoclonal = factor(Oligoclonal, levels = c("Monoclonal", "Oligoclonal", "Genetically distinct single cells"))) + +splitting_probs %>% + ggplot(aes(y = splitting_probs, x = Oligoclonal, group = Oligoclonal)) + + geom_boxplot() + + ylab("Mean splitting probability") + + xlab("Clonality status of CTC cluster") + + theme_minimal() +theme( + axis.text.x = element_text(angle = 45, hjust = 1, size = 18), + axis.title.x = element_text(size = 20), + axis.title.y = element_text(size = 20), + axis.text.y = element_text(size = 18) +) + +data.frame(y = deviance_splitting_score) %>% + ggplot(aes(y = y)) + + geom_boxplot() + + theme_minimal() + + +data.frame(y = -deviance_splitting_score + mean_branching_probs_oligo) %>% + ggplot(aes(y = y)) + + geom_boxplot() + + theme_minimal()