SVM plots

Liza Brusman 2024-10-29

library(dplyr)
library(tidyr)
library(ggplot2)
library(ggridges)
library(forcats)
library(ggpubr)
library(glmmTMB)
library(DHARMa)
library(gridExtra)
library(emmeans)
library(Hmisc)
library(corrplot)
source("SVM_plot_fxns.R")

metadata

# Load the per-animal sequencing/behavior metadata, add a combined
# sex x SS_OS label ("Group"), and drop animals 4918 and 4967.
metadata <- read.csv("../../docs/seq_beh_metadata.csv") %>%
  mutate(Group = paste(sex, SS_OS, sep = "_")) %>%
  filter(!animal %in% c("4918", "4967"))

module data

# Per-animal module scores (all cells; log-normalised counts, judging by the
# file name).
all_cells <- read.csv("../hotspot/docs/ani_mod_scores_allcells_lognorm_counts.csv")

# Every column except the animal ID is treated as a module name.
modules <- setdiff(colnames(all_cells), "animal")

# Attach the metadata to the module scores. base::merge() has no `on`
# argument (that is the pandas spelling); it was silently swallowed by `...`,
# so the join key is now stated explicitly with `by`.
data <- merge(all_cells, metadata, by = "animal")

for SVMs that include self:

# Pairwise classification results for the SVMs that include the animal itself.
with_self <- read.csv(file = "output/ani_pairwise_classifications_withself.csv")

for SVMs that do not include self

# Combined results for the SVMs that exclude the animal itself; only rows in
# which `animal` equals `Var1` are kept.
exclude_self <- read.csv(file = "output/all_svm_combined_excludeself_200cells.csv")
exclude_self <- filter(exclude_self, animal == Var1)

put clusters in right order and assign colors

# Display order of the cell-type clusters (used as factor levels for plots
# and as the iteration order for per-cluster statistics).
f.levels <- c(
  "Drd1Pdyn",
  "Drd1PdynOprm1",
  "Drd1Penk",
  "Drd2Penk",
  "Drd2NoPenk",
  "GABAergicNeurons",
  "Dlx2ImmatureNeurons",
  "SstNpyInterneurons",
  "PvalbInterneurons",
  "CholinergicInterneurons",
  "MatureOligos",
  "ImmatureOligos",
  "Astrocytes",
  "Microglia"
)

# Plot colour (hex) for each cluster, keyed by cluster name.
clust_cols <- c(
  "Drd1Pdyn" = "#304880",
  "Drd1PdynOprm1" = "#4898D0",
  "Drd1Penk" = "#88C0F0",
  "Drd2Penk" = "#9060A0",
  "Drd2NoPenk" = "#C078C0",
  "GABAergicNeurons" = "#389078",
  "Dlx2ImmatureNeurons" = "#70A830",
  "SstNpyInterneurons" = "#98D048",
  "PvalbInterneurons" = "#60D0A0",
  "CholinergicInterneurons" = "#80E8C0",
  "MatureOligos" = "#783028",
  "ImmatureOligos" = "#B82820",
  "Astrocytes" = "#D04058",
  "Microglia" = "#F87878"
)

process df into format for ridgeline plots

# Reshape the with-self results into the format the ridgeline plots expect.
# process_df_withself() is not defined in this file (presumably it comes from
# the sourced SVM_plot_fxns.R).
with_self2 <- process_df_withself(with_self)

## `summarise()` has grouped output by 'Cluster'. You can override using the
## `.groups` argument.

plot ridgeline plot

# Ridgeline plot of the with-self data. `save = FALSE` presumably skips
# writing the figure to disk -- confirm in SVM_plot_fxns.R.
plot_ridgeline(with_self2, save = FALSE)

## Picking joint bandwidth of 0.0453

STAT TESTS - GLM for with self - self vs. partner vs. other

# GLM statistics on the with-self data (self vs. partner vs. other).
# GLM_SVM() is not defined in this file; presumably it comes from the sourced
# SVM_plot_fxns.R.
with_self_stats <- GLM_SVM(with_self2)

# Optional export of the stats table (disabled).
# setwd("output/")
# write.csv(with_self_stats, "glm_svm_with_self_groups_ordbeta.csv")

widen dataframe for other analyses later

# Wide version of the with-self data, used by the summary statistics further
# down (reads self.pct, self, partner and other from it).
with_self_wide <- widen_df_withself(with_self2)

process df for excluding self

# Reshape the exclude-self results for plotting and statistics.
# process_df_excludeself() is not defined in this file (presumably it comes
# from the sourced SVM_plot_fxns.R).
exclude_self2 <- process_df_excludeself(exclude_self)

## `summarise()` has grouped output by 'Cluster'. You can override using the
## `.groups` argument.

plot exclude self ridgeline

# Ridgeline plot of the exclude-self data. `save = FALSE` presumably skips
# writing the figure to disk -- confirm in SVM_plot_fxns.R.
plot_ridgeline(exclude_self2, save = FALSE)

## Picking joint bandwidth of 0.0526

GLM stats: exclude self

# GLM statistics on the exclude-self data, using the same GLM_SVM() helper as
# the with-self analysis (helper not defined in this file).
exclude_self_stats <- GLM_SVM(exclude_self2)

# Optional export of the stats table (disabled).
# setwd("output/")
# write.csv(exclude_self_stats, "svm_exclude_self_glm_groups.csv")

exclude self widened df

# Wide version of the exclude-self data, used by the partner-correlation plots
# and summary statistics further down (reads partner.pct, partner and other).
exclude_self_wide <- widen_df_excludeself(exclude_self2)

plot ridgelines by pairing type - with self

# One ridgeline plot per pairing type, with-self data.
# NOTE(review): `type` is never passed to plot_ridgeline(), yet the recorded
# bandwidth differs on every iteration, so the helper presumably reads the
# global `type` to subset the data -- confirm in SVM_plot_fxns.R before
# renaming the loop variable or moving this loop inside a function.
pair_types <- unique(data$pair_type)
for (type in pair_types) {
  print(type)
  plot_ridgeline(with_self2, grouping = "pair_type", save = FALSE)

}

## [1] "FM"

## Picking joint bandwidth of 0.0477

## [1] "MM"

## Picking joint bandwidth of 0.0549

## [1] "FF"

## Picking joint bandwidth of 0.0623

ridgeline by pair type without self

# One ridgeline plot per pairing type, exclude-self data.
# NOTE(review): `type` is never passed to plot_ridgeline(), yet the recorded
# bandwidth differs on every iteration, so the helper presumably reads the
# global `type` to subset the data -- confirm in SVM_plot_fxns.R before
# renaming the loop variable or moving this loop inside a function.
pair_types <- unique(data$pair_type)
for (type in pair_types) {
  print(type)
  plot_ridgeline(exclude_self2, grouping = "pair_type", save = FALSE)
  
}

## [1] "FM"

## Picking joint bandwidth of 0.0527

## [1] "MM"

## Picking joint bandwidth of 0.0699

## [1] "FF"

## Picking joint bandwidth of 0.0608

ridgeline by cohort with self

# One ridgeline plot per behavior cohort, with-self data.
# NOTE(review): `type` is never passed to plot_ridgeline(), yet the recorded
# bandwidth differs on every iteration, so the helper presumably reads the
# global `type` to subset the data -- confirm in SVM_plot_fxns.R before
# renaming the loop variable or moving this loop inside a function.
cohs <- unique(data$beh_cohort)
for (type in cohs) {
  print(type)
  plot_ridgeline(with_self2, grouping = "beh_cohort", save = FALSE)
  
}

## [1] 3

## Picking joint bandwidth of 0.0539

## [1] 2

## Picking joint bandwidth of 0.0565

## [1] 4

## Picking joint bandwidth of 0.0504

ridgelines by cohort exclude self

# One ridgeline plot per behavior cohort, exclude-self data.
# NOTE(review): `type` is never passed to plot_ridgeline(), yet the recorded
# bandwidth differs on every iteration, so the helper presumably reads the
# global `type` to subset the data -- confirm in SVM_plot_fxns.R before
# renaming the loop variable or moving this loop inside a function.
cohs <- unique(data$beh_cohort)
for (type in cohs) {
  print(type)
  plot_ridgeline(exclude_self2, grouping = "beh_cohort", save = FALSE)
  
}

## [1] 3

## Picking joint bandwidth of 0.0698

## [1] 2

## Picking joint bandwidth of 0.0637

## [1] 4

## Picking joint bandwidth of 0.0522

Directly compare pairing types. Each point in these plots is the percentage of classifications from one animal to one other animal.

# Label every row as "<pair_type>_<group>" and draw the dot-cloud comparison
# (plot_dotcloud() is not defined in this file).
exclude_self2$group_type <- with(
  exclude_self2,
  paste(pair_type, group, sep = "_")
)

plot_dotcloud(exclude_self2)

for stats - looking at partner/other*pair_type

# GLM statistics for partner/other by pairing type on the exclude-self data.
# GLM_SVM_pairtype() is not defined in this file (presumably it comes from the
# sourced SVM_plot_fxns.R).
exclude_self_pairtype_stats <- GLM_SVM_pairtype(exclude_self2)

# Optional export of the post-hoc table (disabled).
# setwd("output/")
# write.csv(exclude_self_pairtype_stats, "svm_excludeself_glm_posthocs_by_pairtype.csv")

correlation between partners in partner % classification

# Correlation plots between partners' partner-classification percentages.
# plot_SVM_corrs() is not defined in this file; `save = FALSE` presumably
# skips writing figures to disk.
plot_SVM_corrs(exclude_self_wide, save = FALSE)

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

get out some summary stats for exclude self data

# Per-cluster mean of partner.pct and mean number of cells tested
# (partner + other) for the exclude-self data.
summ_stats <- exclude_self_wide %>%
  group_by(Cluster) %>%
  summarise(
    mean_partner_pct = mean(partner.pct),
    avg_cells_tested = mean(partner + other)
  )

# Colour is mapped on the points only. Mapping it in the top-level aes() made
# geom_smooth() inherit a per-Cluster grouping, and with a single row per
# cluster no regression line can be fitted. The smoother now sees all clusters
# as one group and draws the overall linear trend.
ggplot(summ_stats, aes(x = avg_cells_tested, y = mean_partner_pct)) +
  geom_point(aes(color = Cluster)) +
  geom_smooth(method = "lm") +
  scale_color_manual(values = clust_cols)

## `geom_smooth()` using formula = 'y ~ x'

# Correlate the per-cluster mean partner % with the mean number of cells
# tested (rcorr() default correlation type).
with(summ_stats, rcorr(mean_partner_pct, avg_cells_tested))

##      x    y
## x 1.00 0.49
## y 0.49 1.00
## 
## n= 14 
## 
## 
## P
##   x      y     
## x        0.0727
## y 0.0727

get some summary stats for with self data

# Per-cluster mean of self.pct and mean number of cells tested
# (self + partner + other) for the with-self data.
summ_stats <- with_self_wide %>%
  group_by(Cluster) %>%
  summarise(
    mean_self_pct = mean(self.pct),
    avg_cells_tested = mean(self + partner + other)
  )

# Spearman correlation between self-classification and the number of cells
# tested; the two are highly correlated in the recorded run.
# NOTE(review): the recorded output reports n = 13 for x, so mean_self_pct is
# presumably NA for one cluster (mean() is called without na.rm) -- confirm
# whether that is intended.
cor_test <- with(
  summ_stats,
  rcorr(mean_self_pct, avg_cells_tested, type = "spearman")
)
print(cor_test)

##      x    y
## x 1.00 0.88
## y 0.88 1.00
## 
## n
##    x  y
## x 13 13
## y 13 14
## 
## P
##   x  y 
## x     0
## y  0

# Mean self-classification vs. number of cells tested, one point per cluster.
# Colour is mapped on the points only. Mapping it in the top-level aes() made
# geom_smooth() inherit a per-Cluster grouping, and with a single row per
# cluster no regression line can be fitted. The smoother now sees all clusters
# as one group and draws the overall linear trend.
ggplot(summ_stats, aes(x = avg_cells_tested, y = mean_self_pct)) +
  geom_point(aes(color = Cluster)) +
  geom_smooth(method = "lm") +
  scale_color_manual(values = clust_cols)

## `geom_smooth()` using formula = 'y ~ x'

Self vs. chance: one-sample t-test against 1/38 (the probability of a correct classification at random).

# One-sample t-test per cluster: do the "self" rows' norm_classes differ from
# the 1/38 chance level? Produces one result row per cluster in f.levels.
vs_chance_df <- do.call(
  rbind,
  lapply(f.levels, function(clust) {
    self_vals <- with_self2 %>%
      filter(Cluster == clust, group == "self") %>%
      pull(norm_classes)

    res <- t.test(self_vals, mu = 1 / 38)

    data.frame(
      Cluster = clust,
      T_val = res$statistic[1],
      p.value = res$p.value,
      test = "onesamp t-test"
    )
  })
)

partner vs. chance for exclude self

# One-sample t-test per cluster: do the "partner" rows' norm_classes differ
# from the 1/37 chance level? Produces one result row per cluster in f.levels.
vs_chance_df_exclude <- do.call(
  rbind,
  lapply(f.levels, function(clust) {
    partner_vals <- exclude_self2 %>%
      filter(Cluster == clust, group == "partner") %>%
      pull(norm_classes)

    res <- t.test(partner_vals, mu = 1 / 37)

    data.frame(
      Cluster = clust,
      T_val = res$statistic[1],
      p.value = res$p.value,
      test = "onesamp t-test"
    )
  })
)

look at classifications for same sex, same cohort, same pairing type, relatedness, etc. basically - what other factors are at play other than partner ID?

# Annotate every classification with whether the predicted animal (y_pred)
# shares sex / cohort / pairing type / Group / parents with the tested animal.

# base::merge() has no `on` argument; the original `on = "animal"` was silently
# ignored, so this has always been a natural join on every column name the two
# tables share. The no-op argument is dropped; the join itself is unchanged.
exclude_self_newclass <- merge(exclude_self2, metadata)

# Predicted IDs are used as *names* when indexing the lookup vectors below,
# so they must be character (a numeric index would select by position).
exclude_self_newclass$y_pred <- as.character(exclude_self_newclass$y_pred)

# Named lookup vectors: animal ID -> attribute of that animal.
sex_vec <- setNames(metadata$sex, metadata$animal)
coh_vec <- setNames(metadata$beh_cohort, metadata$animal)
type_vec <- setNames(metadata$pair_type, metadata$animal)
group_vec <- setNames(metadata$Group, metadata$animal)
parent_vec <- setNames(metadata$parents, metadata$animal)

# Compare the tested animal's attribute with the predicted animal's.
# A y_pred without a metadata entry looks up as NA, and if_else() then
# returns NA rather than "different".
exclude_self_newclass$same_opp_sex <- if_else(
  exclude_self_newclass$sex == sex_vec[exclude_self_newclass$y_pred],
  "same", "different"
)
exclude_self_newclass$same_diff_coh <- if_else(
  exclude_self_newclass$beh_cohort == coh_vec[exclude_self_newclass$y_pred],
  "same", "different"
)
exclude_self_newclass$same_diff_type <- if_else(
  exclude_self_newclass$pair_type == type_vec[exclude_self_newclass$y_pred],
  "same", "different"
)
exclude_self_newclass$same_diff_group <- if_else(
  exclude_self_newclass$Group == group_vec[exclude_self_newclass$y_pred],
  "same", "different"
)
exclude_self_newclass$same_diff_parents <- if_else(
  exclude_self_newclass$parents == parent_vec[exclude_self_newclass$y_pred],
  "same", "different"
)

only look at “other” classifications

# Keep only the rows whose group is "other", then total Freq for every
# Cluster x animal combination.
exclude_self_newclass_other <- filter(exclude_self_newclass, group == "other")
norm_to_other_class <- exclude_self_newclass_other %>%
  group_by(Cluster, animal) %>%
  summarise(sum_other_classes = sum(Freq))

## `summarise()` has grouped output by 'Cluster'. You can override using the
## `.groups` argument.

# Count "other" classifications per Cluster x animal x same/different flag
# for one metric, then attach the per-Cluster x animal totals.
#
# flag_col     name of the "same"/"different" column to split on
# metric_label value stored in the `metric` column of the result
# classes_df   the "other"-only classification table
# totals_df    per Cluster x animal totals (column sum_other_classes)
#
# Returns a data frame with columns Cluster, animal, same_diff, classes,
# metric, sum_other_classes.
summarise_same_diff <- function(flag_col, metric_label, classes_df, totals_df) {
  classes_df %>%
    group_by(Cluster, animal, same_diff = .data[[flag_col]]) %>%
    summarise(classes = sum(Freq), .groups = "drop") %>%
    mutate(metric = metric_label) %>%
    # base::merge() has no `on` argument (the original `on = "animal"` was
    # silently ignored); the keys actually shared by both tables are Cluster
    # and animal, so they are named explicitly.
    merge(totals_df, by = c("Cluster", "animal"))
}

same_sex_norm <- summarise_same_diff(
  "same_opp_sex", "sex", exclude_self_newclass_other, norm_to_other_class
)
same_coh_norm <- summarise_same_diff(
  "same_diff_coh", "cohort", exclude_self_newclass_other, norm_to_other_class
)
same_type_norm <- summarise_same_diff(
  "same_diff_type", "pair_type", exclude_self_newclass_other, norm_to_other_class
)
same_group_norm <- summarise_same_diff(
  "same_diff_group", "group", exclude_self_newclass_other, norm_to_other_class
)
same_parents_norm <- summarise_same_diff(
  "same_diff_parents", "parents", exclude_self_newclass_other, norm_to_other_class
)

# Stack all metrics into one long table (this replaces the totals table held
# in norm_to_other_class) and express each count as a share of that animal's
# "other" classifications within the cluster.
norm_to_other_class <- rbind(
  same_sex_norm,
  same_coh_norm,
  same_type_norm,
  same_group_norm,
  same_parents_norm
)
norm_to_other_class$pcts <-
  norm_to_other_class$classes / norm_to_other_class$sum_other_classes

normalize to the percent of classifications that are for “other” (i.e. what percent of “other” classifications?)

# Total Freq of "other" classifications for every Cluster x animal.
# `.groups` is set explicitly: the grouping is irrelevant after the merge
# below, and this avoids the summarise() regrouping message.
norm_to_other_pct <- exclude_self_newclass_other %>%
  group_by(Cluster, animal) %>%
  summarise(sum_other_classes = sum(Freq), .groups = "drop")

# base::merge() has no `on` argument (the original `on = "animal"` was
# silently ignored); the join keys are Cluster and animal, stated explicitly.
exclude_self_newclass_other <- merge(
  exclude_self_newclass_other,
  norm_to_other_pct,
  by = c("Cluster", "animal")
)

# Each row's share of that animal's "other" classifications in the cluster.
exclude_self_newclass_other$norm_to_other <-
  exclude_self_newclass_other$Freq / exclude_self_newclass_other$sum_other_classes

plot misclassification allocations by cluster

# Plot colour (hex) for each cluster, keyed by cluster name.
clust_cols <- c("Drd1Pdyn" = "#304880",
                "Drd1PdynOprm1" = "#4898D0",
                "Drd1Penk" = "#88C0F0",
                "Drd2Penk" = "#9060A0",
                "Drd2NoPenk" = "#C078C0",
                "GABAergicNeurons" = "#389078",
                "Dlx2ImmatureNeurons" = "#70A830",
                "SstNpyInterneurons" = "#98D048",
                "PvalbInterneurons" = "#60D0A0",
                "CholinergicInterneurons" = "#80E8C0",
                "MatureOligos" = "#783028",
                "ImmatureOligos" = "#B82820",
                "Astrocytes" = "#D04058",
                "Microglia" = "#F87878")

# "<Cluster>_same" keeps the cluster colour.
clust_cols_new <- clust_cols
names(clust_cols_new) <- paste0(names(clust_cols), "_same")

# "<Cluster>_different" is drawn in a neutral gray. The earlier
# colorspace::lighten() result was overwritten on the very next line (dead
# code), so that call is dropped; the vector length now follows clust_cols
# instead of a hard-coded 14.
clust_cols_light <- rep("gray69", length(clust_cols))
names(clust_cols_light) <- paste0(names(clust_cols), "_different")

# Interleave the two palettes: A_same, A_different, B_same, B_different, ...
# order() over the two concatenated index sequences yields 1, n+1, 2, n+2, ...
idx <- order(c(seq_along(clust_cols_new), seq_along(clust_cols_light)))
all_clust_cols <- (c(clust_cols_new, clust_cols_light))[idx]

# Long format: one row per classification x metric. pivot_longer() puts the
# metric column name in `name` and its "same"/"different" flag in `value`.
newclass_long_pcts <- exclude_self_newclass_other %>%
  pivot_longer(
    cols = c(
      "same_opp_sex", "same_diff_coh", "same_diff_type",
      "same_diff_group", "same_diff_parents"
    )
  )

newclass_long_pcts$value <- factor(
  newclass_long_pcts$value,
  levels = c("same", "different")
)
newclass_long_pcts$Cluster <- factor(newclass_long_pcts$Cluster, levels = f.levels)

# Key matching the names of all_clust_cols ("<Cluster>_same" / "_different").
newclass_long_pcts$Cluster_samediff <- paste0(
  newclass_long_pcts$Cluster, "_", newclass_long_pcts$value
)

# alpha is a constant, so it is set on the geom. Inside aes() it was treated
# as a mapped variable, which adds a meaningless alpha legend and lets the
# alpha scale (not the literal 0.8) decide the transparency.
p <- ggplot(
  newclass_long_pcts,
  aes(
    x = factor(Cluster_samediff, levels = names(all_clust_cols)),
    y = norm_to_other * 100,
    color = Cluster_samediff,
    fill = Cluster_samediff
  )
) +
  # geom_boxplot() +
  # geom_point(position = position_jitterdodge(jitter.width = 0, dodge.width = 0.9)) +
  geom_jitter(size = 0.5, width = 0.2, alpha = 0.8) +
  scale_color_manual(values = all_clust_cols) +
  scale_fill_manual(values = all_clust_cols) +
  xlab("Group") +
  ylab("% of Classifications") +
  theme_classic()

# One facet row per metric.
p2 <- p + facet_wrap(~ name, nrow = 5)
print(p2)

# setwd("output/new_plots/")
# ggsave("by_metrics_animal_by_animal.pdf", p2)

plot allocations of misclassifications as barplot

# For each same/different flag: compute the chance-level share of "same"
# classifications from the metadata, compute each animal's observed share of
# "other" classifications that went to a "same" animal (clusters pooled),
# test observed vs. chance with a one-sample t-test, and draw a bar plot.

# Flag column -> metadata column it was derived from. This replaces the
# if/else chain that renamed the metadata column to a literal `m`.
metric_cols <- c(
  same_diff_coh = "beh_cohort",
  same_diff_group = "Group",
  same_diff_type = "pair_type",
  same_diff_parents = "parents",
  same_opp_sex = "sex"
)

plt_list <- list()
for (m in names(metric_cols)) {
  print(m)

  # Expected value: size of each category as a share of all animals, averaged
  # with the category sizes as weights. The denominator is taken from the
  # metadata rather than hard-coded as 38.
  m_summ <- metadata %>%
    summarise(n_metric = n(), .by = all_of(metric_cols[[m]]))
  m_summ$norm_pct <- m_summ$n_metric / nrow(metadata)
  weighted_mean <- weighted.mean(m_summ$norm_pct, m_summ$n_metric)
  print(weighted_mean)

  # Sum all "other" classifications per animal and flag value, without
  # grouping by cluster first. all_of() makes the string-based column
  # selection explicit instead of relying on tidyselect falling back to the
  # environment variable `m`.
  test_m <- exclude_self_newclass_other %>%
    group_by(across(all_of(c("animal", "group", m)))) %>%
    summarise(Freq = sum(Freq), .groups = "drop") %>%
    rename(same_diff = all_of(m))

  # Per-animal totals joined back on; keep only the "same" rows.
  # merge() takes `by`, not `on` (the original `on = "animal"` was ignored);
  # the keys shared by both tables are animal and group.
  test_m2 <- test_m %>%
    group_by(animal, group) %>%
    summarise(total_other_classifications = sum(Freq), .groups = "drop") %>%
    merge(test_m, by = c("animal", "group")) %>%
    filter(same_diff == "same")
  test_m2$pct_same_classes <- test_m2$Freq / test_m2$total_other_classifications
  test_m2$metric <- m

  # One-sample t-test against the chance line.
  one_samp_ttest <- t.test(test_m2$pct_same_classes * 100, mu = weighted_mean * 100)
  print(one_samp_ttest)

  # Bar = mean, error bar = mean +/- 1 SE, points = animals,
  # dashed red line = expected (chance) value.
  p3 <- ggplot(test_m2, aes(x = metric, y = pct_same_classes * 100)) +
    geom_bar(stat = "summary", fun = "mean", color = "black", fill = "white") +
    stat_summary(
      fun.data = mean_se, fun.args = list(mult = 1), geom = "errorbar",
      color = "black", position = position_dodge(0.8),
      width = 0.2, linewidth = 0.8
    ) +
    geom_jitter(height = 0, width = 0.2, color = "black", alpha = 0.6, stroke = NA) +
    geom_hline(yintercept = weighted_mean * 100, color = "red", linetype = "dashed") +
    ylim(0, 100) +
    theme_classic()
  print(p3)

  plt_list[[m]] <- p3
}

## [1] "same_diff_coh"
## [1] 0.3462604

## `summarise()` has grouped output by 'animal'. You can override using the
## `.groups` argument.

## 
##  One Sample t-test
## 
## data:  test_m2$pct_same_classes * 100
## t = 45.411, df = 37, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 34.62604
## 95 percent confidence interval:
##  84.55940 89.22346
## sample estimates:
## mean of x 
##  86.89143

## [1] "same_diff_group"
## [1] 0.2520776

## `summarise()` has grouped output by 'animal'. You can override using the
## `.groups` argument.

## 
##  One Sample t-test
## 
## data:  test_m2$pct_same_classes * 100
## t = -7.2743, df = 37, p-value = 1.226e-08
## alternative hypothesis: true mean is not equal to 25.20776
## 95 percent confidence interval:
##   7.989812 15.491970
## sample estimates:
## mean of x 
##  11.74089

## [1] "same_diff_type"
## [1] 0.3905817

## `summarise()` has grouped output by 'animal'. You can override using the
## `.groups` argument.

## 
##  One Sample t-test
## 
## data:  test_m2$pct_same_classes * 100
## t = -4.6812, df = 37, p-value = 3.76e-05
## alternative hypothesis: true mean is not equal to 39.05817
## 95 percent confidence interval:
##  13.11857 28.79036
## sample estimates:
## mean of x 
##  20.95446

## [1] "same_diff_parents"
## [1] 0.09695291

## `summarise()` has grouped output by 'animal'. You can override using the
## `.groups` argument.

## 
##  One Sample t-test
## 
## data:  test_m2$pct_same_classes * 100
## t = 2.6022, df = 35, p-value = 0.01349
## alternative hypothesis: true mean is not equal to 9.695291
## 95 percent confidence interval:
##  11.55082 24.72063
## sample estimates:
## mean of x 
##  18.13572

## [1] "same_opp_sex"
## [1] 0.501385

## `summarise()` has grouped output by 'animal'. You can override using the
## `.groups` argument.

## 
##  One Sample t-test
## 
## data:  test_m2$pct_same_classes * 100
## t = -1.526, df = 37, p-value = 0.1355
## alternative hypothesis: true mean is not equal to 50.1385
## 95 percent confidence interval:
##  37.54385 51.91185
## sample estimates:
## mean of x 
##  44.72785

# Lay the five per-metric bar plots out side by side in a single row.
# grid.arrange() draws the layout as a side effect and returns the arranged
# gtable, which is kept in p4 for the (disabled) ggsave() call below.
p4 <- grid.arrange(grobs = plt_list, nrow = 1)

# Printing p4 emits the TableGrob layout summary seen in the recorded output.
print(p4)

## TableGrob (1 x 5) "arrange": 5 grobs
##                   z     cells    name           grob
## same_diff_coh     1 (1-1,1-1) arrange gtable[layout]
## same_diff_group   2 (1-1,2-2) arrange gtable[layout]
## same_diff_type    3 (1-1,3-3) arrange gtable[layout]
## same_diff_parents 4 (1-1,4-4) arrange gtable[layout]
## same_opp_sex      5 (1-1,5-5) arrange gtable[layout]

# Saving is disabled. setwd() is commented out together with ggsave(), as in
# every other export block: left active, it changed the working directory for
# the rest of the session (breaking the relative "output/..." and "../..."
# paths on any re-run) without saving anything.
# setwd("output/new_plots/")
# ggsave("by_metrics_percent_cells_no_partner_combine_clusts_bars_se_noclustcollapsefirst.pdf", p4, width = 7, height = 7)

Try a way to visualize only the means (from data normalized to “other” counts). This sets up the data frame for a bubble plot.

# Keep the "same" rows only, then average their share (pcts) for every
# Cluster x metric combination, ignoring missing values.
norm_to_other_class2 <- filter(norm_to_other_class, same_diff == "same")

norm_for_summary <- norm_to_other_class2 %>%
  group_by(Cluster, metric) %>%
  summarise(mean_same_pct = mean(pcts, na.rm = TRUE))

## `summarise()` has grouped output by 'Cluster'. You can override using the
## `.groups` argument.

Bubble plot with normalized average expected values. The colored bubble indicates the true mean; the black outline indicates the expected value (weighted mean).

# Expected (chance) share of "same" classifications for every metric:
# size of each category as a share of all animals, averaged with the category
# sizes as weights.

# Metric label -> metadata column. The original only renamed the columns for
# "cohort" and "group" and relied on tidyselect falling back to the
# environment variable `m` for the rest (deprecated, ambiguous selection).
metric_cols <- c(
  cohort = "beh_cohort",
  group = "Group",
  pair_type = "pair_type",
  parents = "parents",
  sex = "sex"
)

# Same animal exclusion as applied when the metadata was loaded; kept so this
# block gives the same result on unfiltered metadata.
metadata2 <- metadata %>% filter(!animal %in% c("4918", "4967"))

norm_wts <- bind_rows(lapply(unique(norm_for_summary$metric), function(m) {
  print(m)
  if (!m %in% names(metric_cols)) {
    stop("No metadata column known for metric '", m, "'", call. = FALSE)
  }

  # Weighted average expected value; the denominator is the number of animals
  # in the metadata rather than a hard-coded 38.
  m_summ <- metadata2 %>%
    summarise(n_metric = n(), .by = all_of(metric_cols[[m]]))
  m_summ$norm_pct <- m_summ$n_metric / nrow(metadata2)

  data.frame(
    "metric" = m,
    "weighted_mean" = weighted.mean(m_summ$norm_pct, m_summ$n_metric)
  )
}))

## [1] "cohort"
## [1] "group"
## [1] "pair_type"
## [1] "parents"
## [1] "sex"

# Attach the expected value to every Cluster x metric mean. merge() takes
# `by`, not `on` (the original `on = "metric"` was silently ignored); the
# only shared key is metric.
norm_for_summary_weighted <- norm_for_summary %>% merge(norm_wts, by = "metric")

# Bubble plot: the filled bubble is sized by the observed mean share of "same"
# classifications, the black open circle by the expected (weighted mean)
# share. Both use one size scale spanning 0-1.
p <- ggplot(
  norm_for_summary_weighted,
  aes(
    x = metric,
    y = factor(Cluster, levels = rev(f.levels)),
    fill = Cluster,
    color = Cluster
  )
) +
  geom_point(aes(size = mean_same_pct), alpha = 1, shape = 21) +
  geom_point(aes(size = weighted_mean), alpha = 1, shape = 1, color = "black") +
  # geom_point(aes(size = 1), alpha = 1, shape = 1, color = "black") + #if you want 100% circle
  scale_size_continuous(limits = c(0, 1), range = c(0, 10)) +
  # guide = FALSE is deprecated; "none" is the supported way to hide a legend.
  scale_fill_manual(values = clust_cols, guide = "none") +
  scale_color_manual(values = clust_cols, guide = "none") +
  # Since ggplot2 3.4.0 border widths are set with `linewidth`, not `size`.
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "black", fill = NA, linewidth = 1.2)
  )

print(p)

# setwd("output/new_plots/")
# ggsave("SVM_metric_classifications_bubbleplot_normalized_expected.pdf", p)

# Record the R version, platform and package versions used for this run.
sessionInfo()

## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22631)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] corrplot_0.92  Hmisc_5.0-1    emmeans_1.8.5  gridExtra_2.3  DHARMa_0.4.6  
##  [6] glmmTMB_1.1.8  ggpubr_0.6.0   forcats_1.0.0  ggridges_0.5.4 ggplot2_3.4.2 
## [11] tidyr_1.3.0    dplyr_1.1.1   
## 
## loaded via a namespace (and not attached):
##  [1] splines_4.2.2       carData_3.0-5       Formula_1.2-5      
##  [4] highr_0.10          yaml_2.3.7          numDeriv_2016.8-1.1
##  [7] pillar_1.9.0        backports_1.4.1     lattice_0.21-8     
## [10] glue_1.6.2          digest_0.6.31       ggsignif_0.6.4     
## [13] checkmate_2.1.0     minqa_1.2.5         colorspace_2.1-0   
## [16] sandwich_3.0-2      htmltools_0.5.5     Matrix_1.6-2       
## [19] pkgconfig_2.0.3     broom_1.0.4         purrr_1.0.1        
## [22] xtable_1.8-4        mvtnorm_1.1-3       scales_1.2.1       
## [25] lme4_1.1-33         tibble_3.2.1        htmlTable_2.4.1    
## [28] mgcv_1.8-42         farver_2.1.1        generics_0.1.3     
## [31] car_3.1-2           TH.data_1.1-2       withr_2.5.0        
## [34] nnet_7.3-18         TMB_1.9.10          cli_3.6.0          
## [37] survival_3.5-5      magrittr_2.0.3      estimability_1.4.1 
## [40] evaluate_0.20       fansi_1.0.4         nlme_3.1-162       
## [43] MASS_7.3-58.3       rstatix_0.7.2       foreign_0.8-84     
## [46] data.table_1.14.6   tools_4.2.2         lifecycle_1.0.3    
## [49] multcomp_1.4-23     stringr_1.5.0       munsell_0.5.0      
## [52] cluster_2.1.4       compiler_4.2.2      rlang_1.1.1        
## [55] grid_4.2.2          nloptr_2.0.3        rstudioapi_0.14    
## [58] htmlwidgets_1.6.2   labeling_0.4.2      base64enc_0.1-3    
## [61] rmarkdown_2.25      boot_1.3-28.1       gtable_0.3.3       
## [64] codetools_0.2-19    abind_1.4-5         R6_2.5.1           
## [67] zoo_1.8-12          knitr_1.42          fastmap_1.1.1      
## [70] utf8_1.2.3          stringi_1.7.12      Rcpp_1.0.10        
## [73] vctrs_0.6.1         rpart_4.1.19        tidyselect_1.2.0   
## [76] xfun_0.38           coda_0.19-4

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

SVM_plots_forpaper.md

SVM_plots_forpaper.md

SVM plots

Files

SVM_plots_forpaper.md

Latest commit

History

SVM_plots_forpaper.md

File metadata and controls

SVM plots