From fb401d6289433be78efe7be7840a7786e713ccae Mon Sep 17 00:00:00 2001
From: Cristianetaniguti <chaytaniguti@gmail.com>
Date: Thu, 15 Aug 2024 17:07:45 -0400
Subject: [PATCH 1/3] adding exceptions

---
 R/mod_gwas.R               | 34 +++++++++++++++++++++++++++++++---
 tests/testthat/test-GWAS.R | 23 ++++++++++++++++-------
 2 files changed, 47 insertions(+), 10 deletions(-)

diff --git a/R/mod_gwas.R b/R/mod_gwas.R
index 230b0fa..e1aef38 100644
--- a/R/mod_gwas.R
+++ b/R/mod_gwas.R
@@ -165,6 +165,13 @@ mod_gwas_server <- function(id){
       #I think I can subset the read.GWAS file pheno and fixed categories (data@pheno[,c("trait")]) and data@fixed = phenotype_file[,c("List of fixed traits")]
       phenotype_file <- read.csv(input$phenotype_file$datapath, header = TRUE, check.names = FALSE)
 
+      # Remove empty lines
+      rm.empty <- which(apply(phenotype_file, 1, function(x) all(is.na(x) | x == "")))
+      if(length(rm.empty) > 0){
+        warning(paste("Removing", length(rm.empty),"empty lines"))
+        phenotype_file <- phenotype_file[-rm.empty,]
+      }
+
       ids <- colnames(phenotype_file)[1]
       traits <- input$trait_info
       fixed <- input$fixed_info
@@ -204,9 +211,6 @@ mod_gwas_server <- function(id){
       #Save new phenotype file with selected traits and fixed effects
       write.csv(phenotype_file, file = temp_pheno_file, row.names = FALSE)
 
-      #Remove the phenotype_file from memory
-      rm(phenotype_file)
-
       #Status
       updateProgressBar(session = session, id = "pb_gwas", value = 5, title = "Upload Complete: Now Formatting GWASpoly Data")
 
@@ -215,6 +219,8 @@ mod_gwas_server <- function(id){
 
       #Geno.file conversion if needed
       if (grepl("\\.csv$", file_path)) {
+        #TODO: Add check for matches of sample names in genotype and phenotype data
+
         data <- read.GWASpoly(ploidy= ploidy, pheno.file= temp_pheno_file, geno.file=input$gwas_file$datapath,
                               format="numeric", n.traits=length(traits), delim=",") #only need to change files here
 
@@ -231,6 +237,28 @@ mod_gwas_server <- function(id){
         class(geno_mat) <- "numeric"
         info <- data.frame(vcf@fix)
         gpoly_df <- cbind(info[,c("ID","CHROM","POS")], geno_mat)
+
+        # At least one genotype column must be a sample listed in the phenotype file
+        samples_match <- any(colnames(gpoly_df) %in% phenotype_file$Sample_ID)
+        if(!samples_match) {
+          shinyalert(
+            title = "Samples ID do not match",
+            text = paste("Check if passport/phenotype files have same sample ID as the VCF/genotype file."),
+            size = "s",
+            closeOnEsc = TRUE,
+            closeOnClickOutside = FALSE,
+            html = TRUE,
+            type = "error",
+            showConfirmButton = TRUE,
+            confirmButtonText = "OK",
+            confirmButtonCol = "#004192",
+            showCancelButton = FALSE,
+            animation = TRUE
+          )
+        }
+        # Halt here with a message that names the sample-ID mismatch being tested
+        validate(need(samples_match, "Sample IDs in the phenotype file do not match the VCF/genotype file."))
+
         write.csv(gpoly_df, file = temp_geno_file, row.names = FALSE)
 
         data <- read.GWASpoly(ploidy= ploidy, pheno.file= temp_pheno_file, geno.file=temp_geno_file,
diff --git a/tests/testthat/test-GWAS.R b/tests/testthat/test-GWAS.R
index 5d26dca..7d5833e 100644
--- a/tests/testthat/test-GWAS.R
+++ b/tests/testthat/test-GWAS.R
@@ -17,6 +17,13 @@ test_that("test GWAS",{
   #I think I can subset the read.GWAS file pheno and fixed categories (data@pheno[,c("trait")]) and data@fixed = phenotype_file[,c("List of fixed traits")]
   phenotype_file <- read.csv(input$phenotype_file$datapath, header = TRUE, check.names = FALSE)
 
+  # Remove empty lines
+  rm.empty <- which(apply(phenotype_file, 1, function(x) all(is.na(x) | x == "")))
+  if(length(rm.empty) > 0){
+    warning(paste("Removing", length(rm.empty),"empty lines"))
+    phenotype_file <- phenotype_file[-rm.empty,]
+  }
+
   ids <- colnames(phenotype_file)[1]
   traits <- input$trait_info
   fixed <- input$fixed_info
@@ -36,9 +43,6 @@ test_that("test GWAS",{
   #Save new phenotype file with selected traits and fixed effects
   write.csv(phenotype_file, file = temp_pheno_file, row.names = FALSE)
 
-  #Remove the phenotype_file from memory
-  rm(phenotype_file)
-
   #Geno file path
   file_path <- input$gwas_file$datapath
 
@@ -56,10 +60,15 @@ test_that("test GWAS",{
 
     #Extract GT
     geno_mat <- extract.gt(vcf, element = "GT")
-    geno_mat <- apply(geno_mat, 2, convert_to_dosage)
+    geno_mat <- apply(geno_mat, 2, BIGapp:::convert_to_dosage)
     class(geno_mat) <- "numeric"
     info <- data.frame(vcf@fix)
     gpoly_df <- cbind(info[,c("ID","CHROM","POS")], geno_mat)
+
+    if(!any(colnames(gpoly_df) %in% phenotype_file$Sample_ID)) { # Add
+      stop("Make sure passport and VCF samples have same name")
+    }
+
     write.csv(gpoly_df, file = temp_geno_file, row.names = FALSE)
 
     data <- read.GWASpoly(ploidy= ploidy, pheno.file= temp_pheno_file, geno.file=temp_geno_file,
@@ -106,7 +115,7 @@ test_that("test GWAS",{
   PC<-as.matrix(PCs)
   K=as.matrix(Kin)
 
-  kin.adj<-posdefmat(K)
+  kin.adj<-BIGapp:::posdefmat(K)
   kin.test<-as.matrix(kin.adj)
 
   for (i in 2:ncol(GE)){
@@ -114,7 +123,7 @@ test_that("test GWAS",{
     #model selection
     y=as.numeric(GE[,i])
 
-    BICs<-CalcBIC(y=y,PC=PC,K=kin.test)
+    BICs<- BIGapp:::CalcBIC(y=y,PC=PC,K=kin.test)
 
     plotBICs<-cbind(rbind.data.frame(BICs$BIC$withK,BICs$BIC$withoutK),rep(c("w/Kinship","no Kinship"),each=nrow(BICs$BIC$withK)))
     colnames(plotBICs)[ncol(plotBICs)]<-"RelationshipMatrix"
@@ -165,7 +174,7 @@ test_that("test GWAS",{
 
     #Save qq_plot info
 
-    CMplot_shiny(data_qq,plot.type="q",col=c(1:8),
+    BIGapp:::CMplot_shiny(data_qq,plot.type="q",col=c(1:8),
                  ylab.pos=2,
                  file.name=colnames(data@pheno[i]),
                  conf.int=FALSE,

From 7c885ec9d79a3bd065cd0e0ee760be138f99a9d5 Mon Sep 17 00:00:00 2001
From: Cristianetaniguti <chaytaniguti@gmail.com>
Date: Fri, 16 Aug 2024 12:49:49 -0400
Subject: [PATCH 2/3] fixed issues #37 and #38

---
 R/mod_Filtering.R               | 201 ++++++++++++++++++++------------
 R/utils.R                       |  12 ++
 tests/testthat/test-filtering.R | 146 ++++++++++++++++++++++-
 3 files changed, 283 insertions(+), 76 deletions(-)

diff --git a/R/mod_Filtering.R b/R/mod_Filtering.R
index 3e52daf..6cf92ca 100644
--- a/R/mod_Filtering.R
+++ b/R/mod_Filtering.R
@@ -58,14 +58,7 @@ mod_Filtering_ui <- function(id){
              )
       ),
       column(width = 6,
-             tabBox(width =12, collapsible = FALSE, status = "info",
-                    id = "updog_tab", height = "600px",
-                    tabPanel("Bias Histogram", icon = icon("image"), plotOutput(ns("bias_hist"), height = '550px')),
-                    tabPanel("OD Histogram", icon = icon("image"), plotOutput(ns("od_hist"), height = '550px')),
-                    tabPanel("Prop_mis Histogram", icon = icon("image"), plotOutput(ns("maxpostprob_hist"), height = '550px')),
-                    tabPanel("SNP_miss", icon = icon("image"), plotOutput(ns("missing_snp_hist"), height = '550px')),
-                    tabPanel("Sample_miss", icon = icon("image"), plotOutput(ns("missing_sample_hist"), height = '550px'))
-             )
+             uiOutput(ns("din_tabs")),
       ),
       column(width = 3,
              valueBoxOutput(ns("snp_retained_box"), width = NULL),
@@ -165,7 +158,37 @@ mod_Filtering_server <- function(id){
 
       req(input$filter_ploidy, input$filter_output_name,input$updog_rdata)
 
-      if (input$use_updog) {
+      #Input file
+      vcf <- read.vcfR(input$updog_rdata$datapath, verbose = FALSE)
+
+      # Detect updog metrics: MPP in any FORMAT string; PMC, BIAS and OD in the first record's INFO
+      format_fields <- unique(vcf@gt[,1])
+      info_fields <- vcf@fix[1,8]
+      updog_par <- any(grepl("MPP", format_fields)) && grepl("PMC", info_fields) && grepl("BIAS", info_fields) && grepl("OD", info_fields)
+
+      if(updog_par){
+        output$din_tabs <- renderUI({
+          tabBox(width =12, collapsible = FALSE, status = "info",
+                 id = "updog_tab", height = "600px",
+                 tabPanel("Bias Histogram", icon = icon("image"), plotOutput(ns("bias_hist"), height = '550px')),
+                 tabPanel("OD Histogram", icon = icon("image"), plotOutput(ns("od_hist"), height = '550px')),
+                 tabPanel("Prop_mis Histogram", icon = icon("image"), plotOutput(ns("maxpostprob_hist"), height = '550px')),
+                 tabPanel("SNP_miss", icon = icon("image"), plotOutput(ns("missing_snp_hist"), height = '550px')),
+                 tabPanel("Sample_miss", icon = icon("image"), plotOutput(ns("missing_sample_hist"), height = '550px'))
+          )
+        })
+      } else {
+        output$din_tabs <- renderUI({
+          tabBox(width =12, collapsible = FALSE, status = "info",
+                 id = "updog_tab", height = "600px",
+                 tabPanel("SNP_miss", icon = icon("image"), plotOutput(ns("missing_snp_hist"), height = '550px')),
+                 tabPanel("Sample_miss", icon = icon("image"), plotOutput(ns("missing_sample_hist"), height = '550px'))
+          )
+        })
+      }
+
+
+      if (input$use_updog & updog_par) {
         # Use Updog filtering parameters
         OD_filter <- as.numeric(input$OD_filter)
         Prop_mis <- as.numeric(input$Prop_mis)
@@ -193,8 +216,7 @@ mod_Filtering_server <- function(id){
       maf_filter <- input$filter_maf
 
       updateProgressBar(session = session, id = "pb_filter", value = 10, title = "Processing VCF file")
-      #Input file
-      vcf <- read.vcfR(input$updog_rdata$datapath, verbose = FALSE)
+
       #Starting SNPs
       starting_snps <- nrow(vcf)
       output$snp_removed_box <- renderValueBox({
@@ -226,6 +248,23 @@ mod_Filtering_server <- function(id){
                        filter.MAF = as.numeric(maf_filter),
                        filter.MPP = max_post)
 
+      if (length(vcf@gt) == 0) {
+        shinyalert(
+          title = "All markers were filtered out",
+          text = "Loose the parameters to access results in this tab",
+          size = "s",
+          closeOnEsc = TRUE,
+          closeOnClickOutside = FALSE,
+          html = TRUE,
+          type = "error",
+          showConfirmButton = TRUE,
+          confirmButtonText = "OK",
+          confirmButtonCol = "#004192",
+          showCancelButton = FALSE,
+          animation = TRUE
+        )
+      }
+
       #Getting missing data information
       #Add support for genotype matrix filtering?
       #Pb
@@ -336,6 +375,8 @@ mod_Filtering_server <- function(id){
           abline(v = median(as.numeric(filtering_output$df$BIAS)), col = "green", lty = 2)  # Median line
           abline(v = 0.5, col = "black", lty = 2)  # proposed lower line
           abline(v = 2, col = "black", lty = 2)  # proposed upper line
+          legend("topright", legend=c("mean", "median", "suggested threshold"),
+                 col=c("red", "green","black"), lty=2, cex=0.8)
 
         } else if (input$filter_hist == "OD Histogram") {
 
@@ -355,6 +396,8 @@ mod_Filtering_server <- function(id){
           abline(v = mean(as.numeric(filtering_output$df$OD)), col = "red", lty = 2)  # Mean line
           abline(v = median(as.numeric(filtering_output$df$OD)), col = "green", lty = 2)  # Median line
           abline(v = 0.05, col = "black", lty = 2)  # proposed filter by updog
+          legend("topright", legend=c("mean", "median", "suggested threshold"),
+                 col=c("red", "green","black"), lty=2, cex=0.8)
 
         } else if (input$filter_hist == "Prop_mis Histogram") {
 
@@ -372,6 +415,8 @@ mod_Filtering_server <- function(id){
           abline(v = mean(as.numeric(filtering_output$df$PMC)), col = "red", lty = 2)  # Mean line
           abline(v = median(as.numeric(filtering_output$df$PMC)), col = "green", lty = 2)  # Median line
           abline(v = quantile(as.numeric(filtering_output$df$PMC), 0.95), col = "blue", lty = 2)
+          legend("topright", legend=c("mean", "median", "quantile"),
+                 col=c("red", "green","blue"), lty=2, cex=0.8)
 
         } else if (input$filter_hist == "SNP_mis") {
 
@@ -389,6 +434,8 @@ mod_Filtering_server <- function(id){
           abline(v = mean(as.numeric(filtering_files$snp_miss_df)), col = "red", lty = 2)  # Mean line
           abline(v = median(as.numeric(filtering_files$snp_miss_df)), col = "green", lty = 2)  # Median line
           abline(v = quantile(as.numeric(filtering_files$snp_miss_df), 0.95), col = "blue", lty = 2)
+          legend("topright", legend=c("mean", "median", "quantile"),
+                 col=c("red", "green","blue"), lty=2, cex=0.8)
 
         } else if (input$filter_hist == "Sample_mis") {
 
@@ -406,6 +453,8 @@ mod_Filtering_server <- function(id){
           abline(v = mean(as.numeric(filtering_files$sample_miss_df)), col = "red", lty = 2)  # Mean line
           abline(v = median(as.numeric(filtering_files$sample_miss_df)), col = "green", lty = 2)  # Median line
           abline(v = quantile(as.numeric(filtering_files$sample_miss_df), 0.95), col = "blue", lty = 2)
+          legend("topright", legend=c("mean", "median", "quantile"),
+                 col=c("red", "green","blue"), lty=2, cex=0.8)
         }
         dev.off()
       }
@@ -421,19 +470,6 @@ mod_Filtering_server <- function(id){
 
     observeEvent(filtering_files$raw_vcf_df, {
 
-
-      # Function to split INFO column and expand it into multiple columns
-      split_info_column <- function(info) {
-        # Split the INFO column by semicolon
-        info_split <- str_split(info, ";")[[1]]
-
-        # Create a named list by splitting each element by equals sign
-        info_list <- set_names(map(info_split, ~ str_split(.x, "=")[[1]][2]),
-                               map(info_split, ~ str_split(.x, "=")[[1]][1]))
-
-        return(info_list)
-      }
-
       # Apply the function to each row and bind the results into a new dataframe
       new_df <- data.frame(filtering_files$raw_vcf_df) %>%
         mutate(INFO_list = map(INFO, split_info_column)) %>%
@@ -450,67 +486,80 @@ mod_Filtering_server <- function(id){
       ###Bias
 
       #Histogram
-      output$bias_hist <- renderPlot({
-        hist(as.numeric(new_df$BIAS),
-             main = "Unfiltered SNP bias histogram",
-             xlab = "bias",
-             ylab = "SNPs",
-             col = "lightblue",
-             border = "black",
-             xlim = c(0,5),
-             breaks = as.numeric(input$hist_bins))
-        axis(1, at = seq(0, 5, by = .2), labels = rep("", length(seq(0, 5, by = 0.2))))  # Add ticks
-        abline(v = mean(as.numeric(new_df$BIAS)), col = "red", lty = 2)  # Mean line
-        abline(v = median(as.numeric(new_df$BIAS)), col = "green", lty = 2)  # Median line
-        abline(v = 0.5, col = "black", lty = 2)  # proposed lower line
-        abline(v = 2, col = "black", lty = 2)  # proposed upper line
-      })
+      if(any(grepl("BIAS", colnames(new_df)))){
+        output$bias_hist <- renderPlot({
+          hist(as.numeric(new_df$BIAS),
+               main = "Unfiltered SNP bias histogram",
+               xlab = "bias",
+               ylab = "SNPs",
+               col = "lightblue",
+               border = "black",
+               xlim = c(0,5),
+               breaks = as.numeric(input$hist_bins))
+          axis(1, at = seq(0, 5, by = .2), labels = rep("", length(seq(0, 5, by = 0.2))))  # Add ticks
+          abline(v = mean(as.numeric(new_df$BIAS)), col = "red", lty = 2)  # Mean line
+          abline(v = median(as.numeric(new_df$BIAS)), col = "green", lty = 2)  # Median line
+          abline(v = 0.5, col = "black", lty = 2)  # proposed lower line
+          abline(v = 2, col = "black", lty = 2)  # proposed upper line
+          legend("topright", legend=c("mean", "median", "suggested threshold"),
+                 col=c("red", "green","black"), lty=2, cex=0.8)
+        })
+      }
 
       ###OD
-      quantile(as.numeric(new_df$OD), 0.95)
-      #Histogram
-      output$od_hist <- renderPlot({
-        hist(as.numeric(new_df$OD),
-             main = "Unfiltered SNP overdispersion parameter histogram",
-             xlab = "OD",
-             ylab = "SNPs",
-             col = "lightblue",
-             border = "black",
-             xlim = c(0,0.6),
-             breaks = as.numeric(input$hist_bins))
-        axis(1, at = seq(0, 0.6, by = .01), labels = rep("", length(seq(0, 0.6, by = 0.01))))  # Add ticks
-        abline(v = 0.05, col = "black", lty = 2)  # proposed filter by updog
+      if(any(grepl("OD", colnames(new_df)))){
 
-        # Add vertical lines
-        abline(v = mean(as.numeric(new_df$OD)), col = "red", lty = 2)  # Mean line
-        abline(v = median(as.numeric(new_df$OD)), col = "green", lty = 2)  # Median line
-        abline(v = 0.05, col = "black", lty = 2)  # proposed filter by updog
+        quantile(as.numeric(new_df$OD), 0.95)
+        #Histogram
+        output$od_hist <- renderPlot({
+          hist(as.numeric(new_df$OD),
+               main = "Unfiltered SNP overdispersion parameter histogram",
+               xlab = "OD",
+               ylab = "SNPs",
+               col = "lightblue",
+               border = "black",
+               xlim = c(0,0.6),
+               breaks = as.numeric(input$hist_bins))
+          axis(1, at = seq(0, 0.6, by = .01), labels = rep("", length(seq(0, 0.6, by = 0.01))))  # Add ticks
+          abline(v = 0.05, col = "black", lty = 2)  # proposed filter by updog
 
-      })
+          # Add vertical lines
+          abline(v = mean(as.numeric(new_df$OD)), col = "red", lty = 2)  # Mean line
+          abline(v = median(as.numeric(new_df$OD)), col = "green", lty = 2)  # Median line
+          abline(v = 0.05, col = "black", lty = 2)  # proposed filter by updog
+          legend("topright", legend=c("mean", "median", "suggested threshold"),
+                 col=c("red", "green","black"), lty=2, cex=0.8)
+
+        })
+      }
 
       ##MAXPOSTPROB
 
       #Histogram
+      if(any(grepl("PMC", colnames(new_df)))){
 
-      output$maxpostprob_hist <- renderPlot({
+        output$maxpostprob_hist <- renderPlot({
 
-        #Histogram
-        hist(as.numeric(new_df$PMC),
-             main = "The estimated proportion of individuals misclassified in the SNP from updog",
-             xlab = "Proportion of Misclassified Genotypes per SNP",
-             ylab = "Number of SNPs",
-             col = "lightblue",
-             border = "black",
-             xlim = c(0,1),
-             breaks = as.numeric(input$hist_bins))
-        axis(1, at = seq(0, 1, by = .1), labels = rep("", length(seq(0, 1, by = 0.1))))  # Add ticks
+          #Histogram
+          hist(as.numeric(new_df$PMC),
+               main = "The estimated proportion of individuals misclassified in the SNP from updog",
+               xlab = "Proportion of Misclassified Genotypes per SNP",
+               ylab = "Number of SNPs",
+               col = "lightblue",
+               border = "black",
+               xlim = c(0,1),
+               breaks = as.numeric(input$hist_bins))
+          axis(1, at = seq(0, 1, by = .1), labels = rep("", length(seq(0, 1, by = 0.1))))  # Add ticks
 
-        # Add vertical lines
-        abline(v = mean(as.numeric(new_df$PMC)), col = "red", lty = 2)  # Mean line
-        abline(v = median(as.numeric(new_df$PMC)), col = "green", lty = 2)  # Median line
-        abline(v = quantile(as.numeric(new_df$PMC), 0.95), col = "blue", lty = 2)
+          # Add vertical lines
+          abline(v = mean(as.numeric(new_df$PMC)), col = "red", lty = 2)  # Mean line
+          abline(v = median(as.numeric(new_df$PMC)), col = "green", lty = 2)  # Median line
+          abline(v = quantile(as.numeric(new_df$PMC), 0.95), col = "blue", lty = 2)
+          legend("topright", legend=c("mean", "median", "quantile"),
+                 col=c("red", "green","blue"), lty=2, cex=0.8)
 
-      })
+        })
+      }
 
       #Missing data
       output$missing_snp_hist <- renderPlot({
@@ -530,7 +579,8 @@ mod_Filtering_server <- function(id){
         abline(v = mean(as.numeric(filtering_files$snp_miss_df)), col = "red", lty = 2)  # Mean line
         abline(v = median(as.numeric(filtering_files$snp_miss_df)), col = "green", lty = 2)  # Median line
         abline(v = quantile(as.numeric(filtering_files$snp_miss_df), 0.95), col = "blue", lty = 2)
-
+        legend("topright", legend=c("mean", "median", "quantile"),
+               col=c("red", "green","blue"), lty=2, cex=0.8)
       })
 
       output$missing_sample_hist <- renderPlot({
@@ -550,7 +600,8 @@ mod_Filtering_server <- function(id){
         abline(v = mean(as.numeric(filtering_files$sample_miss_df)), col = "red", lty = 2)  # Mean line
         abline(v = median(as.numeric(filtering_files$sample_miss_df)), col = "green", lty = 2)  # Median line
         abline(v = quantile(as.numeric(filtering_files$sample_miss_df), 0.95), col = "blue", lty = 2)
-
+        legend("topright", legend=c("mean", "median", "quantile"),
+               col=c("red", "green","blue"), lty=2, cex=0.8)
       })
 
       ##Read Depth (I would prefer that this show the mean depth for SNPs or Samples instead of all loci/sample cells)
diff --git a/R/utils.R b/R/utils.R
index d39d70c..811f485 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -280,3 +280,15 @@ posdefmat <- function(mat) {
   }
   return(g)
 }
+
+# Split one VCF INFO string ("KEY=value;FLAG;...") into a named list of values.
+split_info_column <- function(info) {
+  # One "KEY=value" entry per element of the semicolon-separated INFO string
+  info_split <- str_split(info, ";")[[1]]
+
+  # Split each entry once, at the first "=" only, so a value that itself
+  # contains "=" is kept whole; entries without "=" (flags) get an NA value
+  key_value <- map(info_split, ~ str_split(.x, "=", n = 2)[[1]])
+
+  set_names(map(key_value, ~ .x[2]), map(key_value, ~ .x[1]))
+}
diff --git a/tests/testthat/test-filtering.R b/tests/testthat/test-filtering.R
index aec9033..493c676 100644
--- a/tests/testthat/test-filtering.R
+++ b/tests/testthat/test-filtering.R
@@ -1,6 +1,14 @@
 context("Filtering")
 
-test_that("Filtering",{
+#library(vcfR)
+#library(BIGr)
+#library(testthat)
+library(tidyr)
+library(dplyr)
+library(purrr)
+library(stringr)
+
+test_that("Filtering with updog metrics",{
 
   #Variables
   filter_ploidy <- 2
@@ -26,6 +34,81 @@ test_that("Filtering",{
 
   temp_file <- tempfile(fileext = ".vcf.gz")
 
+  #Input file
+  vcf <- read.vcfR(input$updog_rdata$datapath, verbose = FALSE)
+
+  # Identify if have updog parameters
+  format_fields <- unique(vcf@gt[,1])
+  info_fields <- vcf@fix[1,8]
+
+  updog_par <- grepl("MPP", format_fields) & grepl("PMC", info_fields) & grepl("BIAS", info_fields)
+
+  #Starting SNPs
+  starting_snps <- nrow(vcf)
+  #export INFO dataframe
+  filtering_files$raw_vcf_df <- data.frame(vcf@fix)
+
+  #Filtering
+  vcf <- filterVCF(vcf.file = vcf,
+                   ploidy=ploidy,
+                   output.file=NULL,
+                   filter.OD = OD_filter,
+                   filter.BIAS.min = Bias_min,
+                   filter.BIAS.max = Bias_max,
+                   filter.DP = as.numeric(size_depth),
+                   filter.PMC = Prop_mis,
+                   filter.SAMPLE.miss = as.numeric(sample_miss),
+                   filter.SNP.miss = as.numeric(snp_miss),
+                   filter.MAF = as.numeric(maf_filter),
+                   filter.MPP = max_post)
+
+  #Getting missing data information
+  #Add support for genotype matrix filtering?
+  gt_matrix <- extract.gt(vcf, element = "GT", as.numeric = FALSE)
+  filtering_files$snp_miss_df <- rowMeans(is.na(gt_matrix)) #SNP missing values
+  filtering_files$sample_miss_df <- as.numeric(colMeans(is.na(gt_matrix))) #Sample missing values
+
+  expect_true(all(table(gt_matrix[,10]) == c(20,13,8)))
+
+  rm(gt_matrix) #Remove gt matrix
+
+  #Writing file
+  write.vcf(vcf, file = temp_file)
+
+  #Get final_snps
+  final_snps <- nrow(vcf)
+  expect_equal(final_snps, 43)
+
+})
+
+
+test_that("Filtering without updog metrics",{
+
+  #Variables
+  filter_ploidy <- 2
+  filter_maf <- 0.05
+  size_depth <- 10
+  snp_miss <- 100
+  sample_miss <- 100
+  OD_filter <- NULL
+  Bias <- NULL
+  Bias_min <- NULL
+  Bias_max <- NULL
+  Prop_mis <- 0.05
+  maxpostprob_filter <- NULL
+  max_post <- maxpostprob_filter
+  output_name <- "out"
+  snp_miss <- snp_miss/100
+  sample_miss <- sample_miss/100
+  ploidy <- filter_ploidy
+  maf_filter <- filter_maf
+  input$hist_bins <- 50
+
+  input <- filtering_files <- list()
+  input$updog_rdata$datapath <- system.file("vcf_example_out.vcf.gz", package = "BIGapp")
+
+  temp_file <- tempfile(fileext = ".vcf.gz")
+
   #Input file
   vcf <- read.vcfR(input$updog_rdata$datapath, verbose = FALSE)
   #Starting SNPs
@@ -47,11 +130,14 @@ test_that("Filtering",{
                    filter.MAF = as.numeric(maf_filter),
                    filter.MPP = max_post)
 
+  if(length(vcf@gt) == 0) stop("All markers were filtered. Loose the parameters to access results in this tab.")
+
   #Getting missing data information
   #Add support for genotype matrix filtering?
   gt_matrix <- extract.gt(vcf, element = "GT", as.numeric = FALSE)
   filtering_files$snp_miss_df <- rowMeans(is.na(gt_matrix)) #SNP missing values
   filtering_files$sample_miss_df <- as.numeric(colMeans(is.na(gt_matrix))) #Sample missing values
+
   rm(gt_matrix) #Remove gt matrix
 
   #Writing file
@@ -59,4 +145,62 @@ test_that("Filtering",{
 
   #Get final_snps
   final_snps <- nrow(vcf)
+
+  #export INFO dataframe
+  filtering_files$raw_vcf_df
+
+  # Apply the function to each row and bind the results into a new dataframe
+  new_df <- data.frame(filtering_files$raw_vcf_df) %>%
+    mutate(INFO_list = map(INFO, split_info_column)) %>%
+    unnest_wider(INFO_list)
+
+  #Save df to reactive value
+  filtering_output <- list()
+  filtering_output$df <- new_df
+
+  ##Make plots
+
+  #Missing data
+
+    #Histogram
+    hist(as.numeric(filtering_files$snp_miss_df),
+         main = "Ratio of Missing Data per SNP After Filtering",
+         xlab = "Proportion of Missing Data per SNP",
+         ylab = "Number of SNPs",
+         col = "lightblue",
+         border = "black",
+         xlim = c(0,1),
+         breaks = as.numeric(input$hist_bins))
+    axis(1, at = seq(0, 1, by = .1), labels = rep("", length(seq(0, 1, by = 0.1))))  # Add ticks
+
+    # Add vertical lines
+    abline(v = mean(as.numeric(filtering_files$snp_miss_df)), col = "red", lty = 2)  # Mean line
+    abline(v = median(as.numeric(filtering_files$snp_miss_df)), col = "green", lty = 2)  # Median line
+    abline(v = quantile(as.numeric(filtering_files$snp_miss_df), 0.95), col = "blue", lty = 2)
+    legend("topright", legend=c("mean", "median", "quantile"),
+           col=c("red", "green","blue"), lty=1:2, cex=0.8)
+
+    #Histogram
+    hist(as.numeric(filtering_files$sample_miss_df),
+         main = "Ratio of Missing Data per Sample After Filtering",
+         xlab = "Proportion of Missing Data per Sample",
+         ylab = "Number of Samples",
+         col = "lightblue",
+         border = "black",
+         xlim = c(0,1),
+         breaks = as.numeric(input$hist_bins))
+    axis(1, at = seq(0, 1, by = .1), labels = rep("", length(seq(0, 1, by = 0.1))))  # Add ticks
+
+    # Add vertical lines
+    abline(v = mean(as.numeric(filtering_files$sample_miss_df)), col = "red", lty = 2)  # Mean line
+    abline(v = median(as.numeric(filtering_files$sample_miss_df)), col = "green", lty = 2)  # Median line
+    abline(v = quantile(as.numeric(filtering_files$sample_miss_df), 0.95), col = "blue", lty = 2)
+    legend("topright", legend=c("mean", "median", "quantile"),
+           col=c("red", "green","blue"), lty=1:2, cex=0.8)
+
+
+  ##Read Depth (I would prefer that this show the mean depth for SNPs or Samples instead of all loci/sample cells)
+  quantile(as.numeric(new_df$DP), 0.95)
+
+
 })

From fd8255f682eb098c909f8aca0fa077319d4ce493 Mon Sep 17 00:00:00 2001
From: Cristianetaniguti <chaytaniguti@gmail.com>
Date: Wed, 28 Aug 2024 11:30:30 -0400
Subject: [PATCH 3/3] Add empty tab + fix tests

---
 R/mod_Filtering.R               |   7 ++
 tests/testthat/test-GWAS.R      |   1 +
 tests/testthat/test-filtering.R | 123 --------------------------------
 3 files changed, 8 insertions(+), 123 deletions(-)

diff --git a/R/mod_Filtering.R b/R/mod_Filtering.R
index 6cf92ca..72ba828 100644
--- a/R/mod_Filtering.R
+++ b/R/mod_Filtering.R
@@ -132,6 +132,13 @@ mod_Filtering_server <- function(id){
 
     disable("start_updog_filter")
 
+    output$din_tabs <- renderUI({
+      tabBox(width =12, collapsible = FALSE, status = "info",
+             id = "updog_tab", height = "600px",
+             tabPanel("Results", p("Upload VCF file to access results in this section."))
+      )
+    })
+
     vcf <- eventReactive(input$run_filters, {
 
       # Ensure the files are uploaded
diff --git a/tests/testthat/test-GWAS.R b/tests/testthat/test-GWAS.R
index 7d5833e..a2d9420 100644
--- a/tests/testthat/test-GWAS.R
+++ b/tests/testthat/test-GWAS.R
@@ -1,6 +1,7 @@
 context("GWAS")
 
 test_that("test GWAS",{
+
   input <- list()
   input$cores <- 1
   input$phenotype_file$datapath <- system.file("iris_passport_file.csv", package = "BIGapp")
diff --git a/tests/testthat/test-filtering.R b/tests/testthat/test-filtering.R
index 493c676..cc74fea 100644
--- a/tests/testthat/test-filtering.R
+++ b/tests/testthat/test-filtering.R
@@ -81,126 +81,3 @@ test_that("Filtering with updog metrics",{
 
 })
 
-
-test_that("Filtering without updog metrics",{
-
-  #Variables
-  filter_ploidy <- 2
-  filter_maf <- 0.05
-  size_depth <- 10
-  snp_miss <- 100
-  sample_miss <- 100
-  OD_filter <- NULL
-  Bias <- NULL
-  Bias_min <- NULL
-  Bias_max <- NULL
-  Prop_mis <- 0.05
-  maxpostprob_filter <- NULL
-  max_post <- maxpostprob_filter
-  output_name <- "out"
-  snp_miss <- snp_miss/100
-  sample_miss <- sample_miss/100
-  ploidy <- filter_ploidy
-  maf_filter <- filter_maf
-  input$hist_bins <- 50
-
-  input <- filtering_files <- list()
-  input$updog_rdata$datapath <- system.file("vcf_example_out.vcf.gz", package = "BIGapp")
-
-  temp_file <- tempfile(fileext = ".vcf.gz")
-
-  #Input file
-  vcf <- read.vcfR(input$updog_rdata$datapath, verbose = FALSE)
-  #Starting SNPs
-  starting_snps <- nrow(vcf)
-  #export INFO dataframe
-  filtering_files$raw_vcf_df <- data.frame(vcf@fix)
-
-  #Filtering
-  vcf <- filterVCF(vcf.file = vcf,
-                   ploidy=ploidy,
-                   output.file=NULL,
-                   filter.OD = OD_filter,
-                   filter.BIAS.min = Bias_min,
-                   filter.BIAS.max = Bias_max,
-                   filter.DP = as.numeric(size_depth),
-                   filter.PMC = Prop_mis,
-                   filter.SAMPLE.miss = as.numeric(sample_miss),
-                   filter.SNP.miss = as.numeric(snp_miss),
-                   filter.MAF = as.numeric(maf_filter),
-                   filter.MPP = max_post)
-
-  if(length(vcf@gt) == 0) stop("All markers were filtered. Loose the parameters to access results in this tab.")
-
-  #Getting missing data information
-  #Add support for genotype matrix filtering?
-  gt_matrix <- extract.gt(vcf, element = "GT", as.numeric = FALSE)
-  filtering_files$snp_miss_df <- rowMeans(is.na(gt_matrix)) #SNP missing values
-  filtering_files$sample_miss_df <- as.numeric(colMeans(is.na(gt_matrix))) #Sample missing values
-
-  rm(gt_matrix) #Remove gt matrix
-
-  #Writing file
-  write.vcf(vcf, file = temp_file)
-
-  #Get final_snps
-  final_snps <- nrow(vcf)
-
-  #export INFO dataframe
-  filtering_files$raw_vcf_df
-
-  # Apply the function to each row and bind the results into a new dataframe
-  new_df <- data.frame(filtering_files$raw_vcf_df) %>%
-    mutate(INFO_list = map(INFO, split_info_column)) %>%
-    unnest_wider(INFO_list)
-
-  #Save df to reactive value
-  filtering_output <- list()
-  filtering_output$df <- new_df
-
-  ##Make plots
-
-  #Missing data
-
-    #Histogram
-    hist(as.numeric(filtering_files$snp_miss_df),
-         main = "Ratio of Missing Data per SNP After Filtering",
-         xlab = "Proportion of Missing Data per SNP",
-         ylab = "Number of SNPs",
-         col = "lightblue",
-         border = "black",
-         xlim = c(0,1),
-         breaks = as.numeric(input$hist_bins))
-    axis(1, at = seq(0, 1, by = .1), labels = rep("", length(seq(0, 1, by = 0.1))))  # Add ticks
-
-    # Add vertical lines
-    abline(v = mean(as.numeric(filtering_files$snp_miss_df)), col = "red", lty = 2)  # Mean line
-    abline(v = median(as.numeric(filtering_files$snp_miss_df)), col = "green", lty = 2)  # Median line
-    abline(v = quantile(as.numeric(filtering_files$snp_miss_df), 0.95), col = "blue", lty = 2)
-    legend("topright", legend=c("mean", "median", "quantile"),
-           col=c("red", "green","blue"), lty=1:2, cex=0.8)
-
-    #Histogram
-    hist(as.numeric(filtering_files$sample_miss_df),
-         main = "Ratio of Missing Data per Sample After Filtering",
-         xlab = "Proportion of Missing Data per Sample",
-         ylab = "Number of Samples",
-         col = "lightblue",
-         border = "black",
-         xlim = c(0,1),
-         breaks = as.numeric(input$hist_bins))
-    axis(1, at = seq(0, 1, by = .1), labels = rep("", length(seq(0, 1, by = 0.1))))  # Add ticks
-
-    # Add vertical lines
-    abline(v = mean(as.numeric(filtering_files$sample_miss_df)), col = "red", lty = 2)  # Mean line
-    abline(v = median(as.numeric(filtering_files$sample_miss_df)), col = "green", lty = 2)  # Median line
-    abline(v = quantile(as.numeric(filtering_files$sample_miss_df), 0.95), col = "blue", lty = 2)
-    legend("topright", legend=c("mean", "median", "quantile"),
-           col=c("red", "green","blue"), lty=1:2, cex=0.8)
-
-
-  ##Read Depth (I would prefer that this show the mean depth for SNPs or Samples instead of all loci/sample cells)
-  quantile(as.numeric(new_df$DP), 0.95)
-
-
-})