Skip to content

Commit

Permalink
Merge pull request #45 from Breeding-Insight/development
Browse files Browse the repository at this point in the history
Development
  • Loading branch information
alex-sandercock authored Aug 28, 2024
2 parents 0e287cd + 8f7e599 commit 56ddbba
Show file tree
Hide file tree
Showing 5 changed files with 215 additions and 86 deletions.
208 changes: 133 additions & 75 deletions R/mod_Filtering.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,7 @@ mod_Filtering_ui <- function(id){
)
),
column(width = 6,
tabBox(width =12, collapsible = FALSE, status = "info",
id = "updog_tab", height = "600px",
tabPanel("Bias Histogram", icon = icon("image"), plotOutput(ns("bias_hist"), height = '550px')),
tabPanel("OD Histogram", icon = icon("image"), plotOutput(ns("od_hist"), height = '550px')),
tabPanel("Prop_mis Histogram", icon = icon("image"), plotOutput(ns("maxpostprob_hist"), height = '550px')),
tabPanel("SNP_miss", icon = icon("image"), plotOutput(ns("missing_snp_hist"), height = '550px')),
tabPanel("Sample_miss", icon = icon("image"), plotOutput(ns("missing_sample_hist"), height = '550px'))
)
uiOutput(ns("din_tabs")),
),
column(width = 3,
valueBoxOutput(ns("snp_retained_box"), width = NULL),
Expand Down Expand Up @@ -139,6 +132,13 @@ mod_Filtering_server <- function(id){

disable("start_updog_filter")

output$din_tabs <- renderUI({
tabBox(width =12, collapsible = FALSE, status = "info",
id = "updog_tab", height = "600px",
tabPanel("Results", p("Upload VCF file to access results in this section."))
)
})

vcf <- eventReactive(input$run_filters, {

# Ensure the files are uploaded
Expand All @@ -165,7 +165,37 @@ mod_Filtering_server <- function(id){

req(input$filter_ploidy, input$filter_output_name,input$updog_rdata)

if (input$use_updog) {
#Input file
vcf <- read.vcfR(input$updog_rdata$datapath, verbose = FALSE)

# Identify whether the VCF carries the updog parameters:
# MPP is looked up in the FORMAT strings, PMC/BIAS/OD in the first record's INFO string.
format_fields <- unique(vcf@gt[,1])
info_fields <- vcf@fix[1,8]
# format_fields holds every distinct FORMAT string, so grepl() on it can return
# more than one value. Collapse with any() and combine with scalar && so that
# updog_par is always a single TRUE/FALSE and is safe to use inside if().
# NOTE(review): any() accepts the file when at least one record carries MPP;
# switch to all() if every record must carry it -- confirm the intended rule.
updog_par <- any(grepl("MPP", format_fields)) &&
  grepl("PMC", info_fields) &&
  grepl("BIAS", info_fields) &&
  grepl("OD", info_fields)

if(updog_par){
output$din_tabs <- renderUI({
tabBox(width =12, collapsible = FALSE, status = "info",
id = "updog_tab", height = "600px",
tabPanel("Bias Histogram", icon = icon("image"), plotOutput(ns("bias_hist"), height = '550px')),
tabPanel("OD Histogram", icon = icon("image"), plotOutput(ns("od_hist"), height = '550px')),
tabPanel("Prop_mis Histogram", icon = icon("image"), plotOutput(ns("maxpostprob_hist"), height = '550px')),
tabPanel("SNP_miss", icon = icon("image"), plotOutput(ns("missing_snp_hist"), height = '550px')),
tabPanel("Sample_miss", icon = icon("image"), plotOutput(ns("missing_sample_hist"), height = '550px'))
)
})
} else {
output$din_tabs <- renderUI({
tabBox(width =12, collapsible = FALSE, status = "info",
id = "updog_tab", height = "600px",
tabPanel("SNP_miss", icon = icon("image"), plotOutput(ns("missing_snp_hist"), height = '550px')),
tabPanel("Sample_miss", icon = icon("image"), plotOutput(ns("missing_sample_hist"), height = '550px'))
)
})
}


if (input$use_updog & updog_par) {
# Use Updog filtering parameters
OD_filter <- as.numeric(input$OD_filter)
Prop_mis <- as.numeric(input$Prop_mis)
Expand Down Expand Up @@ -193,8 +223,7 @@ mod_Filtering_server <- function(id){
maf_filter <- input$filter_maf

updateProgressBar(session = session, id = "pb_filter", value = 10, title = "Processing VCF file")
#Input file
vcf <- read.vcfR(input$updog_rdata$datapath, verbose = FALSE)

#Starting SNPs
starting_snps <- nrow(vcf)
output$snp_removed_box <- renderValueBox({
Expand Down Expand Up @@ -226,6 +255,23 @@ mod_Filtering_server <- function(id){
filter.MAF = as.numeric(maf_filter),
filter.MPP = max_post)

if (length(vcf@gt) == 0) {
shinyalert(
title = "All markers were filtered out",
text = "Loose the parameters to access results in this tab",
size = "s",
closeOnEsc = TRUE,
closeOnClickOutside = FALSE,
html = TRUE,
type = "error",
showConfirmButton = TRUE,
confirmButtonText = "OK",
confirmButtonCol = "#004192",
showCancelButton = FALSE,
animation = TRUE
)
}

#Getting missing data information
#Add support for genotype matrix filtering?
#Pb
Expand Down Expand Up @@ -336,6 +382,8 @@ mod_Filtering_server <- function(id){
abline(v = median(as.numeric(filtering_output$df$BIAS)), col = "green", lty = 2) # Median line
abline(v = 0.5, col = "black", lty = 2) # proposed lower line
abline(v = 2, col = "black", lty = 2) # proposed upper line
legend("topright", legend=c("mean", "median", "suggested threshold"),
col=c("red", "green","black"), lty=2, cex=0.8)

} else if (input$filter_hist == "OD Histogram") {

Expand All @@ -355,6 +403,8 @@ mod_Filtering_server <- function(id){
abline(v = mean(as.numeric(filtering_output$df$OD)), col = "red", lty = 2) # Mean line
abline(v = median(as.numeric(filtering_output$df$OD)), col = "green", lty = 2) # Median line
abline(v = 0.05, col = "black", lty = 2) # proposed filter by updog
legend("topright", legend=c("mean", "median", "suggested threshold"),
col=c("red", "green","black"), lty=2, cex=0.8)

} else if (input$filter_hist == "Prop_mis Histogram") {

Expand All @@ -372,6 +422,8 @@ mod_Filtering_server <- function(id){
abline(v = mean(as.numeric(filtering_output$df$PMC)), col = "red", lty = 2) # Mean line
abline(v = median(as.numeric(filtering_output$df$PMC)), col = "green", lty = 2) # Median line
abline(v = quantile(as.numeric(filtering_output$df$PMC), 0.95), col = "blue", lty = 2)
legend("topright", legend=c("mean", "median", "quantile"),
col=c("red", "green","blue"), lty=2, cex=0.8)

} else if (input$filter_hist == "SNP_mis") {

Expand All @@ -389,6 +441,8 @@ mod_Filtering_server <- function(id){
abline(v = mean(as.numeric(filtering_files$snp_miss_df)), col = "red", lty = 2) # Mean line
abline(v = median(as.numeric(filtering_files$snp_miss_df)), col = "green", lty = 2) # Median line
abline(v = quantile(as.numeric(filtering_files$snp_miss_df), 0.95), col = "blue", lty = 2)
legend("topright", legend=c("mean", "median", "quantile"),
col=c("red", "green","blue"), lty=2, cex=0.8)

} else if (input$filter_hist == "Sample_mis") {

Expand All @@ -406,6 +460,8 @@ mod_Filtering_server <- function(id){
abline(v = mean(as.numeric(filtering_files$sample_miss_df)), col = "red", lty = 2) # Mean line
abline(v = median(as.numeric(filtering_files$sample_miss_df)), col = "green", lty = 2) # Median line
abline(v = quantile(as.numeric(filtering_files$sample_miss_df), 0.95), col = "blue", lty = 2)
legend("topright", legend=c("mean", "median", "quantile"),
col=c("red", "green","blue"), lty=2, cex=0.8)
}
dev.off()
}
Expand All @@ -421,19 +477,6 @@ mod_Filtering_server <- function(id){

observeEvent(filtering_files$raw_vcf_df, {


# Function to split a VCF INFO string and expand it into a named list
#
# Arguments:
#   info: a single INFO string of ";"-separated entries, e.g. "DP=10;BIAS=0.98;DB".
#         Only the first element is used if a longer vector is supplied.
# Returns:
#   A named list with one character value per entry, named by the entry key.
#   Flag entries without "=" get NA. Empty entries (e.g. from a trailing ";")
#   and a missing or empty INFO string are skipped.
split_info_column <- function(info) {
  # Split the INFO column by semicolon and drop empty/missing entries
  fields <- strsplit(as.character(info), ";", fixed = TRUE)[[1]]
  fields <- fields[!is.na(fields) & nzchar(fields)]

  # Locate the first equals sign of each entry; splitting only there keeps
  # values that themselves contain "=" intact
  eq_pos <- regexpr("=", fields, fixed = TRUE)
  has_value <- eq_pos > 0

  # Entries without "=" are flags: the whole entry is the key, the value is NA
  keys <- fields
  values <- rep(NA_character_, length(fields))
  keys[has_value] <- substr(fields[has_value], 1L, eq_pos[has_value] - 1L)
  values[has_value] <- substr(fields[has_value], eq_pos[has_value] + 1L,
                              nchar(fields[has_value]))

  stats::setNames(as.list(values), keys)
}

# Apply the function to each row and bind the results into a new dataframe
new_df <- data.frame(filtering_files$raw_vcf_df) %>%
mutate(INFO_list = map(INFO, split_info_column)) %>%
Expand All @@ -450,67 +493,80 @@ mod_Filtering_server <- function(id){
###Bias

#Histogram
output$bias_hist <- renderPlot({
hist(as.numeric(new_df$BIAS),
main = "Unfiltered SNP bias histogram",
xlab = "bias",
ylab = "SNPs",
col = "lightblue",
border = "black",
xlim = c(0,5),
breaks = as.numeric(input$hist_bins))
axis(1, at = seq(0, 5, by = .2), labels = rep("", length(seq(0, 5, by = 0.2)))) # Add ticks
abline(v = mean(as.numeric(new_df$BIAS)), col = "red", lty = 2) # Mean line
abline(v = median(as.numeric(new_df$BIAS)), col = "green", lty = 2) # Median line
abline(v = 0.5, col = "black", lty = 2) # proposed lower line
abline(v = 2, col = "black", lty = 2) # proposed upper line
})
if(any(grepl("BIAS", colnames(new_df)))){
output$bias_hist <- renderPlot({
hist(as.numeric(new_df$BIAS),
main = "Unfiltered SNP bias histogram",
xlab = "bias",
ylab = "SNPs",
col = "lightblue",
border = "black",
xlim = c(0,5),
breaks = as.numeric(input$hist_bins))
axis(1, at = seq(0, 5, by = .2), labels = rep("", length(seq(0, 5, by = 0.2)))) # Add ticks
abline(v = mean(as.numeric(new_df$BIAS)), col = "red", lty = 2) # Mean line
abline(v = median(as.numeric(new_df$BIAS)), col = "green", lty = 2) # Median line
abline(v = 0.5, col = "black", lty = 2) # proposed lower line
abline(v = 2, col = "black", lty = 2) # proposed upper line
legend("topright", legend=c("mean", "median", "suggested threshold"),
col=c("red", "green","black"), lty=2, cex=0.8)
})
}

###OD
quantile(as.numeric(new_df$OD), 0.95)
#Histogram
output$od_hist <- renderPlot({
hist(as.numeric(new_df$OD),
main = "Unfiltered SNP overdispersion parameter histogram",
xlab = "OD",
ylab = "SNPs",
col = "lightblue",
border = "black",
xlim = c(0,0.6),
breaks = as.numeric(input$hist_bins))
axis(1, at = seq(0, 0.6, by = .01), labels = rep("", length(seq(0, 0.6, by = 0.01)))) # Add ticks
abline(v = 0.05, col = "black", lty = 2) # proposed filter by updog
if(any(grepl("OD", colnames(new_df)))){

# Add vertical lines
abline(v = mean(as.numeric(new_df$OD)), col = "red", lty = 2) # Mean line
abline(v = median(as.numeric(new_df$OD)), col = "green", lty = 2) # Median line
abline(v = 0.05, col = "black", lty = 2) # proposed filter by updog
quantile(as.numeric(new_df$OD), 0.95)
#Histogram
output$od_hist <- renderPlot({
hist(as.numeric(new_df$OD),
main = "Unfiltered SNP overdispersion parameter histogram",
xlab = "OD",
ylab = "SNPs",
col = "lightblue",
border = "black",
xlim = c(0,0.6),
breaks = as.numeric(input$hist_bins))
axis(1, at = seq(0, 0.6, by = .01), labels = rep("", length(seq(0, 0.6, by = 0.01)))) # Add ticks
abline(v = 0.05, col = "black", lty = 2) # proposed filter by updog

})
# Add vertical lines
abline(v = mean(as.numeric(new_df$OD)), col = "red", lty = 2) # Mean line
abline(v = median(as.numeric(new_df$OD)), col = "green", lty = 2) # Median line
abline(v = 0.05, col = "black", lty = 2) # proposed filter by updog
legend("topright", legend=c("mean", "median", "suggested threshold"),
col=c("red", "green","black"), lty=2, cex=0.8)

})
}

##MAXPOSTPROB

#Histogram
if(any(grepl("PMC", colnames(new_df)))){

output$maxpostprob_hist <- renderPlot({
output$maxpostprob_hist <- renderPlot({

#Histogram
hist(as.numeric(new_df$PMC),
main = "The estimated proportion of individuals misclassified in the SNP from updog",
xlab = "Proportion of Misclassified Genotypes per SNP",
ylab = "Number of SNPs",
col = "lightblue",
border = "black",
xlim = c(0,1),
breaks = as.numeric(input$hist_bins))
axis(1, at = seq(0, 1, by = .1), labels = rep("", length(seq(0, 1, by = 0.1)))) # Add ticks
#Histogram
hist(as.numeric(new_df$PMC),
main = "The estimated proportion of individuals misclassified in the SNP from updog",
xlab = "Proportion of Misclassified Genotypes per SNP",
ylab = "Number of SNPs",
col = "lightblue",
border = "black",
xlim = c(0,1),
breaks = as.numeric(input$hist_bins))
axis(1, at = seq(0, 1, by = .1), labels = rep("", length(seq(0, 1, by = 0.1)))) # Add ticks

# Add vertical lines
abline(v = mean(as.numeric(new_df$PMC)), col = "red", lty = 2) # Mean line
abline(v = median(as.numeric(new_df$PMC)), col = "green", lty = 2) # Median line
abline(v = quantile(as.numeric(new_df$PMC), 0.95), col = "blue", lty = 2)
# Add vertical lines
abline(v = mean(as.numeric(new_df$PMC)), col = "red", lty = 2) # Mean line
abline(v = median(as.numeric(new_df$PMC)), col = "green", lty = 2) # Median line
abline(v = quantile(as.numeric(new_df$PMC), 0.95), col = "blue", lty = 2)
legend("topright", legend=c("mean", "median", "quantile"),
col=c("red", "green","blue"), lty=2, cex=0.8)

})
})
}

#Missing data
output$missing_snp_hist <- renderPlot({
Expand All @@ -530,7 +586,8 @@ mod_Filtering_server <- function(id){
abline(v = mean(as.numeric(filtering_files$snp_miss_df)), col = "red", lty = 2) # Mean line
abline(v = median(as.numeric(filtering_files$snp_miss_df)), col = "green", lty = 2) # Median line
abline(v = quantile(as.numeric(filtering_files$snp_miss_df), 0.95), col = "blue", lty = 2)

legend("topright", legend=c("mean", "median", "quantile"),
col=c("red", "green","blue"), lty=2, cex=0.8)
})

output$missing_sample_hist <- renderPlot({
Expand All @@ -550,7 +607,8 @@ mod_Filtering_server <- function(id){
abline(v = mean(as.numeric(filtering_files$sample_miss_df)), col = "red", lty = 2) # Mean line
abline(v = median(as.numeric(filtering_files$sample_miss_df)), col = "green", lty = 2) # Median line
abline(v = quantile(as.numeric(filtering_files$sample_miss_df), 0.95), col = "blue", lty = 2)

legend("topright", legend=c("mean", "median", "quantile"),
col=c("red", "green","blue"), lty=2, cex=0.8)
})

##Read Depth (I would prefer that this show the mean depth for SNPs or Samples instead of all loci/sample cells)
Expand Down
34 changes: 31 additions & 3 deletions R/mod_gwas.R
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,13 @@ mod_gwas_server <- function(id){
#I think I can subset the read.GWAS file pheno and fixed categories (data@pheno[,c("trait")]) and data@fixed = phenotype_file[,c("List of fixed traits")]
phenotype_file <- read.csv(input$phenotype_file$datapath, header = TRUE, check.names = FALSE)

# Remove empty lines
rm.empty <- which(apply(phenotype_file, 1, function(x) all(is.na(x) | x == "")))
if(length(rm.empty) > 0){
warning(paste("Removing", length(rm.empty),"empty lines"))
phenotype_file <- phenotype_file[-rm.empty,]
}

ids <- colnames(phenotype_file)[1]
traits <- input$trait_info
fixed <- input$fixed_info
Expand Down Expand Up @@ -204,9 +211,6 @@ mod_gwas_server <- function(id){
#Save new phenotype file with selected traits and fixed effects
write.csv(phenotype_file, file = temp_pheno_file, row.names = FALSE)

#Remove the phenotype_file from memory
rm(phenotype_file)

#Status
updateProgressBar(session = session, id = "pb_gwas", value = 5, title = "Upload Complete: Now Formatting GWASpoly Data")

Expand All @@ -215,6 +219,8 @@ mod_gwas_server <- function(id){

#Geno.file conversion if needed
if (grepl("\\.csv$", file_path)) {
#TODO: Add check for matches of sample names in genotype and phenotype data

data <- read.GWASpoly(ploidy= ploidy, pheno.file= temp_pheno_file, geno.file=input$gwas_file$datapath,
format="numeric", n.traits=length(traits), delim=",") #only need to change files here

Expand All @@ -231,6 +237,28 @@ mod_gwas_server <- function(id){
class(geno_mat) <- "numeric"
info <- data.frame(vcf@fix)
gpoly_df <- cbind(info[,c("ID","CHROM","POS")], geno_mat)

# The genotype table's column names include the VCF sample IDs; at least one of
# them must also be present in the phenotype file's Sample_ID column.
# Computed once and reused by both the pop-up and the validation below.
ids_match <- any(colnames(gpoly_df) %in% phenotype_file$Sample_ID)

if (!ids_match) {
  shinyalert(
    title = "Samples ID do not match",
    text = paste("Check if passport/phenotype files have same sample ID as the VCF/genotype file."),
    size = "s",
    closeOnEsc = TRUE,
    closeOnClickOutside = FALSE,
    html = TRUE,
    type = "error",
    showConfirmButton = TRUE,
    confirmButtonText = "OK",
    confirmButtonCol = "#004192",
    showCancelButton = FALSE,
    animation = TRUE
  )
}

# Stop processing on a mismatch. The message now describes the condition that
# is actually tested (it previously reported non-numerical traits).
validate(
  need(ids_match, "Sample IDs in the VCF/genotype file do not match the sample IDs in the phenotype file.")
)

write.csv(gpoly_df, file = temp_geno_file, row.names = FALSE)

data <- read.GWASpoly(ploidy= ploidy, pheno.file= temp_pheno_file, geno.file=temp_geno_file,
Expand Down
12 changes: 12 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -280,3 +280,15 @@ posdefmat <- function(mat) {
}
return(g)
}

# Function to split a VCF INFO string and expand it into a named list
#
# Arguments:
#   info: a single INFO string of ";"-separated entries, e.g. "DP=10;BIAS=0.98;DB".
#         Only the first element is used if a longer vector is supplied.
# Returns:
#   A named list with one character value per entry, named by the entry key.
#   Flag entries without "=" get NA. Empty entries (e.g. from a trailing ";")
#   and a missing or empty INFO string are skipped.
split_info_column <- function(info) {
  # Split the INFO column by semicolon and drop empty/missing entries
  fields <- strsplit(as.character(info), ";", fixed = TRUE)[[1]]
  fields <- fields[!is.na(fields) & nzchar(fields)]

  # Locate the first equals sign of each entry; splitting only there keeps
  # values that themselves contain "=" intact
  eq_pos <- regexpr("=", fields, fixed = TRUE)
  has_value <- eq_pos > 0

  # Entries without "=" are flags: the whole entry is the key, the value is NA
  keys <- fields
  values <- rep(NA_character_, length(fields))
  keys[has_value] <- substr(fields[has_value], 1L, eq_pos[has_value] - 1L)
  values[has_value] <- substr(fields[has_value], eq_pos[has_value] + 1L,
                              nchar(fields[has_value]))

  stats::setNames(as.list(values), keys)
}
Loading

0 comments on commit 56ddbba

Please sign in to comment.