diff --git a/R/mod_DosageCall.R b/R/mod_DosageCall.R index 64f6674..736978d 100644 --- a/R/mod_DosageCall.R +++ b/R/mod_DosageCall.R @@ -86,7 +86,7 @@ mod_DosageCall_ui <- function(id){ p(downloadButton(ns('download_vcf'),""), "VCF Example File"), p(downloadButton(ns('download_madc'),""), "MADC Example File"), hr(), p(HTML("Parameters description:"), actionButton(ns("goPar"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(), - p(HTML("Graphics description:"), actionButton(ns("goRes"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(), + p(HTML("Results description:"), actionButton(ns("goRes"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(), p(HTML("How to cite:"), actionButton(ns("goCite"), icon("arrow-up-right-from-square", verify_fa = FALSE) )), hr(), p(HTML("Updog tutorial:"), actionButton(ns("goUpdog"), icon("arrow-up-right-from-square", verify_fa = FALSE), onclick ="window.open('https://dcgerard.github.io/updog/', '_blank')" )), circle = FALSE, diff --git a/R/mod_help.R b/R/mod_help.R index b6d5d41..19960f0 100644 --- a/R/mod_help.R +++ b/R/mod_help.R @@ -28,16 +28,16 @@ mod_help_ui <- function(id){ )) ), box(title="Updog Dosage Calling", id = "Updog_Dosage_Calling_box",width = 12, collapsible = TRUE, collapsed = TRUE, status = "info", solidHeader = TRUE, - "**Draft**This tab is designed to handle the process of dosage calling in genomic data. Dosage calling is essential for determining the number of copies of a particular allele at each genomic location. The app likely includes functionalities to upload raw genomic data, apply various filtering criteria, and generate plots to visualize the distribution of dosages. Users can examine histograms for SNP max post probabilities and read depths, which help in assessing the quality and accuracy of the dosage calls.**Updog**", + "This tab is designed to handle the process of dosage calling in genomic data. 
Dosage calling is essential for determining the number of copies of a particular allele at each genomic location.", br(), br(), bs4Dash::tabsetPanel(id = "Updog_Dosage_Calling_tabset", - tabPanel("Parameters description", value = "Updog_Dosage_Calling_par", + tabPanel("Parameters description", value = "Updog_Dosage_Calling_par", br(), includeMarkdown(system.file("help_files/Updog_Dosage_Calling_par.Rmd", package = "BIGapp")) ), - tabPanel("Results description", value = "Updog_Dosage_Calling_results", + tabPanel("Results description", value = "Updog_Dosage_Calling_results", br(), includeMarkdown(system.file("help_files/Updog_Dosage_Calling_res.Rmd", package = "BIGapp")) ), - tabPanel("How to cite", value = "Updog_Dosage_Calling_cite", + tabPanel("How to cite", value = "Updog_Dosage_Calling_cite", br(), includeMarkdown(system.file("help_files/Updog_Dosage_Calling_cite.Rmd", package = "BIGapp")) )) ), diff --git a/inst/help_files/Updog_Dosage_Calling_cite.Rmd b/inst/help_files/Updog_Dosage_Calling_cite.Rmd index 9004582..21e35d4 100644 --- a/inst/help_files/Updog_Dosage_Calling_cite.Rmd +++ b/inst/help_files/Updog_Dosage_Calling_cite.Rmd @@ -4,3 +4,13 @@ output: html_document date: "2024-08-29" --- +* **BIGapp** + + +* **Updog package** + +Gerard, D., Ferrão, L. F. V., Garcia, A. A. F., & Stephens, M. (2018). Genotyping Polyploids from Messy Sequencing Data. Genetics, 210(3), 789-807. doi: 10.1534/genetics.118.301468. + +If you used the “norm” model cite also: + +Gerard D, Ferrão L (2020). “Priors for Genotyping Polyploids.” Bioinformatics, 36(6), 1795-1800. ISSN 1367-4803, doi: 10.1093/bioinformatics/btz852. 
diff --git a/inst/help_files/Updog_Dosage_Calling_par.Rmd b/inst/help_files/Updog_Dosage_Calling_par.Rmd index 9ea5f87..40305c8 100644 --- a/inst/help_files/Updog_Dosage_Calling_par.Rmd +++ b/inst/help_files/Updog_Dosage_Calling_par.Rmd @@ -4,11 +4,47 @@ output: html_document date: "2024-08-29" --- -:hammer: Under development - -About Population Models: -Model: What form should the prior (genotype distribution) take? -The following information is from the Updog manual: -Possible values of the genotype distribution (values of model) are: -`norm` A distribution whose genotype frequencies are proportional to the density value of a normal with some mean and some standard deviation. Unlike the `bb` and `hw` options, this will allow for distributions both more and less dispersed than a binomial. This seems to be the most robust to violations in modeling assumptions, and so is the default. This prior class was developed in Gerard and Ferrao (2020). `hw` A binomial distribution that results from assuming that the population is in Hardy-Weinberg equilibrium (HWE). This actually does pretty well even when there are minor to moderate deviations from HWE. Though it does not perform as well as the `norm` option when there are severe deviations from HWE. `bb` A beta-binomial distribution. This is an overdispersed version of `hw` and can be derived from a special case of the Balding-Nichols model. `s1` This prior assumes the individuals are all full-siblings resulting from one generation of selfing. I.e. there is only one parent. This model assumes a particular type of meiotic behavior: polysomic inheritance with bivalent, non-preferential pairing. -`f1` This prior assumes the individuals are all full-siblings resulting from one generation of a bi-parental cross. This model assumes a particular type of meiotic behavior: polysomic inheritance with bivalent, non-preferential pairing. 
`f1pp` This prior allows for double reduction and preferential pairing in an F1 population of tretraploids. `s1pp` This prior allows for double reduction and preferential pairing in an S1 population of tretraploids. `flex` Generically any categorical distribution. Theoretically, this works well if you have a lot of individuals. In practice, it seems to be much less robust to violations in modeling assumptions.`uniform` A discrete uniform distribution. This should never be used in practice.", +* **MADC or VCF file** + + * **MADC file**: + + * **VCF file**: +Variant Call Format (VCF) is a standard file format to store genetic variant information. The genotype (GT) data within the VCF is required for the analysis in this tab. For more details about the VCF format, see this document: https://samtools.github.io/hts-specs/VCFv4.2.pdf. + +* **Passport File**: A comma-separated values (CSV) file containing individual names (Sample_ID) in the first column and phenotype values in the subsequent columns. The phenotype column names should correspond to the phenotype ID. + + * **Select Category Subset**: After loading the passport file, this option will be available. You can select the column name on which to base the subsetting of the samples. + + * **Select Category Values**: Select the value within the selected column that should be kept for the analysis. For example, if you select the column “Species” from the example below and keep only “setosa” as the value, “Sample_1” will be removed from the analysis. + +
+ +|Sample_ID | Sepal.Length| Sepal.Width| Petal.Length| Petal.Width|Species | +|:---------:|:------------:|:-----------:|:------------:|:-----------:|:-------:| +|Sample_1 | 5.1| 3.5| 1.4| 0.2|versicolor | +|Sample_2 | 4.9| 3.0| 1.4| 0.2|setosa | +|Sample_3 | 4.7| 3.2| 1.3| 0.2|setosa | +|Sample_4 | 4.6| 3.1| 1.5| 0.2|setosa | +|Sample_5 | 5.0| 3.6| 1.4| 0.2|setosa | +|Sample_6 | 5.4| 3.9| 1.7| 0.4|setosa | + +
+ +  + +* **Output File Name**: Define output VCF file name + +* **Species Ploidy**: Specifies the ploidy level of the species. The current analysis supports both diploids and autopolyploids. + +* **Updog Model**: Select the model to be applied. + +The following information is from the Updog manual. Possible values of the genotype distribution (values of model) are: + +`norm` A distribution whose genotype frequencies are proportional to the density value of a normal with some mean and some standard deviation. Unlike the `bb` and `hw` options, this will allow for distributions both more and less dispersed than a binomial. This seems to be the most robust to violations in modeling assumptions, and so is the default. This prior class was developed in Gerard and Ferrao (2020). `hw` A binomial distribution that results from assuming that the population is in Hardy-Weinberg equilibrium (HWE). This actually does pretty well even when there are minor to moderate deviations from HWE. Though it does not perform as well as the `norm` option when there are severe deviations from HWE. `bb` A beta-binomial distribution. This is an overdispersed version of `hw` and can be derived from a special case of the Balding-Nichols model. `s1` This prior assumes the individuals are all full-siblings resulting from one generation of selfing. I.e. there is only one parent. This model assumes a particular type of meiotic behavior: polysomic inheritance with bivalent, non-preferential pairing. + +`f1` This prior assumes the individuals are all full-siblings resulting from one generation of a bi-parental cross. This model assumes a particular type of meiotic behavior: polysomic inheritance with bivalent, non-preferential pairing. `f1pp` This prior allows for double reduction and preferential pairing in an F1 population of tetraploids. `s1pp` This prior allows for double reduction and preferential pairing in an S1 population of tetraploids. `flex` Generically any categorical distribution. 
Theoretically, this works well if you have a lot of individuals. In practice, it seems to be much less robust to violations in modeling assumptions. `uniform` A discrete uniform distribution. This should never be used in practice. + + * **Parent**: If the “s1” or “s1pp” model is selected, you must define which sample corresponds to the parent by entering its sample ID in this box. The input sample ID must match the sample ID in the input genotype file + * **Parent1 and Parent2**: If the “f1” or “f1pp” model is selected, you must define which samples correspond to parent1 and parent2 by entering their sample IDs in the respective boxes. The input sample IDs must match the sample IDs in the input genotype file + +* **Number of CPU Cores**: Number of cores to be used in the multidog function parallelization diff --git a/inst/help_files/Updog_Dosage_Calling_res.Rmd b/inst/help_files/Updog_Dosage_Calling_res.Rmd index d7806fa..cd92fa3 100644 --- a/inst/help_files/Updog_Dosage_Calling_res.Rmd +++ b/inst/help_files/Updog_Dosage_Calling_res.Rmd @@ -3,3 +3,24 @@ title: "Updog_Dosage_Calling_res" output: html_document date: "2024-08-29" --- + +* **Download VCF file**: + +VCF file generated by function updog2vcf from package BIGr. The function adds lines to the VCF header specifying updog and BIGr versions and command line used. 
Updog information is kept in the INFO fields: + + * DP: Total Depth + * ADS: Depths for the ref and each alt allele in the order listed + * BIAS: The estimated allele bias of the SNP from updog + * OD: The estimated overdispersion parameter of the SNP from updog + * PMC: The estimated proportion of individuals misclassified in the SNP from updog + +And in the FORMAT fields: + + * GT: Genotype, where 1 is the count of alternate alleles + * UD: Dosage count of reference alleles from updog, where 0 = homozygous alternate + * DP: Read depth + * RA: Reference allele read depth + * AD: Allelic depths for the ref and alt alleles in the order listed + * MPP: Maximum posterior probability for that dosage call from updog + +If the “f1” or “f1pp” model is selected, the VCF will contain “parent1” and “parent2” as IDs for the parents defined. If the “s1” or “s1pp” model is selected, the VCF will contain “parent” as the ID for the input parent.