Skip to content

Commit

Permalink
STAARpipeline v0.9.7
Browse files Browse the repository at this point in the history
  • Loading branch information
xihaoli committed Nov 10, 2023
1 parent 89d552b commit bca0e17
Show file tree
Hide file tree
Showing 93 changed files with 8,796 additions and 658 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
shell: Rscript {0}
- name: Install GENESIS, GenomicFeatures
run: |
BiocManager::install(c("GENESIS", "GenomicFeatures")
BiocManager::install(c("GENESIS", "GenomicFeatures"))
shell: Rscript {0}
- name: Install TxDb.Hsapiens.UCSC.hg38.knownGene
run: |
Expand Down
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Package: STAARpipeline
Type: Package
Title: STAARpipeline for Analyzing Whole-Genome/Whole-Exome Sequencing Data
Version: 0.9.7
Date: 2023-10-31
Date: 2023-11-09
Author: Xihao Li [aut, cre], Zilin Li [aut, cre], Sheila M. Gaynor [aut], Han Chen [aut]
Maintainer: Xihao Li <[email protected]>, Zilin Li <[email protected]>
Description: An R package for performing STAARpipeline in analyzing whole-genome/whole-exome sequencing data.
Expand Down
14 changes: 8 additions & 6 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@ useDynLib(STAARpipeline, .registration = TRUE)

# functions
export(fit_nullmodel,
genesis2staar_nullmodel,staar2scang_nullmodel,
Individual_Analysis,Individual_Analysis_cond,
Gene_Centric_Coding,Gene_Centric_Coding_cond,
Gene_Centric_Noncoding,Gene_Centric_Noncoding_cond,
ncRNA,ncRNA_cond,
Sliding_Window,Sliding_Window_cond,Dynamic_Window_SCANG,
genesis2staar_nullmodel,
staar2scang_nullmodel,
Individual_Analysis,Individual_Analysis_cond,Individual_Analysis_cond_spa,
Gene_Centric_Coding,Gene_Centric_Coding_cond,Gene_Centric_Coding_cond_spa,
Gene_Centric_Noncoding,Gene_Centric_Noncoding_cond,Gene_Centric_Noncoding_cond_spa,
ncRNA,ncRNA_cond,ncRNA_cond_spa,
Sliding_Window,Sliding_Window_cond,Sliding_Window_cond_spa,
Dynamic_Window_SCANG,
LD_pruning)
2 changes: 1 addition & 1 deletion R/Dynamic_Window_SCANG.R
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ Dynamic_Window_SCANG <- function(chr,start_loc,end_loc,genofile,obj_nullmodel,
try(res <- SCANG(genotype=Geno,obj_nullmodel=obj_nullmodel,annotation_phred=Anno.Int.PHRED.sub,Lmin=Lmin,Lmax=Lmax,steplength=steplength,alpha=alpha,rare_maf_cutoff=rare_maf_cutoff,filter=p_filter,f=f),silent=silent)
}

if(class(res)=="list")
if(inherits(res, "list"))
{
position_sub <- position_sub[res$RV_label]

Expand Down
69 changes: 57 additions & 12 deletions R/Gene_Centric_Coding.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,20 @@
#'
#' The \code{Gene_Centric_Coding} function takes in chromosome, gene name, functional category,
#' the object of opened annotated GDS file, and the object from fitting the null model to analyze the association between a
#' quantitative/dichotomous phenotype and coding functional categories of a gene by using STAAR procedure.
#' quantitative/dichotomous phenotype (including imbalanced case-control design) and coding functional categories of a gene by using STAAR procedure.
#' For each coding functional category, the STAAR-O p-value is a p-value from an omnibus test
#' that aggregated SKAT(1,25), SKAT(1,1), Burden(1,25), Burden(1,1), ACAT-V(1,25),
#' and ACAT-V(1,1) together with p-values of each test weighted by each annotation
#' using Cauchy method. For multiple phenotype analysis (\code{obj_nullmodel$n.pheno > 1}),
#' using Cauchy method. For the imbalanced case-control setting, the results correspond to the STAAR-B p-value, which is a p-value from
#' an omnibus test that aggregated Burden(1,25) and Burden(1,1) together with p-values of each test weighted by each annotation using Cauchy method.
#' For multiple phenotype analysis (\code{obj_nullmodel$n.pheno > 1}),
#' the results correspond to multi-trait association p-values (e.g. MultiSTAAR-O) by leveraging
#' the correlation structure between multiple phenotypes.
#' @param chr chromosome.
#' @param gene_name name of the gene to be analyzed using STAAR procedure.
#' @param category the coding functional category to be analyzed using STAAR procedure. Choices include
#' \code{all_categories}, \code{plof}, \code{plof_ds}, \code{missense}, \code{disruptive_missense}, \code{synonymous} (default = \code{all_categories}).
#' \code{all_categories}, \code{plof}, \code{plof_ds}, \code{missense}, \code{disruptive_missense}, \code{synonymous},
#' \code{ptv}, \code{ptv_ds}, \code{all_categories_incl_ptv} (default = \code{all_categories}).
#' @param genofile an object of opened annotated GDS (aGDS) file.
#' @param obj_nullmodel an object from fitting the null model, which is either the output from \code{\link{fit_nullmodel}} function,
#' or the output from \code{fitNullModel} function in the \code{GENESIS} package and transformed using the \code{\link{genesis2staar_nullmodel}} function.
Expand All @@ -27,8 +30,10 @@
#' @param Annotation_name_catalog a data frame containing the name and the corresponding channel name in the aGDS file.
#' @param Use_annotation_weights use annotations as weights or not (default = TRUE).
#' @param Annotation_name a vector of annotation names used in STAAR (default = NULL).
#' @param SPA_p_filter logical: should the SPA method be used to recalculate the p-value only for variants whose normal-approximation-based p-value is smaller than a pre-specified threshold; only used for imbalanced case-control setting (default = FALSE).
#' @param p_filter_cutoff threshold for the p-value recalculation using the SPA method, only used for imbalanced case-control setting (default = 0.05).
#' @param silent logical: should the report of error messages be suppressed (default = FALSE).
#' @return a list of data frames containing the STAAR p-values (including STAAR-O) corresponding to the coding functional category of the given gene.
#' @return A list of data frames containing the STAAR p-values (including STAAR-O or STAAR-B in imbalanced case-control setting) corresponding to the coding functional category of the given gene.
#' @references Li, Z., Li, X., et al. (2022). A framework for detecting
#' noncoding rare-variant associations of large-scale whole-genome sequencing
#' studies. \emph{Nature Methods}, \emph{19}(12), 1599-1611.
Expand All @@ -39,11 +44,12 @@
#' (\href{https://doi.org/10.1038/s41588-020-0676-4}{pub})
#' @export

Gene_Centric_Coding <- function(chr,gene_name,category=c("all_categories","plof","plof_ds","missense","disruptive_missense","synonymous"),
Gene_Centric_Coding <- function(chr,gene_name,category=c("all_categories","plof","plof_ds","missense","disruptive_missense","synonymous","ptv","ptv_ds","all_categories_incl_ptv"),
genofile,obj_nullmodel,rare_maf_cutoff=0.01,rv_num_cutoff=2,
QC_label="annotation/filter",variant_type=c("SNV","Indel","variant"),geno_missing_imputation=c("mean","minor"),
Annotation_dir="annotation/info/FunctionalAnnotation",Annotation_name_catalog,
Use_annotation_weights=c(TRUE,FALSE),Annotation_name=NULL,silent=FALSE){
Use_annotation_weights=c(TRUE,FALSE),Annotation_name=NULL,
SPA_p_filter=FALSE,p_filter_cutoff=0.05,silent=FALSE){

## evaluate choices
category <- match.arg(category)
Expand All @@ -58,7 +64,8 @@ Gene_Centric_Coding <- function(chr,gene_name,category=c("all_categories","plof"
rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,
QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation,
Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog,
Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name,silent=silent)
Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name,
SPA_p_filter=SPA_p_filter,p_filter_cutoff=p_filter_cutoff,silent=silent)
}

if(category=="plof")
Expand All @@ -67,7 +74,8 @@ Gene_Centric_Coding <- function(chr,gene_name,category=c("all_categories","plof"
rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,
QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation,
Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog,
Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name,silent=silent)
Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name,
SPA_p_filter=SPA_p_filter,p_filter_cutoff=p_filter_cutoff,silent=silent)
}

if(category=="plof_ds")
Expand All @@ -76,7 +84,8 @@ Gene_Centric_Coding <- function(chr,gene_name,category=c("all_categories","plof"
rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,
QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation,
Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog,
Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name,silent=silent)
Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name,
SPA_p_filter=SPA_p_filter,p_filter_cutoff=p_filter_cutoff,silent=silent)
}

if(category=="missense")
Expand All @@ -85,7 +94,8 @@ Gene_Centric_Coding <- function(chr,gene_name,category=c("all_categories","plof"
rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,
QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation,
Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog,
Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name,silent=silent)
Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name,
SPA_p_filter=SPA_p_filter,p_filter_cutoff=p_filter_cutoff,silent=silent)
}

if(category=="disruptive_missense")
Expand All @@ -94,7 +104,8 @@ Gene_Centric_Coding <- function(chr,gene_name,category=c("all_categories","plof"
rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,
QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation,
Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog,
Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name,silent=silent)
Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name,
SPA_p_filter=SPA_p_filter,p_filter_cutoff=p_filter_cutoff,silent=silent)
}

if(category=="synonymous")
Expand All @@ -103,7 +114,41 @@ Gene_Centric_Coding <- function(chr,gene_name,category=c("all_categories","plof"
rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,
QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation,
Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog,
Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name,silent=silent)
Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name,
SPA_p_filter=SPA_p_filter,p_filter_cutoff=p_filter_cutoff,silent=silent)
}


if(category=="ptv")
{
results <- ptv(chr,gene_name,genofile,obj_nullmodel,genes,
rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,
QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation,
Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog,
Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name,
SPA_p_filter=SPA_p_filter,p_filter_cutoff=p_filter_cutoff,silent=silent)
}


if(category=="ptv_ds")
{
results <- ptv_ds(chr,gene_name,genofile,obj_nullmodel,genes,
rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,
QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation,
Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog,
Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name,
SPA_p_filter=SPA_p_filter,p_filter_cutoff=p_filter_cutoff,silent=silent)
}


if(category=="all_categories_incl_ptv")
{
results <- coding_incl_ptv(chr,gene_name,genofile,obj_nullmodel,genes,
rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,
QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation,
Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog,
Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name,
SPA_p_filter=SPA_p_filter,p_filter_cutoff=p_filter_cutoff,silent=silent)
}

return(results)
Expand Down
28 changes: 25 additions & 3 deletions R/Gene_Centric_Coding_cond.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#' @param chr chromosome.
#' @param gene_name name of the gene to be analyzed using STAAR procedure.
#' @param category the coding functional category to be analyzed using STAAR procedure. Choices include
#' \code{plof}, \code{plof_ds}, \code{missense}, \code{disruptive_missense}, \code{synonymous} (default = \code{plof}).
#' \code{plof}, \code{plof_ds}, \code{missense}, \code{disruptive_missense}, \code{synonymous}, \code{ptv}, \code{ptv_ds} (default = \code{plof}).
#' @param genofile an object of opened annotated GDS (aGDS) file.
#' @param obj_nullmodel an object from fitting the null model, which is either the output from \code{\link{fit_nullmodel}} function,
#' or the output from \code{fitNullModel} function in the \code{GENESIS} package and transformed using the \code{\link{genesis2staar_nullmodel}} function.
Expand All @@ -36,7 +36,7 @@
#' @param Annotation_name_catalog a data frame containing the name and the corresponding channel name in the aGDS file.
#' @param Use_annotation_weights use annotations as weights or not (default = TRUE).
#' @param Annotation_name a vector of annotation names used in STAAR (default = NULL).
#' @return a data frame containing the conditional STAAR p-values (including STAAR-O) corresponding to each coding functional category of the given gene.
#' @return A data frame containing the conditional STAAR p-values (including STAAR-O) corresponding to each coding functional category of the given gene.
#' @references Li, Z., Li, X., et al. (2022). A framework for detecting
#' noncoding rare-variant associations of large-scale whole-genome sequencing
#' studies. \emph{Nature Methods}, \emph{19}(12), 1599-1611.
Expand All @@ -50,7 +50,7 @@
#' (\href{https://doi.org/10.1002/gepi.22188}{pub})
#' @export

Gene_Centric_Coding_cond <- function(chr,gene_name,category=c("plof","plof_ds","missense","disruptive_missense","synonymous"),
Gene_Centric_Coding_cond <- function(chr,gene_name,category=c("plof","plof_ds","missense","disruptive_missense","synonymous","ptv","ptv_ds"),
genofile,obj_nullmodel,known_loci=NULL,rare_maf_cutoff=0.01,rv_num_cutoff=2,
method_cond=c("optimal","naive"),
QC_label="annotation/filter",variant_type=c("SNV","Indel","variant"),geno_missing_imputation=c("mean","minor"),
Expand Down Expand Up @@ -119,6 +119,28 @@ Gene_Centric_Coding_cond <- function(chr,gene_name,category=c("plof","plof_ds","
Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name)
}

if(category=="ptv")
{
results <- ptv_cond(chr,gene_name,genofile,obj_nullmodel,genes,
known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,
method_cond=method_cond,
QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation,
Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog,
Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name)
}

if(category=="ptv_ds")
{
results <- ptv_ds_cond(chr,gene_name,genofile,obj_nullmodel,genes,
known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,
method_cond=method_cond,
QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation,
Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog,
Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name)
}



return(results)
}

Loading

0 comments on commit bca0e17

Please sign in to comment.