diff --git a/.travis.yml b/.travis.yml index 8373798..00f216a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,9 +12,7 @@ env: - _R_CHECK_TIMINGS_="0" ## get the timing information for the examples for all of your functions r: - - oldrel - release - - devel # do not build vignettes...takes too long and times out on travis r_build_args: --no-build-vignettes --no-manual diff --git a/DESCRIPTION b/DESCRIPTION index d6be4a4..96d0439 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -14,7 +14,7 @@ Imports: caTools, ggplot2, reshape2, - gridExtra, + gridExtra, rjags, coda, parallel, @@ -22,6 +22,6 @@ Imports: HiddenMarkov RoxygenNote: 6.0.1 Suggests: - knitr, - rmarkdown + knitr, + rmarkdown VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index 3feeaef..4f66d4e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -24,3 +24,10 @@ export(setMvFit) export(summarizeMatrix) export(trimSnps) exportClasses(HoneyBADGER) +import(ggplot2) +import(grDevices) +import(graphics) +import(gridExtra) +import(reshape2) +import(rjags) +import(stats) diff --git a/R/HoneyBADGER.R b/R/HoneyBADGER.R index de5e268..9d527ba 100644 --- a/R/HoneyBADGER.R +++ b/R/HoneyBADGER.R @@ -112,7 +112,9 @@ HoneyBADGER <- setRefClass( #' data(gexp) #' data(ref) #' require(biomaRt) ## for gene coordinates -#' mart.obj <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", dataset = 'hsapiens_gene_ensembl', host = "jul2015.archive.ensembl.org") +#' mart.obj <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", +#' dataset = 'hsapiens_gene_ensembl', +#' host = "jul2015.archive.ensembl.org") #' hb <- HoneyBADGER$new() #' hb$setGexpMats(gexp, ref, mart.obj, filter=FALSE, scale=FALSE) #' @@ -200,7 +202,9 @@ HoneyBADGER$methods( #' data(gexp) #' data(ref) #' require(biomaRt) ## for gene coordinates -#' mart.obj <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", dataset = 'hsapiens_gene_ensembl', host = "jul2015.archive.ensembl.org") +#' mart.obj <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", +#' dataset = 'hsapiens_gene_ensembl', +#' host = 
"jul2015.archive.ensembl.org") #' hb <- HoneyBADGER$new() #' hb$setGexpMats(gexp, ref, mart.obj, filter=FALSE, scale=FALSE) #' hb$plotGexpProfile() diff --git a/R/HoneyBADGER_allele.R b/R/HoneyBADGER_allele.R index 4bb8558..3659d00 100644 --- a/R/HoneyBADGER_allele.R +++ b/R/HoneyBADGER_allele.R @@ -7,6 +7,7 @@ #' @param n.sc.init SNP site coverage count matrix for single cells #' @param l.init SNP site alternate allele counts for bulk reference. If NULL, in silico bulk will be created from single cells. #' @param n.bulk.init SNP site coverage counts for bulk reference. If NULL, in silico bulk will be created from single cells. +#' @param filter Whether to filter SNPs to only putative hterozygous SNPs based on the het.deviance.threshold #' @param het.deviance.threshold Deviation from expected 0.5 heterozygous fraction #' @param min.cell Minimum number of cells a SNP must have coverage observed in #' @param n.cores Number of cores @@ -162,8 +163,8 @@ setAlleleMats=function(r.init, n.sc.init, l.init=NULL, n.bulk.init=NULL, filter= #' Maps snps to genes -#' -#' @name setGeneFactors +#' +#' @param snps SNP annotations #' @param txdb TxDb object (ex. TxDb.Hsapiens.UCSC.hg19.knownGene). #' @param fill SNPs mapping to genes not annotated in txdb will be given unique IDs #' @param verbose Verbosity @@ -198,10 +199,10 @@ setGeneFactors=function(snps, txdb, fill=TRUE, verbose=TRUE) { #' Plot allele profile #' -#' @param r.sub SNP lesser allele count matrix for single cells. If NULL, object's r.maf will be used -#' @param n.sc.sub SNP coverage count matrix for single cells. If NULL, object's n.sc will be used -#' @param l.sub SNP lesser allele count matrix for bulk refernece. If NULL, object's l.maf will be used -#' @param n.bulk.sub SNP coverage count matrix for bulk refernece. If NULL, object's n.bulk will be used +#' @param r.maf SNP lesser allele count matrix for single cells. +#' @param n.sc SNP coverage count matrix for single cells. 
+#' @param l.maf SNP lesser allele count matrix for bulk reference. +#' @param n.bulk SNP coverage count matrix for bulk reference. #' @param snps SNP annotations #' @param region Limit plotting to particular GenomicRanges regions #' @param chrs Limit plotting to select chromosomes. Default autosomes only. (default: paste0('chr', c(1:22))) @@ -215,10 +216,17 @@ setGeneFactors=function(snps, txdb, fill=TRUE, verbose=TRUE) { #' data(r) #' data(cov.sc) #' allele.mats <- setAlleleMats(r, cov.sc) -#' plotAlleleProfile(allele.mats$r.maf, allele.mats$n.sc, allele.mats$l.maf, allele.mats$n.bulk, allele.mats$snps, widths=c(249250621, 243199373, 198022430, 191154276, 180915260, 171115067, 159138663, 146364022, 141213431, 135534747, 135006516, 133851895, 115169878, 107349540, 102531392, 90354753, 81195210, 78077248, 59128983, 63025520, 51304566, 48129895)/1e7) +#' plotAlleleProfile(allele.mats$r.maf, allele.mats$n.sc, +#' allele.mats$l.maf, allele.mats$n.bulk, allele.mats$snps, +#' widths=c(249250621, 243199373, 198022430, 191154276, 180915260, +#' 171115067, 159138663, 146364022, 141213431, 135534747, 135006516, +#' 133851895, 115169878, 107349540, 102531392, 90354753, 81195210, +#' 78077248, 59128983, 63025520, 51304566, 48129895)/1e7) #' #' @export #' +#' @import ggplot2 reshape2 gridExtra stats +#' plotAlleleProfile=function(r.maf, n.sc, l.maf, n.bulk, snps, region=NULL, chrs=paste0('chr', c(1:22)), widths=NULL, cellOrder=NULL, filter=FALSE, max.ps=3, verbose=FALSE) { if(!is.null(region)) { overlap <- IRanges::findOverlaps(region, snps) @@ -282,9 +290,6 @@ plotAlleleProfile=function(r.maf, n.sc, l.maf, n.bulk, snps, region=NULL, chrs=p r.tot <- cbind(r.maf/n.sc, 'Bulk'=l.maf/n.bulk) n.tot <- cbind(n.sc, 'Bulk'=n.bulk) - require(ggplot2) - require(reshape2) - if(is.null(widths)) { widths <- rep(1, length(chrs)) } else if (widths[1]=='set'){ @@ -295,11 +300,11 @@ plotAlleleProfile=function(r.maf, n.sc, l.maf, n.bulk, snps, region=NULL, chrs=p plist <- lapply(chrs, 
function(chr) { vi <- grepl(paste0('^',chr,':'), rownames(r.tot)) - m <- melt(t(r.tot[vi,])) + m <- reshape2::melt(t(r.tot[vi,])) colnames(m) <- c('cell', 'snp', 'alt.frac') rownames(m) <- paste(m$cell, m$snp) m$alt.frac[is.nan(m$alt.frac)] <- NA - n <- melt(t(n.tot[vi,])) + n <- reshape2::melt(t(n.tot[vi,])) colnames(n) <- c('cell', 'snp', 'coverage') rownames(n) <- paste(n$cell, n$snp) n$coverage[n$coverage>30] <- 30 # max for visualization purposes @@ -308,27 +313,27 @@ plotAlleleProfile=function(r.maf, n.sc, l.maf, n.bulk, snps, region=NULL, chrs=p n$coverage[n$coverage==0] <- NA # if no coverage, just don't show dat <- cbind(m, coverage=n$coverage) - p <- ggplot(dat, aes(snp, cell)) + + p <- ggplot2::ggplot(dat, ggplot2::aes(snp, cell)) + ## geom_tile(alpha=0) + - geom_point(aes(colour = alt.frac, size = coverage), na.rm=TRUE) + - scale_size_continuous(range = c(0, max.ps)) + + ggplot2::geom_point(ggplot2::aes(colour = alt.frac, size = coverage), na.rm=TRUE) + + ggplot2::scale_size_continuous(range = c(0, max.ps)) + ## scale_colour_gradientn(colours = rainbow(10)) + - scale_colour_gradient2(mid="yellow", low = "turquoise", high = "red", midpoint=0.5) + - theme( - panel.grid.major = element_blank(), - panel.grid.minor = element_blank(), - panel.background = element_blank(), - axis.text.x=element_blank(), - axis.title.x=element_blank(), - axis.ticks.x=element_blank(), - axis.text.y=element_blank(), - axis.title.y=element_blank(), - axis.ticks.y=element_blank(), + ggplot2::scale_colour_gradient2(mid="yellow", low = "turquoise", high = "red", midpoint=0.5) + + ggplot2::theme( + panel.grid.major = ggplot2::element_blank(), + panel.grid.minor = ggplot2::element_blank(), + panel.background = ggplot2::element_blank(), + axis.text.x=ggplot2::element_blank(), + axis.title.x=ggplot2::element_blank(), + axis.ticks.x=ggplot2::element_blank(), + axis.text.y=ggplot2::element_blank(), + axis.title.y=ggplot2::element_blank(), + axis.ticks.y=ggplot2::element_blank(), 
legend.position="none", - plot.margin=unit(c(0,0,0,0), "cm"), - panel.border = element_rect(fill = NA, linetype = "solid", colour = "black"), - plot.title = element_text(hjust = 0.5) - ) + labs(title = chr) + plot.margin=ggplot2::unit(c(0,0,0,0), "cm"), + panel.border = ggplot2::element_rect(fill = NA, linetype = "solid", colour = "black"), + plot.title = ggplot2::element_text(hjust = 0.5) + ) + ggplot2::labs(title = chr) ## theme( ## ## axis.text.x=element_text(angle=90,hjust=1,vjust=0.5,size=rel(0.5),lineheight=1), ## ## axis.text.y=element_blank(), @@ -341,7 +346,6 @@ plotAlleleProfile=function(r.maf, n.sc, l.maf, n.bulk, snps, region=NULL, chrs=p return(p) }) - require(gridExtra) do.call("grid.arrange", c(plist, list(ncol=length(plist), widths=widths))) } @@ -349,15 +353,16 @@ plotAlleleProfile=function(r.maf, n.sc, l.maf, n.bulk, snps, region=NULL, chrs=p #' Calculate posterior probability of CNVs using allele data #' -#' @param r.sub Optional matrix of alt allele count in single cells. If not provided, internal r.sc matrix is used. -#' @param n.sub Optional matrix of site coverage count in single cells. If not provided, internal n.sc matrix is used. -#' @param l.sub Optional vector of alt allele count in pooled single cells or bulk. If not provided, internal l vector is used. -#' @param n.bulk.sub Optional vector of site coverage count in pooled single cells or bulk. If not provided, internal n.bulk vector is used. +#' @param r.maf Matrix of alt allele count in single cells. +#' @param n.sc Matrix of site coverage count in single cells. +#' @param l.maf Vector of alt allele count in pooled single cells or bulk. +#' @param n.bulk Vector of site coverage count in pooled single cells or bulk. +#' @param snps SNP annotations +#' @param geneFactor Output of \code{\link{setGeneFactors}} #' @param region GenomicRanges region of interest such as expected CNV boundaries. #' @param filter Boolean for whether to filter out SNP sites with no coverage. 
(default: TRUE) #' @param pe Effective error rate to capture error from sequencing, etc. (default: 0.01) #' @param mono Rate of mono-allelic expression. (default: 0.7) -#' @param quiet Boolean of whether to suppress progress bar. (default: TRUE) #' @param verbose Verbosity(default: FALSE) #' #' @examples @@ -367,10 +372,14 @@ plotAlleleProfile=function(r.maf, n.sc, l.maf, n.bulk, snps, region=NULL, chrs=p #' library(TxDb.Hsapiens.UCSC.hg19.knownGene) #' geneFactor <- setGeneFactors(allele.mats$snps, TxDb.Hsapiens.UCSC.hg19.knownGene) #' ## test region known to be commonly deleted in glioblastoma -#' results <- calcAlleleCnvProb(allele.mats$r.maf, allele.mats$n.sc, allele.mats$l.maf, allele.mats$n.bulk, allele.mats$snps, geneFactor, region=GenomicRanges::GRanges('chr10', IRanges::IRanges(0,1e9)), verbose=TRUE) +#' results <- calcAlleleCnvProb(allele.mats$r.maf, allele.mats$n.sc, +#' allele.mats$l.maf, allele.mats$n.bulk, allele.mats$snps, geneFactor, +#' region=GenomicRanges::GRanges('chr10', IRanges::IRanges(0,1e9)), verbose=TRUE) #' #' @export #' +#' @import rjags +#' calcAlleleCnvProb=function(r.maf, n.sc, l.maf, n.bulk, snps, geneFactor, region=NULL, filter=FALSE, pe=0.1, mono=0.7, verbose=FALSE) { quiet = !verbose @@ -479,7 +488,6 @@ calcAlleleCnvProb=function(r.maf, n.sc, l.maf, n.bulk, snps, geneFactor, region= if(verbose) { cat('Running model ... \n') } - require(rjags) # 4 random chains model <- rjags::jags.model(modelFile, data=data, n.chains=4, n.adapt=100, quiet=quiet) update(model, 100, progress.bar=ifelse(quiet,"none","text")) @@ -500,17 +508,18 @@ calcAlleleCnvProb=function(r.maf, n.sc, l.maf, n.bulk, snps, geneFactor, region= #' Use HMM to identify potential CNV boundaries based on patterns of persistent allelic imbalance #' -#' @param r.sub Optional matrix of alt allele count in single cells. If not provided, internal r.sc matrix is used. -#' @param n.sub Optional matrix of site coverage count in single cells. 
If not provided, internal n.sc matrix is used. -#' @param l.sub Optional vector of alt allele count in pooled single cells or bulk. If not provided, internal l vector is used. -#' @param n.bulk.sub Optional vector of site coverage count in pooled single cells or bulk. If not provided, internal n.bulk vector is used. +#' @param r.maf Matrix of alt allele count in single cells. +#' @param n.sc Matrix of site coverage count in single cells. +#' @param l.maf Vector of alt allele count in pooled single cells or bulk. +#' @param n.bulk Vector of site coverage count in pooled single cells or bulk. +#' @param snps SNP annotations +#' @param geneFactor Output of \code{\link{setGeneFactors}} #' @param min.traverse Depth traversal to look for subclonal CNVs. Higher depth, potentially smaller subclones detectable. (default: 3) #' @param t HMM transition parameter. Higher number, more transitions. (default: 1e-6) #' @param pd Probability of lesser allele detection in deleted region (ie. due to error) #' @param pn Probability of lesser allele detection in neutral region (ie. 0.5 - error rate) #' @param min.num.snps Minimum number of snps in candidate CNV #' @param trim Trim boundary SNPs -#' @param init Boolean whether to initialize #' @param verbose Verbosity(default: FALSE) #' @param ... 
Additional parameters to pass to calcAlleleCnvProb #' @@ -520,13 +529,17 @@ calcAlleleCnvProb=function(r.maf, n.sc, l.maf, n.bulk, snps, geneFactor, region= #' allele.mats <- setAlleleMats(r, cov.sc) #' library(TxDb.Hsapiens.UCSC.hg19.knownGene) #' geneFactor <- setGeneFactors(allele.mats$snps, TxDb.Hsapiens.UCSC.hg19.knownGene) -#' potentialCnvs <- calcAlleleCnvBoundaries(allele.mats$r.maf, allele.mats$n.sc, allele.mats$l.maf, allele.mats$n.bulk, allele.mats$snps, geneFactor) +#' potentialCnvs <- calcAlleleCnvBoundaries(allele.mats$r.maf, allele.mats$n.sc, +#' allele.mats$l.maf, allele.mats$n.bulk, allele.mats$snps, geneFactor) #' ## visualize affected regions -#' plotAlleleProfile(allele.mats$r.maf, allele.mats$n.sc, allele.mats$l.maf, allele.mats$n.bulk, allele.mats$snps, region=potentialCnvs$region) +#' plotAlleleProfile(allele.mats$r.maf, allele.mats$n.sc, allele.mats$l.maf, +#' allele.mats$n.bulk, allele.mats$snps, region=potentialCnvs$region) #' } #' #' @export #' +#' @import stats +#' calcAlleleCnvBoundaries=function(r.maf, n.sc, l.maf, n.bulk, snps, geneFactor, min.traverse=3, t=1e-6, pd=0.1, pn=0.45, min.num.snps=5, trim=0.1, verbose=FALSE, ...) 
{ snps <- snps[rownames(r.maf)] diff --git a/R/HoneyBADGER_comb.R b/R/HoneyBADGER_comb.R index bde378d..5156ad8 100644 --- a/R/HoneyBADGER_comb.R +++ b/R/HoneyBADGER_comb.R @@ -5,13 +5,15 @@ #' #' @export #' -calcCombCnvProb=function(gexp.norm, genes, mvFit, m=0.15, r.maf, n.sc, l.maf, n.bulk, snps, geneFactor, region=NULL, filter=FALSE, pe=0.1, mono=0.7, n.iter=1000, quiet=FALSE, verbose=FALSE) { +calcCombCnvProb=function(gexp.norm, genes, mvFit, m=0.15, r.maf, n.sc, l.maf, n.bulk, snps, geneFactor, region=NULL, filter=FALSE, pe=0.1, mono=0.7, verbose=FALSE) { - geneFactor <- geneFactor[rownames(r.maf)] - snps <- snps[rownames(r.maf),] + quiet <- !verbose - genes <- genes[rownames(gexp.norm)] - mvFit <- mvFit[colnames(gexp.norm)] + geneFactor <- geneFactor[rownames(r.maf)] + snps <- snps[rownames(r.maf),] + + genes <- genes[rownames(gexp.norm)] + mvFit <- mvFit[colnames(gexp.norm)] gexp <- gexp.norm gos <- genes diff --git a/R/HoneyBADGER_gexp.R b/R/HoneyBADGER_gexp.R index 9c981ab..0cac87c 100644 --- a/R/HoneyBADGER_gexp.R +++ b/R/HoneyBADGER_gexp.R @@ -18,7 +18,9 @@ #' data(gexp) #' data(ref) #' require(biomaRt) ## for gene coordinates -#' mart.obj <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", dataset = 'hsapiens_gene_ensembl', host = "jul2015.archive.ensembl.org") +#' mart.obj <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", +#' dataset = 'hsapiens_gene_ensembl', +#' host = "jul2015.archive.ensembl.org") #' gexp.mats <- setGexpMats(gexp, ref, mart.obj, filter=FALSE, scale=FALSE) #' #' @export @@ -103,6 +105,7 @@ setGexpMats=function(gexp.sc.init, gexp.ref.init, mart.obj, filter=TRUE, minMean #' @param gexp.norm Normalized gene expression matrix #' @param genes GRanges annotation of gene names and coordinates #' @param chrs Chromosomes to be plotted (default: paste0('chr', c(1:22, 'X'))) +#' @param region Optional GenomicRanges region of interest such as expected CNV boundaries. (default: NULL) #' @param window.size Window size for sliding window mean. 
Must be odd number. (default: 101) #' @param zlim Limit for plotting heatmap (default: c(-2,2)) #' @param widths Widths of chromosomes in plot. If 'set' will depend on number of genes in region. Else will be equal. @@ -112,14 +115,22 @@ setGexpMats=function(gexp.sc.init, gexp.ref.init, mart.obj, filter=TRUE, minMean #' data(gexp) #' data(ref) #' require(biomaRt) ## for gene coordinates -#' mart.obj <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", dataset = 'hsapiens_gene_ensembl', host = "jul2015.archive.ensembl.org") +#' mart.obj <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", +#' dataset = 'hsapiens_gene_ensembl', +#' host = "jul2015.archive.ensembl.org") #' gexp.mats <- setGexpMats(gexp, ref, mart.obj, filter=FALSE, scale=FALSE) #' ##Set by known chromosome size widths: #' ##https://genome.ucsc.edu/goldenpath/help/hg19.chrom.sizes -#' gexp.plot <- plotGexpProfile(gexp.mats$gexp.norm, gexp.mats$genes, widths=c(249250621, 243199373, 198022430, 191154276, 180915260, 171115067, 159138663, 146364022, 141213431, 135534747, 135006516, 133851895, 115169878, 107349540, 102531392, 90354753, 81195210, 78077248, 59128983, 63025520, 51304566, 48129895)/1e7) +#' gexp.plot <- plotGexpProfile(gexp.mats$gexp.norm, gexp.mats$genes, +#' widths=c(249250621, 243199373, 198022430, 191154276, 180915260, +#' 171115067, 159138663, 146364022, 141213431, 135534747, 135006516, +#' 133851895, 115169878, 107349540, 102531392, 90354753, 81195210, +#' 78077248, 59128983, 63025520, 51304566, 48129895)/1e7) #' #' @export #' +#' @import grDevices graphics stats +#' plotGexpProfile=function(gexp.norm, genes, chrs=paste0('chr', c(1:22)), region=NULL, window.size=101, zlim=c(-2,2), cellOrder=NULL, widths=NULL) { genes <- genes[rownames(gexp.norm)] @@ -211,7 +222,9 @@ plotGexpProfile=function(gexp.norm, genes, chrs=paste0('chr', c(1:22)), region=N #' data(gexp) #' data(ref) #' require(biomaRt) ## for gene coordinates -#' mart.obj <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", dataset = 
'hsapiens_gene_ensembl', host = "jul2015.archive.ensembl.org") +#' mart.obj <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", +#' dataset = 'hsapiens_gene_ensembl', +#' host = "jul2015.archive.ensembl.org") #' gexp.mats <- setGexpMats(gexp, ref, mart.obj, filter=FALSE, scale=FALSE) #' mvFit <- setMvFit(gexp.mats$gexp.norm) #' @@ -239,10 +252,8 @@ setMvFit=function(gexp.norm, num.genes = seq(5, 100, by=5), rep = 50, plot=FALSE if(plot) { par(mfrow=c(1,3), mar=rep(2,4)) perf.test <- function(mat) { - require(ggplot2) - require(reshape2) - m <- melt(mat) - p <- ggplot(m) + geom_boxplot(aes(x = factor(Var2), y = value)) + m <- reshape2::melt(mat) + p <- ggplot2::ggplot(m) + ggplot2::geom_boxplot(ggplot2::aes(x = factor(Var2), y = value)) return(p) } perf.test(mean.comp) @@ -290,7 +301,9 @@ setMvFit=function(gexp.norm, num.genes = seq(5, 100, by=5), rep = 50, plot=FALSE #' data(gexp) #' data(ref) #' require(biomaRt) ## for gene coordinates -#' mart.obj <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", dataset = 'hsapiens_gene_ensembl', host = "jul2015.archive.ensembl.org") +#' mart.obj <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", +#' dataset = 'hsapiens_gene_ensembl', +#' host = "jul2015.archive.ensembl.org") #' gexp.mats <- setGexpMats(gexp, ref, mart.obj, filter=FALSE, scale=FALSE) #' dev <- setGexpDev(gexp.mats$gexp.norm) #' @@ -333,13 +346,18 @@ setGexpDev=function(gexp.norm, alpha=0.25, n=100, seed=0, plot=FALSE, verbose=FA #' data(gexp) #' data(ref) #' require(biomaRt) ## for gene coordinates -#' mart.obj <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", dataset = 'hsapiens_gene_ensembl', host = "jul2015.archive.ensembl.org") +#' mart.obj <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", +#' dataset = 'hsapiens_gene_ensembl', +#' host = "jul2015.archive.ensembl.org") #' gexp.mats <- setGexpMats(gexp, ref, mart.obj, filter=FALSE, scale=FALSE) #' mvFit <- setMvFit(gexp.mats$gexp.norm) -#' results <- calcGexpCnvProb(gexp.mats$gexp.norm, gexp.mats$genes, mvFit, 
region=GenomicRanges::GRanges('chr10', IRanges::IRanges(0,1e9)), verbose=TRUE) +#' results <- calcGexpCnvProb(gexp.mats$gexp.norm, gexp.mats$genes, +#' mvFit, region=GenomicRanges::GRanges('chr10', IRanges::IRanges(0,1e9)), verbose=TRUE) #' #' @export #' +#' @import rjags +#' calcGexpCnvProb=function(gexp.norm, genes, mvFit, m=0.15, region=NULL, verbose=FALSE) { gexp <- gexp.norm gos <- genes[rownames(gexp.norm)] @@ -400,8 +418,7 @@ calcGexpCnvProb=function(gexp.norm, genes, mvFit, m=0.15, region=NULL, verbose=F list(S = rep(0, ncol(gexp)), dd = 1), list(S = rep(1, ncol(gexp)), dd = 1) ) - require(rjags) - model <- jags.model(modelFile, data=data, inits=inits, n.chains=4, n.adapt=100, quiet=quiet) + model <- rjags::jags.model(modelFile, data=data, inits=inits, n.chains=4, n.adapt=100, quiet=quiet) update(model, 100, progress.bar=ifelse(quiet,"none","text")) parameters <- c('S', 'dd') @@ -430,15 +447,18 @@ calcGexpCnvProb=function(gexp.norm, genes, mvFit, m=0.15, region=NULL, verbose=F #' #' @param gexp.norm Normalized gene expression matrix #' @param genes GRanges annotation of gene names and coordinates +#' @param m Expression magnitude deviation needed to distinguish CNV from neutral #' @param chrs List of chromosome names. Genes not mapping to these chromosomes will be excluded. Default autosomes only: paste0('chr', c(1:22)) #' @param min.traverse Depth traversal to look for subclonal CNVs. Higher depth, potentially smaller subclones detectable. (default: 2) #' @param min.num.genes Minimum number of genes within a CNV. (default: 3) +#' @param trim Trim boundary SNPs #' @param t HMM transition parameter. Higher number, more transitions. 
(default: 1e-6) -#' @param init Initialize recursion (default: FALSE) #' @param verbose Verbosity (default: FALSE) #' #' @export #' +#' @import stats +#' calcGexpCnvBoundaries=function(gexp.norm, genes, m=0.15, chrs=paste0('chr', c(1:22)), min.traverse=3, t=1e-6, min.num.genes=3, trim=0.1, verbose=FALSE) { genes <- genes[rownames(gexp.norm)] diff --git a/R/prepareData_cxue.R b/R/clonal_deletion_detection.R similarity index 99% rename from R/prepareData_cxue.R rename to R/clonal_deletion_detection.R index f087cc5..526277d 100644 --- a/R/prepareData_cxue.R +++ b/R/clonal_deletion_detection.R @@ -1,5 +1,5 @@ #' @author Catherine Xue -#' Functions for assessing clonal deletions +#' Functions for assessing for clonal deletions #' Helper function to get total allele counts and coverage across all cells at each diff --git a/R/prepareData.R b/R/prepareData.R index 6d1ee4f..d68eb6e 100644 --- a/R/prepareData.R +++ b/R/prepareData.R @@ -29,7 +29,7 @@ #' } #' #' @export -#' +#' getAlleleCount <- function (gr, bamFile, indexFile, verbose = FALSE) { df <- data.frame(seqnames(gr), ranges(gr)) names <- paste(df$seqnames.gr., paste(df$start, df$end, sep='-'), sep=':') @@ -94,7 +94,7 @@ getAlleleCount <- function (gr, bamFile, indexFile, verbose = FALSE) { #' } #' #' @export -#' +#' getCellAlleleCount <- function (gr, bamFile, indexFile, cellBarcodes, verbose = FALSE, n.cores=1) { df <- data.frame(seqnames(gr), ranges(gr)) names <- paste(df$seqnames.gr., paste(df$start, df$end, sep='-'), sep=':') @@ -310,6 +310,8 @@ getSnpMats <- function(snps, bamFiles, indexFiles, n.cores=1, verbose=FALSE) { #' @param indexFiles list of bai index file #' @param n.cores number of cores #' @param verbose Boolean of whether or not to print progress and info +#' @param barcodes Cell barcodes +#' #' @return #' refCount reference allele count matrix for each cell and each position of interest #' altCount alternative allele count matrix for each cell and each position of interest diff --git 
a/man/HoneyBADGER_plotGexpProfile.Rd b/man/HoneyBADGER_plotGexpProfile.Rd index 87a1ec5..0140c28 100644 --- a/man/HoneyBADGER_plotGexpProfile.Rd +++ b/man/HoneyBADGER_plotGexpProfile.Rd @@ -27,7 +27,9 @@ Plot gene expression profile data(gexp) data(ref) require(biomaRt) ## for gene coordinates -mart.obj <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", dataset = 'hsapiens_gene_ensembl', host = "jul2015.archive.ensembl.org") +mart.obj <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", + dataset = 'hsapiens_gene_ensembl', + host = "jul2015.archive.ensembl.org") hb <- HoneyBADGER$new() hb$setGexpMats(gexp, ref, mart.obj, filter=FALSE, scale=FALSE) hb$plotGexpProfile() diff --git a/man/HoneyBADGER_setGexpMats.Rd b/man/HoneyBADGER_setGexpMats.Rd index 4e34161..a8180f9 100644 --- a/man/HoneyBADGER_setGexpMats.Rd +++ b/man/HoneyBADGER_setGexpMats.Rd @@ -31,7 +31,9 @@ Set gene expression matrices, normalizes, and maps genes to genomic coordinates data(gexp) data(ref) require(biomaRt) ## for gene coordinates -mart.obj <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", dataset = 'hsapiens_gene_ensembl', host = "jul2015.archive.ensembl.org") +mart.obj <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", + dataset = 'hsapiens_gene_ensembl', + host = "jul2015.archive.ensembl.org") hb <- HoneyBADGER$new() hb$setGexpMats(gexp, ref, mart.obj, filter=FALSE, scale=FALSE) diff --git a/man/calcAlleleCnvBoundaries.Rd b/man/calcAlleleCnvBoundaries.Rd index 40adfbd..509e90e 100644 --- a/man/calcAlleleCnvBoundaries.Rd +++ b/man/calcAlleleCnvBoundaries.Rd @@ -9,6 +9,18 @@ calcAlleleCnvBoundaries(r.maf, n.sc, l.maf, n.bulk, snps, geneFactor, trim = 0.1, verbose = FALSE, ...) 
} \arguments{ +\item{r.maf}{Matrix of alt allele count in single cells.} + +\item{n.sc}{Matrix of site coverage count in single cells.} + +\item{l.maf}{Vector of alt allele count in pooled single cells or bulk.} + +\item{n.bulk}{Vector of site coverage count in pooled single cells or bulk.} + +\item{snps}{SNP annotations} + +\item{geneFactor}{Output of \code{\link{setGeneFactors}}} + \item{min.traverse}{Depth traversal to look for subclonal CNVs. Higher depth, potentially smaller subclones detectable. (default: 3)} \item{t}{HMM transition parameter. Higher number, more transitions. (default: 1e-6)} @@ -24,16 +36,6 @@ calcAlleleCnvBoundaries(r.maf, n.sc, l.maf, n.bulk, snps, geneFactor, \item{verbose}{Verbosity(default: FALSE)} \item{...}{Additional parameters to pass to calcAlleleCnvProb} - -\item{r.sub}{Optional matrix of alt allele count in single cells. If not provided, internal r.sc matrix is used.} - -\item{n.sub}{Optional matrix of site coverage count in single cells. If not provided, internal n.sc matrix is used.} - -\item{l.sub}{Optional vector of alt allele count in pooled single cells or bulk. If not provided, internal l vector is used.} - -\item{n.bulk.sub}{Optional vector of site coverage count in pooled single cells or bulk. 
If not provided, internal n.bulk vector is used.} - -\item{init}{Boolean whether to initialize} } \description{ Use HMM to identify potential CNV boundaries based on patterns of persistent allelic imbalance @@ -45,9 +47,11 @@ data(cov.sc) allele.mats <- setAlleleMats(r, cov.sc) library(TxDb.Hsapiens.UCSC.hg19.knownGene) geneFactor <- setGeneFactors(allele.mats$snps, TxDb.Hsapiens.UCSC.hg19.knownGene) -potentialCnvs <- calcAlleleCnvBoundaries(allele.mats$r.maf, allele.mats$n.sc, allele.mats$l.maf, allele.mats$n.bulk, allele.mats$snps, geneFactor) +potentialCnvs <- calcAlleleCnvBoundaries(allele.mats$r.maf, allele.mats$n.sc, + allele.mats$l.maf, allele.mats$n.bulk, allele.mats$snps, geneFactor) ## visualize affected regions -plotAlleleProfile(allele.mats$r.maf, allele.mats$n.sc, allele.mats$l.maf, allele.mats$n.bulk, allele.mats$snps, region=potentialCnvs$region) +plotAlleleProfile(allele.mats$r.maf, allele.mats$n.sc, allele.mats$l.maf, + allele.mats$n.bulk, allele.mats$snps, region=potentialCnvs$region) } } diff --git a/man/calcAlleleCnvProb.Rd b/man/calcAlleleCnvProb.Rd index 9f39bc5..e1985d2 100644 --- a/man/calcAlleleCnvProb.Rd +++ b/man/calcAlleleCnvProb.Rd @@ -8,25 +8,27 @@ calcAlleleCnvProb(r.maf, n.sc, l.maf, n.bulk, snps, geneFactor, region = NULL, filter = FALSE, pe = 0.1, mono = 0.7, verbose = FALSE) } \arguments{ -\item{region}{GenomicRanges region of interest such as expected CNV boundaries.} +\item{r.maf}{Matrix of alt allele count in single cells.} -\item{filter}{Boolean for whether to filter out SNP sites with no coverage. (default: TRUE)} +\item{n.sc}{Matrix of site coverage count in single cells.} -\item{pe}{Effective error rate to capture error from sequencing, etc. (default: 0.01)} +\item{l.maf}{Vector of alt allele count in pooled single cells or bulk.} -\item{mono}{Rate of mono-allelic expression. 
(default: 0.7)} +\item{n.bulk}{Vector of site coverage count in pooled single cells or bulk.} -\item{verbose}{Verbosity(default: FALSE)} +\item{snps}{SNP annotations} -\item{r.sub}{Optional matrix of alt allele count in single cells. If not provided, internal r.sc matrix is used.} +\item{geneFactor}{Output of \code{\link{setGeneFactors}}} -\item{n.sub}{Optional matrix of site coverage count in single cells. If not provided, internal n.sc matrix is used.} +\item{region}{GenomicRanges region of interest such as expected CNV boundaries.} -\item{l.sub}{Optional vector of alt allele count in pooled single cells or bulk. If not provided, internal l vector is used.} +\item{filter}{Boolean for whether to filter out SNP sites with no coverage. (default: TRUE)} -\item{n.bulk.sub}{Optional vector of site coverage count in pooled single cells or bulk. If not provided, internal n.bulk vector is used.} +\item{pe}{Effective error rate to capture error from sequencing, etc. (default: 0.01)} -\item{quiet}{Boolean of whether to suppress progress bar. (default: TRUE)} +\item{mono}{Rate of mono-allelic expression. 
(default: 0.7)} + +\item{verbose}{Verbosity(default: FALSE)} } \description{ Calculate posterior probability of CNVs using allele data @@ -38,6 +40,8 @@ allele.mats <- setAlleleMats(r, cov.sc) library(TxDb.Hsapiens.UCSC.hg19.knownGene) geneFactor <- setGeneFactors(allele.mats$snps, TxDb.Hsapiens.UCSC.hg19.knownGene) ## test region known to be commonly deleted in glioblastoma -results <- calcAlleleCnvProb(allele.mats$r.maf, allele.mats$n.sc, allele.mats$l.maf, allele.mats$n.bulk, allele.mats$snps, geneFactor, region=GenomicRanges::GRanges('chr10', IRanges::IRanges(0,1e9)), verbose=TRUE) +results <- calcAlleleCnvProb(allele.mats$r.maf, allele.mats$n.sc, + allele.mats$l.maf, allele.mats$n.bulk, allele.mats$snps, geneFactor, + region=GenomicRanges::GRanges('chr10', IRanges::IRanges(0,1e9)), verbose=TRUE) } diff --git a/man/calcCombCnvProb.Rd b/man/calcCombCnvProb.Rd index 706c5fc..1e6b04e 100644 --- a/man/calcCombCnvProb.Rd +++ b/man/calcCombCnvProb.Rd @@ -6,7 +6,7 @@ \usage{ calcCombCnvProb(gexp.norm, genes, mvFit, m = 0.15, r.maf, n.sc, l.maf, n.bulk, snps, geneFactor, region = NULL, filter = FALSE, pe = 0.1, mono = 0.7, - n.iter = 1000, quiet = FALSE, verbose = FALSE) + verbose = FALSE) } \arguments{ \item{gexp.norm}{Normalized gene expression matrix.} @@ -17,6 +17,18 @@ calcCombCnvProb(gexp.norm, genes, mvFit, m = 0.15, r.maf, n.sc, l.maf, n.bulk, \item{m}{Expression deviation due to copy number change (default: 0.15)} +\item{r.maf}{Matrix of alt allele count in single cells.} + +\item{n.sc}{Matrix of site coverage count in single cells.} + +\item{l.maf}{Vector of alt allele count in pooled single cells or bulk.} + +\item{n.bulk}{Vector of site coverage count in pooled single cells or bulk.} + +\item{snps}{SNP annotations} + +\item{geneFactor}{Output of \code{\link{setGeneFactors}}} + \item{region}{Optional GenomicRanges region of interest such as expected CNV boundaries. (default: NULL)} \item{filter}{Boolean for whether to filter out SNP sites with no coverage. 
(default: TRUE)} @@ -25,8 +37,6 @@ calcCombCnvProb(gexp.norm, genes, mvFit, m = 0.15, r.maf, n.sc, l.maf, n.bulk, \item{mono}{Rate of mono-allelic expression. (default: 0.7)} -\item{quiet}{Boolean of whether to suppress progress bar. (default: TRUE)} - \item{verbose}{Verbosity (default: FALSE)} } \description{ diff --git a/man/calcGexpCnvBoundaries.Rd b/man/calcGexpCnvBoundaries.Rd index 300be83..e051f1c 100644 --- a/man/calcGexpCnvBoundaries.Rd +++ b/man/calcGexpCnvBoundaries.Rd @@ -13,6 +13,8 @@ calcGexpCnvBoundaries(gexp.norm, genes, m = 0.15, chrs = paste0("chr", \item{genes}{GRanges annotation of gene names and coordinates} +\item{m}{Expression magnitude deviation needed to distinguish CNV from neutral} + \item{chrs}{List of chromosome names. Genes not mapping to these chromosomes will be excluded. Default autosomes only: paste0('chr', c(1:22))} \item{min.traverse}{Depth traversal to look for subclonal CNVs. Higher depth, potentially smaller subclones detectable. (default: 2)} @@ -21,9 +23,9 @@ calcGexpCnvBoundaries(gexp.norm, genes, m = 0.15, chrs = paste0("chr", \item{min.num.genes}{Minimum number of genes within a CNV. 
(default: 3)} -\item{verbose}{Verbosity (default: FALSE)} +\item{trim}{Trim boundary SNPs} -\item{init}{Initialize recursion (default: FALSE)} +\item{verbose}{Verbosity (default: FALSE)} } \description{ HMM to identify CNV boundaries using normalized gene expression data diff --git a/man/calcGexpCnvProb.Rd b/man/calcGexpCnvProb.Rd index 50c70a1..da39164 100644 --- a/man/calcGexpCnvProb.Rd +++ b/man/calcGexpCnvProb.Rd @@ -27,9 +27,12 @@ Calculate posterior probability of CNVs using normalized expression data data(gexp) data(ref) require(biomaRt) ## for gene coordinates -mart.obj <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", dataset = 'hsapiens_gene_ensembl', host = "jul2015.archive.ensembl.org") +mart.obj <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", + dataset = 'hsapiens_gene_ensembl', + host = "jul2015.archive.ensembl.org") gexp.mats <- setGexpMats(gexp, ref, mart.obj, filter=FALSE, scale=FALSE) mvFit <- setMvFit(gexp.mats$gexp.norm) -results <- calcGexpCnvProb(gexp.mats$gexp.norm, gexp.mats$genes, mvFit, region=GenomicRanges::GRanges('chr10', IRanges::IRanges(0,1e9)), verbose=TRUE) +results <- calcGexpCnvProb(gexp.mats$gexp.norm, gexp.mats$genes, + mvFit, region=GenomicRanges::GRanges('chr10', IRanges::IRanges(0,1e9)), verbose=TRUE) } diff --git a/man/getHetRate.Rd b/man/getHetRate.Rd index ff04b69..fb37319 100644 --- a/man/getHetRate.Rd +++ b/man/getHetRate.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/prepareData_cxue.R +% Please edit documentation in R/clonal_deletion_detection.R \name{getHetRate} \alias{getHetRate} \title{Helper function to get het rate from allele counts at a list of common variant diff --git a/man/getSnpMats10X.Rd b/man/getSnpMats10X.Rd index f1278c6..dfd1b98 100644 --- a/man/getSnpMats10X.Rd +++ b/man/getSnpMats10X.Rd @@ -14,6 +14,8 @@ getSnpMats10X(snps, bamFiles, indexFiles, barcodes, n.cores = 1, \item{indexFiles}{list of bai index file} +\item{barcodes}{Cell barcodes} + 
\item{n.cores}{number of cores} \item{verbose}{Boolean of whether or not to print progress and info} diff --git a/man/isHet.Rd b/man/isHet.Rd index f0ac9a7..444a3b9 100644 --- a/man/isHet.Rd +++ b/man/isHet.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/prepareData_cxue.R +% Please edit documentation in R/clonal_deletion_detection.R \name{isHet} \alias{isHet} \title{Helper function to determine heterzogosity at a position from allele counts diff --git a/man/isHetAlt.Rd b/man/isHetAlt.Rd index 1394dae..5632e10 100644 --- a/man/isHetAlt.Rd +++ b/man/isHetAlt.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/prepareData_cxue.R +% Please edit documentation in R/clonal_deletion_detection.R \name{isHetAlt} \alias{isHetAlt} \title{Alternate helper function to determine heterzogosity at a position from allele diff --git a/man/plotAlleleProfile.Rd b/man/plotAlleleProfile.Rd index ee4eafe..0f47101 100644 --- a/man/plotAlleleProfile.Rd +++ b/man/plotAlleleProfile.Rd @@ -9,6 +9,14 @@ plotAlleleProfile(r.maf, n.sc, l.maf, n.bulk, snps, region = NULL, filter = FALSE, max.ps = 3, verbose = FALSE) } \arguments{ +\item{r.maf}{SNP lesser allele count matrix for single cells.} + +\item{n.sc}{SNP coverage count matrix for single cells.} + +\item{l.maf}{SNP lesser allele count matrix for bulk refernece.} + +\item{n.bulk}{SNP coverage count matrix for bulk refernece.} + \item{snps}{SNP annotations} \item{region}{Limit plotting to particular GenomicRanges regions} @@ -24,14 +32,6 @@ plotAlleleProfile(r.maf, n.sc, l.maf, n.bulk, snps, region = NULL, \item{max.ps}{Maximum point size for plot.} \item{verbose}{Verbosity} - -\item{r.sub}{SNP lesser allele count matrix for single cells. If NULL, object's r.maf will be used} - -\item{n.sc.sub}{SNP coverage count matrix for single cells. If NULL, object's n.sc will be used} - -\item{l.sub}{SNP lesser allele count matrix for bulk refernece. 
If NULL, object's l.maf will be used} - -\item{n.bulk.sub}{SNP coverage count matrix for bulk refernece. If NULL, object's n.bulk will be used} } \description{ Plot allele profile @@ -40,6 +40,11 @@ Plot allele profile data(r) data(cov.sc) allele.mats <- setAlleleMats(r, cov.sc) -plotAlleleProfile(allele.mats$r.maf, allele.mats$n.sc, allele.mats$l.maf, allele.mats$n.bulk, allele.mats$snps, widths=c(249250621, 243199373, 198022430, 191154276, 180915260, 171115067, 159138663, 146364022, 141213431, 135534747, 135006516, 133851895, 115169878, 107349540, 102531392, 90354753, 81195210, 78077248, 59128983, 63025520, 51304566, 48129895)/1e7) +plotAlleleProfile(allele.mats$r.maf, allele.mats$n.sc, + allele.mats$l.maf, allele.mats$n.bulk, allele.mats$snps, + widths=c(249250621, 243199373, 198022430, 191154276, 180915260, + 171115067, 159138663, 146364022, 141213431, 135534747, 135006516, + 133851895, 115169878, 107349540, 102531392, 90354753, 81195210, + 78077248, 59128983, 63025520, 51304566, 48129895)/1e7) } diff --git a/man/plotGexpProfile.Rd b/man/plotGexpProfile.Rd index b398116..947ad4c 100644 --- a/man/plotGexpProfile.Rd +++ b/man/plotGexpProfile.Rd @@ -15,6 +15,8 @@ plotGexpProfile(gexp.norm, genes, chrs = paste0("chr", c(1:22)), \item{chrs}{Chromosomes to be plotted (default: paste0('chr', c(1:22, 'X')))} +\item{region}{Optional GenomicRanges region of interest such as expected CNV boundaries. (default: NULL)} + \item{window.size}{Window size for sliding window mean. Must be odd number. 
(default: 101)} \item{zlim}{Limit for plotting heatmap (default: c(-2,2))} @@ -30,10 +32,16 @@ Plot gene expression profile data(gexp) data(ref) require(biomaRt) ## for gene coordinates -mart.obj <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", dataset = 'hsapiens_gene_ensembl', host = "jul2015.archive.ensembl.org") +mart.obj <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", + dataset = 'hsapiens_gene_ensembl', + host = "jul2015.archive.ensembl.org") gexp.mats <- setGexpMats(gexp, ref, mart.obj, filter=FALSE, scale=FALSE) ##Set by known chromosome size widths: ##https://genome.ucsc.edu/goldenpath/help/hg19.chrom.sizes -gexp.plot <- plotGexpProfile(gexp.mats$gexp.norm, gexp.mats$genes, widths=c(249250621, 243199373, 198022430, 191154276, 180915260, 171115067, 159138663, 146364022, 141213431, 135534747, 135006516, 133851895, 115169878, 107349540, 102531392, 90354753, 81195210, 78077248, 59128983, 63025520, 51304566, 48129895)/1e7) +gexp.plot <- plotGexpProfile(gexp.mats$gexp.norm, gexp.mats$genes, + widths=c(249250621, 243199373, 198022430, 191154276, 180915260, + 171115067, 159138663, 146364022, 141213431, 135534747, 135006516, + 133851895, 115169878, 107349540, 102531392, 90354753, 81195210, + 78077248, 59128983, 63025520, 51304566, 48129895)/1e7) } diff --git a/man/setAlleleMats.Rd b/man/setAlleleMats.Rd index 0d01492..4549114 100644 --- a/man/setAlleleMats.Rd +++ b/man/setAlleleMats.Rd @@ -18,6 +18,8 @@ setAlleleMats(r.init, n.sc.init, l.init = NULL, n.bulk.init = NULL, \item{n.bulk.init}{SNP site coverage counts for bulk reference. 
If NULL, in silico bulk will be created from single cells.} +\item{filter}{Whether to filter SNPs to only putative hterozygous SNPs based on the het.deviance.threshold} + \item{het.deviance.threshold}{Deviation from expected 0.5 heterozygous fraction} \item{min.cell}{Minimum number of cells a SNP must have coverage observed in} diff --git a/man/setGeneFactors.Rd b/man/setGeneFactors.Rd index 7a4c40d..084ba80 100644 --- a/man/setGeneFactors.Rd +++ b/man/setGeneFactors.Rd @@ -7,6 +7,8 @@ setGeneFactors(snps, txdb, fill = TRUE, verbose = TRUE) } \arguments{ +\item{snps}{SNP annotations} + \item{txdb}{TxDb object (ex. TxDb.Hsapiens.UCSC.hg19.knownGene).} \item{fill}{SNPs mapping to genes not annotated in txdb will be given unique IDs} diff --git a/man/setGexpDev.Rd b/man/setGexpDev.Rd index 1068e50..3b9a467 100644 --- a/man/setGexpDev.Rd +++ b/man/setGexpDev.Rd @@ -27,7 +27,9 @@ Set needed absolute gene expression deviance to be able to distinguish neutral f data(gexp) data(ref) require(biomaRt) ## for gene coordinates -mart.obj <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", dataset = 'hsapiens_gene_ensembl', host = "jul2015.archive.ensembl.org") +mart.obj <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", + dataset = 'hsapiens_gene_ensembl', + host = "jul2015.archive.ensembl.org") gexp.mats <- setGexpMats(gexp, ref, mart.obj, filter=FALSE, scale=FALSE) dev <- setGexpDev(gexp.mats$gexp.norm) diff --git a/man/setGexpMats.Rd b/man/setGexpMats.Rd index bc414e3..647ae83 100644 --- a/man/setGexpMats.Rd +++ b/man/setGexpMats.Rd @@ -39,7 +39,9 @@ Set gene expression matrices, normalizes, and maps genes to genomic coordinates data(gexp) data(ref) require(biomaRt) ## for gene coordinates -mart.obj <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", dataset = 'hsapiens_gene_ensembl', host = "jul2015.archive.ensembl.org") +mart.obj <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", + dataset = 'hsapiens_gene_ensembl', + host = "jul2015.archive.ensembl.org") gexp.mats <- setGexpMats(gexp, ref, 
mart.obj, filter=FALSE, scale=FALSE) } diff --git a/man/setMvFit.Rd b/man/setMvFit.Rd index fffb37b..c485e28 100644 --- a/man/setMvFit.Rd +++ b/man/setMvFit.Rd @@ -25,7 +25,9 @@ Model expected gene expression variance as a function of number of genes data(gexp) data(ref) require(biomaRt) ## for gene coordinates -mart.obj <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", dataset = 'hsapiens_gene_ensembl', host = "jul2015.archive.ensembl.org") +mart.obj <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", + dataset = 'hsapiens_gene_ensembl', + host = "jul2015.archive.ensembl.org") gexp.mats <- setGexpMats(gexp, ref, mart.obj, filter=FALSE, scale=FALSE) mvFit <- setMvFit(gexp.mats$gexp.norm) diff --git a/man/trimSnps.Rd b/man/trimSnps.Rd index 93a2b99..b330fee 100644 --- a/man/trimSnps.Rd +++ b/man/trimSnps.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/prepareData_cxue.R +% Please edit documentation in R/clonal_deletion_detection.R \name{trimSnps} \alias{trimSnps} \title{Function to filter out SNPs from ExAC database}