Skip to content

Commit

Permalink
Dev 053120 (GreenleafLab#242)
Browse files Browse the repository at this point in the history
* update error messages in RNA integration

* update motif names and gene score matrix documentation

* bugfix positions

* attempt to handle co-accessibility error

* add cellsToUse option to addCoAccessibility

* bug fix args chromSizes

* bugfix plotPeak2GeneHeatmap

* bugfix bigwig not being normalized

* add tryCatch to endLogging

* add pvalue to output of differential testing

* updates
  • Loading branch information
jgranja24 authored Jul 13, 2020
1 parent 5f855d7 commit ddcaae4
Show file tree
Hide file tree
Showing 10 changed files with 103 additions and 39 deletions.
Binary file modified .DS_Store
Binary file not shown.
20 changes: 12 additions & 8 deletions R/AllClasses.R
Original file line number Diff line number Diff line change
Expand Up @@ -391,16 +391,20 @@ loadArchRProject <- function(
#Postions
if(!is.null(ArchRProj@peakAnnotation[[i]]$Positions)){

PositionsNew <- gsub(outputDir, outputDirNew, ArchRProj@peakAnnotation[[i]]$Positions)
if(!all(file.exists(PositionsNew))){
if(force){
keepAnno[i] <- FALSE
message("Positions for peakAnnotation do not exist in saved ArchRProject!")
}else{
stop("Positions for peakAnnotation do not exist in saved ArchRProject!")
if(tolower(ArchRProj@peakAnnotation[[i]]$Positions) != "none"){

PositionsNew <- gsub(outputDir, outputDirNew, ArchRProj@peakAnnotation[[i]]$Positions)
if(!all(file.exists(PositionsNew))){
if(force){
keepAnno[i] <- FALSE
message("Positions for peakAnnotation do not exist in saved ArchRProject!")
}else{
stop("Positions for peakAnnotation do not exist in saved ArchRProject!")
}
}
ArchRProj@peakAnnotation[[i]]$Positions <- PositionsNew

}
ArchRProj@peakAnnotation[[i]]$Positions <- PositionsNew

}

Expand Down
38 changes: 36 additions & 2 deletions R/AnnotationPeaks.R
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,7 @@ addMotifAnnotations <- function(
name = "Motif",
species = NULL,
collection = "CORE",
motifPWMs = NULL,
cutOff = 5e-05,
width = 7,
version = 2,
Expand All @@ -263,7 +264,7 @@ addMotifAnnotations <- function(
){

.validInput(input = ArchRProj, name = "ArchRProj", valid = c("ArchRProj"))
.validInput(input = motifSet, name = "motifSet", valid = c("character"))
.validInput(input = motifSet, name = "motifSet", valid = c("character", "null"))
.validInput(input = name, name = "name", valid = c("character"))
.validInput(input = species, name = "species", valid = c("character", "null"))
.validInput(input = collection, name = "collection", valid = c("character", "null"))
Expand All @@ -272,6 +273,17 @@ addMotifAnnotations <- function(
.validInput(input = force, name = "force", valid = c("boolean"))
.validInput(input = logFile, name = "logFile", valid = c("character"))

if(!is.null(motifPWMs)){
if(!is(motifPWMs, "PWMatrixList")){
stop("User Supplied motifPWMS must be a PWMatrixList!")
}
motifSet <- "Custom"
}

if(is.null(motifSet)){
stop("Must provide motifSet or motifPWMs!")
}

.requirePackage("motifmatchr", installInfo='BiocManager::install("motifmatchr")')

tstart <- Sys.time()
Expand Down Expand Up @@ -389,6 +401,12 @@ addMotifAnnotations <- function(
motifs <- obj$motifs
motifSummary <- obj$motifSummary

}else if(tolower(motifSet)=="custom"){

obj <- NULL
motifs <- motifPWMs
motifSummary <- NULL

}else{

stop("Error MotifSet Not Recognized!")
Expand Down Expand Up @@ -501,17 +519,33 @@ addMotifAnnotations <- function(

motifNames <- lapply(seq_along(motifs), function(x){
namex <- make.names(motifs[[x]]@name)
if(grepl("LINE", namex)){
splitNamex <- stringr::str_split(motifs[[x]]@ID, pattern="\\_", simplify = TRUE)
namex <- splitNamex[1, grep("LINE",splitNamex[1,]) + 1]
}
if(substr(namex,nchar(namex),nchar(namex))=="."){
namex <- substr(namex,1,nchar(namex)-1)
}
namex <- paste0(namex, "_", x)
namex
}) %>% unlist(.)

motifNames2 <- lapply(seq_along(motifs), function(x){
namex <- make.names(motifs[[x]]@name)
if(grepl("LINE", namex)){
splitNamex <- stringr::str_split(motifs[[x]]@ID, pattern="\\_", simplify = TRUE)
namex <- splitNamex[1, grep("LINE",splitNamex[1,]) + 1]
}
if(substr(namex,nchar(namex),nchar(namex))=="."){
namex <- substr(namex,1,nchar(namex)-1)
}
namex
}) %>% unlist(.)

motifDF <- lapply(seq_along(motifs), function(x){
df <- data.frame(
row.names = motifNames[x],
name = motifs[[x]]@name[[1]],
name = motifNames2[[x]],
ID = motifs[[x]]@ID,
strand = motifs[[x]]@strand,
stringsAsFactors = FALSE
Expand Down
7 changes: 4 additions & 3 deletions R/GroupExport.R
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ getGroupBW <- function(
ArrowFiles <- getArrowFiles(ArchRProj)
Groups <- getCellColData(ArchRProj = ArchRProj, select = groupBy, drop = TRUE)

if(tolower(normMethod) %in% c("ReadsInTSS", "ReadsInPromoter", "nFrags")){
if(tolower(normMethod) %in% tolower(c("ReadsInTSS", "ReadsInPromoter", "nFrags"))){
normBy <- getCellColData(ArchRProj = ArchRProj, select = normMethod)
}else{
normBy <- NULL
Expand Down Expand Up @@ -292,7 +292,8 @@ getGroupBW <- function(

#Cells
cellGroupi <- cellGroups[[i]]

#print(sum(normBy[cellGroupi, 1]))

#Bigwig File!
covFile <- file.path(bwDir, paste0(make.names(names(cellGroups)[i]), "-TileSize-",tileSize,"-normMethod-",normMethod,"-ArchR.bw"))
rmf <- .suppressAll(file.remove(covFile))
Expand Down Expand Up @@ -345,7 +346,7 @@ getGroupBW <- function(
tilesk$reads <- mat

if(tolower(normMethod) %in% c("ReadsInTSS", "ReadsInPromoter", "nFrags")){
tilesk$reads <- tilesk$reads * 10^6 / sum(normBy[cellGroupi, 1])
tilesk$reads <- tilesk$reads * 10^4 / sum(normBy[cellGroupi, 1])
}else if(tolower(normMethod) %in% c("nCells")){
tilesk$reads <- tilesk$reads / length(cellGroupi)
}
Expand Down
15 changes: 12 additions & 3 deletions R/IntegrativeAnalysis.R
Original file line number Diff line number Diff line change
Expand Up @@ -668,6 +668,7 @@ correlateTrajectories <- function(
#' `reducedDims` were originally created during dimensionality reduction. This idea was introduced by Timothy Stuart.
#' @param corCutOff A numeric cutoff for the correlation of each dimension to the sequencing depth. If the dimension has a correlation to
#' sequencing depth that is greater than the `corCutOff`, it will be excluded from analysis.
#' @param cellsToUse A character vector of cellNames on which to compute coAccessibility. Use this to run the analysis on a subset of the total cells.
#' @param k The number of k-nearest neighbors to use for creating single-cell groups for correlation analyses.
#' @param knnIteration The number of k-nearest neighbor groupings to test for passing the supplied `overlapCutoff`.
#' @param overlapCutoff The maximum allowable overlap between the current group and all previous groups to permit the current group be
Expand All @@ -688,6 +689,7 @@ addCoAccessibility <- function(
dimsToUse = 1:30,
scaleDims = NULL,
corCutOff = 0.75,
cellsToUse = NULL,
k = 100,
knnIteration = 500,
overlapCutoff = 0.8,
Expand All @@ -705,6 +707,7 @@ addCoAccessibility <- function(
.validInput(input = dimsToUse, name = "dimsToUse", valid = c("numeric", "null"))
.validInput(input = scaleDims, name = "scaleDims", valid = c("boolean", "null"))
.validInput(input = corCutOff, name = "corCutOff", valid = c("numeric", "null"))
.validInput(input = cellsToUse, name = "cellsToUse", valid = c("character", "null"))
.validInput(input = k, name = "k", valid = c("integer"))
.validInput(input = knnIteration, name = "knnIteration", valid = c("integer"))
.validInput(input = overlapCutoff, name = "overlapCutoff", valid = c("numeric"))
Expand All @@ -726,6 +729,9 @@ addCoAccessibility <- function(

#Get Reduced Dims
rD <- getReducedDims(ArchRProj, reducedDims = reducedDims, corCutOff = corCutOff, dimsToUse = dimsToUse)
if(!is.null(cellsToUse)){
rD <- rD[cellsToUse, ,drop=FALSE]
}

#Subsample
idx <- sample(seq_len(nrow(rD)), knnIteration, replace = !nrow(rD) >= knnIteration)
Expand Down Expand Up @@ -762,7 +768,7 @@ addCoAccessibility <- function(
o$seqnames <- seqnames(peakSet)[o[,1]]
o$idx1 <- peakSet$idx[o[,1]]
o$idx2 <- peakSet$idx[o[,2]]
o$correlation <- NA
o$correlation <- -999

#Peak Matrix ColSums
cS <- .getColSums(getArrowFiles(ArchRProj), chri, verbose = FALSE, useMatrix = "PeakMatrix")
Expand Down Expand Up @@ -795,7 +801,10 @@ addCoAccessibility <- function(

#Correlations
idx <- BiocGenerics::which(o$seqnames==chri[x])
o[idx,]$correlation <- rowCorCpp(idxX = o[idx,]$idx1, idxY = o[idx,]$idx2, X = as.matrix(groupMat), Y = as.matrix(groupMat))
corVals <- rowCorCpp(idxX = o[idx,]$idx1, idxY = o[idx,]$idx2, X = as.matrix(groupMat), Y = as.matrix(groupMat))
.logThis(head(corVals), paste0("SubsetCorVals-", x), logFile = logFile)

o[idx,]$correlation <- as.numeric(corVals)

.logThis(groupMat, paste0("SubsetGroupMat-", x), logFile = logFile)
.logThis(o[idx,], paste0("SubsetCoA-", x), logFile = logFile)
Expand Down Expand Up @@ -1425,7 +1434,7 @@ plotPeak2GeneHeatmap <- function(
KNNx <- KNNList[[x]]
names(sort(table(ccd[KNNx, 1, drop = TRUE]), decreasing = TRUE))[1]
}) %>% unlist
cD <- DataFrame(row.names=paste0("K", seq_len(ncol(mATAC))), groupBy = KNNGroups)
cD <- DataFrame(row.names=paste0("K_", seq_len(ncol(mATAC))), groupBy = KNNGroups)
pal <- paletteDiscrete(values=gtools::mixedsort(unique(ccd[,1])))
if(!is.null(palGroup)){
pal[names(palGroup)[names(palGroup) %in% names(pal)]] <- palGroup[names(palGroup) %in% names(pal)]
Expand Down
21 changes: 12 additions & 9 deletions R/LoggerUtils.R
Original file line number Diff line number Diff line change
Expand Up @@ -547,15 +547,18 @@ createLogFile <- function(
}

rL <- readLines(logFile)
t1 <- gsub("Start Time : ","", grep("Start Time", rL, ignore.case = TRUE, value = TRUE))
mn <- as.numeric(difftime(Sys.time(), as.POSIXct(t1), units = "mins"))
hr <- as.numeric(difftime(Sys.time(), as.POSIXct(t1), units = "hours"))
cat("\n------- Completed\n\n", file = logFile, append = TRUE)
cat(paste0("End Time : ",Sys.time(),"\n"), file = logFile, append = TRUE)
cat(paste0("Elapsed Time Minutes = ", mn), file = logFile, append = TRUE)
cat(paste0("\nElapsed Time Hours = ", hr), file = logFile, append = TRUE)
cat("\n\n-------\n\n\n\n", file = logFile, append = TRUE)
message("ArchR logging successful to : ", logFile)
o <- tryCatch({
t1 <- gsub("Start Time : ","", grep("Start Time", rL, ignore.case = TRUE, value = TRUE))
mn <- as.numeric(difftime(Sys.time(), as.POSIXct(t1), units = "mins"))
hr <- as.numeric(difftime(Sys.time(), as.POSIXct(t1), units = "hours"))
cat("\n------- Completed\n\n", file = logFile, append = TRUE)
cat(paste0("End Time : ",Sys.time(),"\n"), file = logFile, append = TRUE)
cat(paste0("Elapsed Time Minutes = ", mn), file = logFile, append = TRUE)
cat(paste0("\nElapsed Time Hours = ", hr), file = logFile, append = TRUE)
cat("\n\n-------\n\n\n\n", file = logFile, append = TRUE)
message("ArchR logging successful to : ", logFile)
}, error = function(x){
})

# tryCatch({
# R.utils::gzip(logFile, paste0(logFile, ".gz"))
Expand Down
21 changes: 12 additions & 9 deletions R/MarkerFeatures.R
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ getMarkerFeatures <- function(
Log2FC = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$log2FC)) %>% Reduce("cbind",.),
Mean = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$mean1)) %>% Reduce("cbind",.),
FDR = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$fdr)) %>% Reduce("cbind",.),
Pval = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$pval)) %>% Reduce("cbind",.),
MeanDiff = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$mean1 - diffList[[x]]$mean2)) %>% Reduce("cbind",.),
AUC = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$auc)) %>% Reduce("cbind",.),
MeanBGD = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$mean2)) %>% Reduce("cbind",.)
Expand All @@ -265,6 +266,7 @@ getMarkerFeatures <- function(
Mean = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$mean1)) %>% Reduce("cbind",.),
Variance = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$var1)) %>% Reduce("cbind",.),
FDR = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$fdr)) %>% Reduce("cbind",.),
Pval = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$pval)) %>% Reduce("cbind",.),
MeanDiff = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$mean1 - diffList[[x]]$mean2)) %>% Reduce("cbind",.),
MeanBGD = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$mean2)) %>% Reduce("cbind",.),
VarianceBGD = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$var2)) %>% Reduce("cbind",.)
Expand All @@ -278,6 +280,7 @@ getMarkerFeatures <- function(
Log2FC = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$log2FC)) %>% Reduce("cbind",.),
Mean = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$mean1)) %>% Reduce("cbind",.),
FDR = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$fdr)) %>% Reduce("cbind",.),
Pval = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$pval)) %>% Reduce("cbind",.),
MeanDiff = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$mean1 - diffList[[x]]$mean2)) %>% Reduce("cbind",.),
MeanBGD = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$mean2)) %>% Reduce("cbind",.)
),
Expand Down Expand Up @@ -863,8 +866,8 @@ plotMarkerHeatmap <- function(
.logThis(mat, "mat", logFile = logFile)

idx <- which(rowSums(passMat, na.rm = TRUE) > 0 & matrixStats::rowVars(mat) != 0 & !is.na(matrixStats::rowVars(mat)))
mat <- mat[idx,]
passMat <- passMat[idx,]
mat <- mat[idx,,drop=FALSE]
passMat <- passMat[idx,,drop=FALSE]

if(nrow(mat) == 0){
stop("No Makers Found!")
Expand All @@ -887,7 +890,7 @@ plotMarkerHeatmap <- function(
#identify to remove
if(!is.null(grepExclude) & !is.null(rownames(mat))){
idx2 <- which(!grepl(grepExclude, rownames(mat)))
mat <- mat[idx2,]
mat <- mat[idx2,,drop=FALSE]
}

if(nrow(mat)==0){
Expand All @@ -906,11 +909,11 @@ plotMarkerHeatmap <- function(

if(binaryClusterRows){
if(invert){
bS <- .binarySort(-mat, lmat = passMat[rownames(mat), colnames(mat)], clusterCols = clusterCols)
mat <- -bS[[1]][,colnames(mat)]
bS <- .binarySort(-mat, lmat = passMat[rownames(mat), colnames(mat),drop=FALSE], clusterCols = clusterCols)
mat <- -bS[[1]][,colnames(mat),drop=FALSE]
}else{
bS <- .binarySort(mat, lmat = passMat[rownames(mat), colnames(mat)], clusterCols = clusterCols)
mat <- bS[[1]][,colnames(mat)]
bS <- .binarySort(mat, lmat = passMat[rownames(mat), colnames(mat),drop=FALSE], clusterCols = clusterCols)
mat <- bS[[1]][,colnames(mat),drop=FALSE]
}
clusterRows <- FALSE
clusterCols <- bS[[2]]
Expand Down Expand Up @@ -945,9 +948,9 @@ plotMarkerHeatmap <- function(

#mat <- t(mat[rev(seq_len(nrow(mat))), rev(clusterCols$order)])
if(!is.null(clusterCols)){
mat <- t(mat[seq_len(nrow(mat)), clusterCols$order])
mat <- t(mat[seq_len(nrow(mat)), clusterCols$order, drop = FALSE])
}else{
mat <- t(mat[seq_len(nrow(mat)), ])
mat <- t(mat[seq_len(nrow(mat)), , drop = FALSE])
}

if(!is.null(labelMarkers)){
Expand Down
15 changes: 12 additions & 3 deletions R/MatrixGeneScores.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,21 @@
#' @param matrixName The name to be used for storage of the gene activity score matrix in the provided `ArchRProject` or ArrowFiles.
#' @param extendUpstream The minimum and maximum number of basepairs upstream of the transcription start site to consider for gene
#' activity score calculation.
#' @param extendDownstream The minimum and maximum number of basepairs downstream of the transcription start site to consider for gene activity score calculation.
#' @param tileSize The size of the tiles used for binning counts prior to gene activity score calculation.
#' @param ceiling The maximum counts per tile allowed. This is used to prevent large biases in tile counts.
#' @param extendDownstream The minimum and maximum number of basepairs downstream of the transcription start site or transcription termination site
#' (based on 'useTSS') to consider for gene activity score calculation.
#' @param useGeneBoundaries A boolean value indicating whether gene boundaries should be employed during gene activity score
#' calculation. Gene boundaries refers to the process of preventing tiles from contributing to the gene score of a given gene
#' if there is a second gene's transcription start site between the tile and the gene of interest.
#' @param geneUpstream An integer describing the number of bp upstream of the gene to extend the gene body. This effectively makes the gene body larger as there
#' are proximal peaks that should be weighted equally to the gene body. This parameter is used if 'useTSS=FALSE'.
#' @param geneDownstream An integer describing the number of bp downstream of the gene to extend the gene body. This effectively makes the gene body larger as there
#' are proximal peaks that should be weighted equally to the gene body. This parameter is used if 'useTSS=FALSE'.
#' @param useTSS A boolean describing whether to build gene model based on gene TSS or the gene body.
#' @param tileSize The size of the tiles used for binning counts prior to gene activity score calculation.
#' @param ceiling The maximum counts per tile allowed. This is used to prevent large biases in tile counts.
#' @param geneScaleFactor A numeric scaling factor to weight genes based on the inverse of their length i.e. [(Scale Factor)/(Gene Length)]. This
#' is scaled from 1 to the scale factor. Small genes will be the scale factor while extremely large genes will be closer to 1. This scaling helps with
#' the relative gene score value.
#' @param scaleTo Each column in the calculated gene score matrix will be normalized to a column sum designated by `scaleTo`.
#' @param excludeChr A character vector containing the `seqnames` of the chromosomes that should be excluded from this analysis.
#' @param blacklist A `GRanges` object containing genomic regions to blacklist that may be extremely over-represented and thus
Expand Down
1 change: 1 addition & 0 deletions R/MatrixTiles.R
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ addTileMatrix <- function(
excludeChr = c("chrM", "chrY"),
blacklist = NULL,
chromLengths = NULL,
chromSizes = NULL,
force = FALSE,
subThreads = 1,
tstart = NULL,
Expand Down
4 changes: 2 additions & 2 deletions R/RNAIntegration.R
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,8 @@ addGeneIntegrationMatrix <- function(
}

if(!all(nCell == 1)){
.logMessage(paste0("Missing ", length(which(nCell == 0)), " Overlapping ", length(which(nCell > 1))," cells from ArchRProj in groupList!"), logFile = logFile)
stop("Missing ", length(which(nCell == 0)), " Overlapping ", length(which(nCell > 1))," cells from ArchRProj in groupList!")
.logMessage(paste0("Missing ", length(which(nCell == 0)), " cells. Found ", length(which(nCell > 1))," overlapping cells from ArchRProj in groupList! Cannot have overlapping/missing cells in ATAC input, check 'groupList' argument!"), logFile = logFile)
stop("Missing ", length(which(nCell == 0)), " cells. Found ", length(which(nCell > 1))," overlapping cells from ArchRProj in groupList! Cannot have overlapping/missing cells in ATAC input, check 'groupList' argument!")
}

#########################################################################################
Expand Down

0 comments on commit ddcaae4

Please sign in to comment.