From ddcaae4a6093685875052219141e5ea41030fc55 Mon Sep 17 00:00:00 2001
From: jgranja24 <jgranja@stanford.edu>
Date: Mon, 13 Jul 2020 11:17:44 -0700
Subject: [PATCH] Dev 053120 (#242)

* update error messages in RNA integration

* update motif names and gene score matrix documentation

* bugfix positions

* attempt to error handle co-accessibility error

* add cellsToUse option to addCoAccessibility

* bug fix args chromSizes

* bugfix plotPeak2GeneHeatmap

* bugfix bigwig not being normalized

* add tryCatch to endLogging

* add pvalue to output of differential testing

* updates
---
 .DS_Store               | Bin 12292 -> 12292 bytes
 R/AllClasses.R          |  20 ++++++++++++--------
 R/AnnotationPeaks.R     |  38 ++++++++++++++++++++++++++++++++++++--
 R/GroupExport.R         |   7 ++++---
 R/IntegrativeAnalysis.R |  15 ++++++++++++---
 R/LoggerUtils.R         |  21 ++++++++++++---------
 R/MarkerFeatures.R      |  21 ++++++++++++---------
 R/MatrixGeneScores.R    |  15 ++++++++++++---
 R/MatrixTiles.R         |   1 +
 R/RNAIntegration.R      |   4 ++--
 10 files changed, 103 insertions(+), 39 deletions(-)
diff --git a/.DS_Store b/.DS_Store
index ddbe2f5d6cfd576ddafdbee0242a81c1a2287bac..c0f50e6f66facda6c8a73b019cf69d3547665ea7 100644
GIT binary patch
delta 1396
zcmdUtO-xi*6vxkh7~o#H$i*4p0n&kve5hlb44`6&l+M(Ops0mtZHvtKp2>^D49<*1
z#db_<O`68m_!iaFXk3`a_!Y#j#E+!uqDEPmbd<=dF-;R!8f_PCH@$BjO>ELlckNlc
zcklh*bI-~7{SOWw96r*I(uoZ>%TPQi<SCc6pvhCn7%OE~XH!h$c5IVX<Krtrf%?|g
zwhsgK;~k+6tIJbqO*J(vB_hzKczP@xizj15t_!izxS7rbgEMdyxC@Jl8J|-kO0}M~
zJ$1dp>UM{%pFLsgl=N1dn_D%%p=oijePvf9S0>Bn39qQMp4VvEt)^}yGUg7`NbzzQ
zd>|1WG4fZmYOR(@Kijf9X71W;X7=Xh$?~$g*EI3I9?cl*lDeIx%ZH3R_x)NUlULb$
zwi-?u>Z?1oTC_!6CfGpQ+-Kw;2`mv}V5gbTWp<j+W`{%@lR6)aM`K3Zd2qStV6vBQ
z9y3yDGnsG>JH;wijsV}XJvlO>T2DL2-CCH*yKKlV-WN@0`bUk#Y<<nIz0KIWw$iQZ
zhR*zIl=<+#k!|^F6?7Jy7}ZgTBGgC2l%jDuOvmUnU7>4qoxY<f%Fzt{M9=6sy`*30
zH@HxMLKLA48XD1z7A!;vt!T$Gyop})VLkdWfDaIZ4g))o#3)ioV=q1q;1hg`L-+z;
z;TXQg37o-2T*3q{<6GRreN5pIrtu@5;)Qi+zAyMv_38Lvbk+w)|Jet=H9c$O+E;y0
zQ99RV@9|btR@FByw5qGNGT|Fk9E?5IkXOsfVtcYIr&ZU~HoWfhNqeL$S6f}9iDq93
zSFH2eqQy#cDb~(*Y0xKRfg;TFYFoST$wI|)Hgl!u<Z_xzcV1o9rPLyY)yWM<gpg!A
zkF)G6VE!GKd-MZ6QB3{-DH0{{C>9Nh#S$z>7rGI_TUf0qY{WZ?#2|*S4N=8oCr0u#
zc3~g(WAGznaR8s;a~xJgzEnhx=b4<uH#m!PIFAdsh8wtvTeyuo_#O}N*zt`E&R^N)
yZ-E)>SFL%{%RKIh1HZc_OX{B#)fw?<#vC&u%8_Xa{`kL*1^!8Q%hUf~*M9>0<1S_Z

delta 2310
zcmd6oYfw~W7{{OgvcS1;Ie@t5tirA=$i-E5VF48t5WIsL(s&KI?AEL<3$lQtX4}Q4
zOe>v}Syp4(Ok>fPRGdn?*axf8YEnwn{7^GFQy=m}O;(d*?|ILHm$drSv>*1J_jz{C
z`<(ap|NnPO%a)cMi;(u_;-jgM4<#vkExOD3Xv$=-+ga)@+3a?1@_Kyw5oe_v)EJ8N
z)wYE?+lVYj+qy!*NRP*J9+o(3d_tliO0t}+&Yd@ZL4AY1&zhmXm9b8*wPx5vu|LH&
zc3fUjv2V(Z+QC#MEj?qDEbFJNMNtyP!E_}pRn^ong5naE^|l4q^#*%3@SkO6tMV#;
zYfqq|(;vJy81O3tg2+2L*{ZtI=+t%x*tcI0_+#XJwNO-Ag{DYwW5BGq<w>Fv5u4Tq
z!+!H9uUsl9e(~;5Yg;rBGHd1XWI<UVF6|3+M}nPUvsful7t-Jsm$i3xbOgdZ5q@-4
zwW>B7N7uJTdKPsB!fdRM+4;tM)v>Xb8@(F8-xx}8Rrl^7<KwC>;48Vzg{Udq+7S)7
zsy${w0SS~v9-2w>shPTH18t*cX)hh70Xjk-(n<P?zNSm`BmG3b&=vX>fCW~>BLz0(
zVm$Iu057Iv2C6U@^RNK*xC<+=68E4P?FgX*o#;XzqIeJw;}Jx+;Ti10b9f&6(T{g<
z4DaGSe1tD>8fP$wi@1c#_!(EtAuWhazi{|1%oW$zY6_$)CXknJFAxV8Na1Ql>5T*$
z^#j(VVVx+VcrL{hN9JgoGp|UOY%3t^0b71_jAmCIIgVU`-LxVM8e2Kv!B7tf3i#HM
z@0chEie*sM>|=^O4q1s~IuQBRS0+1@c!pfy+bL6JS+O!rnB5swtee2#$DUg!&lb}x
zBq}sQFM0=j*XjK&yHDseeZ#o@PS+SM3F(ZNlksw+oUy7#E$T1}3$cjNYD5#K)q>U-
zu`s$;VFNZ{Gag{z9>){djwkUHb~9=(;6=QIS8)KZGk$Mi07vk4jN=J>jv-t$35}3z
zxP{y${j8dw7)KH6rgmL*?#w7**7te2^-DdI-Oh4fg?`LgRu=os4I!|5nDNjJA)w8h
zFV!y#ws(e?FcUJmlk^9bQ9Kc9teK-n1TXd{WoG4!pWrE*T2-g-8&{gNl@S?jQ<F8j
zHdfz}t4425gk+|UY=<WOH4%)C%b{tvB!W@SFVG~O23!qB#XV7ycp7j87!_}cq#0=t
z2yu-VrSb|%TN;g}0oR66t(+!lBVk}9gD_~6t7|mvc8S2>#i7j+gc~wpBnS-u2Sjdr
zjdI9G(`Yd*CqJ#H?X-(oe}9zvne&g)3HprAF!!IOA*Q46=`#JswDbpEMFLVW7TItx
zJr$yi>8XP0X%^;i|2JYOmUI96v4*=pjP*=H4{`5rj~RatUco^e=F)i|AK*An;uOBb
zc}%$Q585%bHLRR%iQ|%I&ynixNIvU2`EyCewHbc{nc1#_i6s@&YV=f_syF8rCf`I?
ze=K)q)ShS07YwIc40UKyk=<=%yb{N^_F~Cv#JLi0c70O0k@t$#>`sv?jbK+2%<c@S
znwfhmGq<_Vv)73%LYUKI@oZ9Ib{=68y$Q-|bdZkIr_9kqbdkQJALt4Q0?S~PbI5@U
zd6<M^mc%9S@ieYR4fA+C8koZuGl#FlDwfBqnZws$Ez4x)aAxq0xSv_vF!)w(@TYk!
z??F6wxZaYUml3<`z9rq^P-pl)cKg`5?!qD2`E5etu-}-+{ufEghKo1lQy{Z-7Lk%f
QbjV(C=HJd5{LfPVPuULeeE<Le

diff --git a/R/AllClasses.R b/R/AllClasses.R
index 466aadaa..299b1655 100644
--- a/R/AllClasses.R
+++ b/R/AllClasses.R
@@ -391,16 +391,20 @@ loadArchRProject <- function(
       #Postions
       if(!is.null(ArchRProj@peakAnnotation[[i]]$Positions)){
 
-        PositionsNew <- gsub(outputDir, outputDirNew, ArchRProj@peakAnnotation[[i]]$Positions)
-        if(!all(file.exists(PositionsNew))){
-          if(force){
-            keepAnno[i] <- FALSE
-            message("Positions for peakAnnotation do not exist in saved ArchRProject!")
-          }else{
-            stop("Positions for peakAnnotation do not exist in saved ArchRProject!")
+        if(tolower(ArchRProj@peakAnnotation[[i]]$Positions) != "none"){
+
+          PositionsNew <- gsub(outputDir, outputDirNew, ArchRProj@peakAnnotation[[i]]$Positions)
+          if(!all(file.exists(PositionsNew))){
+            if(force){
+              keepAnno[i] <- FALSE
+              message("Positions for peakAnnotation do not exist in saved ArchRProject!")
+            }else{
+              stop("Positions for peakAnnotation do not exist in saved ArchRProject!")
+            }
           }
+          ArchRProj@peakAnnotation[[i]]$Positions <- PositionsNew
+
         }
-        ArchRProj@peakAnnotation[[i]]$Positions <- PositionsNew
 
       }
 
diff --git a/R/AnnotationPeaks.R b/R/AnnotationPeaks.R
index 5413120a..eda4bb88 100644
--- a/R/AnnotationPeaks.R
+++ b/R/AnnotationPeaks.R
@@ -254,6 +254,7 @@ addMotifAnnotations <- function(
   name = "Motif",
   species = NULL,
   collection = "CORE",
+  motifPWMs = NULL,
   cutOff = 5e-05, 
   width = 7,
   version = 2,
@@ -263,7 +264,7 @@ addMotifAnnotations <- function(
   ){
 
   .validInput(input = ArchRProj, name = "ArchRProj", valid = c("ArchRProj"))
-  .validInput(input = motifSet, name = "motifSet", valid = c("character"))
+  .validInput(input = motifSet, name = "motifSet", valid = c("character", "null"))
   .validInput(input = name, name = "name", valid = c("character"))
   .validInput(input = species, name = "species", valid = c("character", "null"))
   .validInput(input = collection, name = "collection", valid = c("character", "null"))
@@ -272,6 +273,17 @@ addMotifAnnotations <- function(
   .validInput(input = force, name = "force", valid = c("boolean"))
   .validInput(input = logFile, name = "logFile", valid = c("character"))
 
+  if(!is.null(motifPWMs)){
+    if(!is(motifPWMs, "PWMatrixList")){
+      stop("User Supplied motifPWMS must be a PWMatrixList!")
+    }
+    motifSet <- "Custom"
+  }
+
+  if(is.null(motifSet)){
+    stop("Must provide motifSet or motifPWMs!")
+  }
+
   .requirePackage("motifmatchr", installInfo='BiocManager::install("motifmatchr")')
 
   tstart <- Sys.time()
@@ -389,6 +401,12 @@ addMotifAnnotations <- function(
     motifs <- obj$motifs
     motifSummary <- obj$motifSummary
 
+  }else if(tolower(motifSet)=="custom"){
+
+    obj <- NULL
+    motifs <- motifPWMs
+    motifSummary <- NULL
+
   }else{
 
     stop("Error MotifSet Not Recognized!")
@@ -501,6 +519,10 @@ addMotifAnnotations <- function(
 
   motifNames <- lapply(seq_along(motifs), function(x){
     namex <- make.names(motifs[[x]]@name)
+    if(grepl("LINE", namex)){
+      splitNamex <- stringr::str_split(motifs[[x]]@ID, pattern="\\_", simplify = TRUE)
+      namex <- splitNamex[1, grep("LINE",splitNamex[1,]) + 1]
+    }
     if(substr(namex,nchar(namex),nchar(namex))=="."){
       namex <- substr(namex,1,nchar(namex)-1)
     }
@@ -508,10 +530,22 @@ addMotifAnnotations <- function(
     namex
   }) %>% unlist(.)
 
+  motifNames2 <- lapply(seq_along(motifs), function(x){
+    namex <- make.names(motifs[[x]]@name)
+    if(grepl("LINE", namex)){
+      splitNamex <- stringr::str_split(motifs[[x]]@ID, pattern="\\_", simplify = TRUE)
+      namex <- splitNamex[1, grep("LINE",splitNamex[1,]) + 1]
+    }
+    if(substr(namex,nchar(namex),nchar(namex))=="."){
+      namex <- substr(namex,1,nchar(namex)-1)
+    }
+    namex
+  }) %>% unlist(.)
+
   motifDF <- lapply(seq_along(motifs), function(x){
     df <- data.frame(
       row.names = motifNames[x],
-      name = motifs[[x]]@name[[1]],
+      name = motifNames2[[x]],
       ID = motifs[[x]]@ID,
       strand = motifs[[x]]@strand,
       stringsAsFactors = FALSE
diff --git a/R/GroupExport.R b/R/GroupExport.R
index c99b6465..c9025f57 100644
--- a/R/GroupExport.R
+++ b/R/GroupExport.R
@@ -184,7 +184,7 @@ getGroupBW <- function(
   ArrowFiles <- getArrowFiles(ArchRProj)
   Groups <- getCellColData(ArchRProj = ArchRProj, select = groupBy, drop = TRUE)
 
-  if(tolower(normMethod) %in% c("ReadsInTSS", "ReadsInPromoter", "nFrags")){
+  if(tolower(normMethod) %in% tolower(c("ReadsInTSS", "ReadsInPromoter", "nFrags"))){
     normBy <- getCellColData(ArchRProj = ArchRProj, select = normMethod)
   }else{
     normBy <- NULL
@@ -292,7 +292,8 @@ getGroupBW <- function(
 
   #Cells
   cellGroupi <- cellGroups[[i]]
-  
+  #print(sum(normBy[cellGroupi, 1]))
+
   #Bigwig File!
   covFile <- file.path(bwDir, paste0(make.names(names(cellGroups)[i]), "-TileSize-",tileSize,"-normMethod-",normMethod,"-ArchR.bw"))
   rmf <- .suppressAll(file.remove(covFile))
@@ -345,7 +346,7 @@ getGroupBW <- function(
       tilesk$reads <- mat
 
       if(tolower(normMethod) %in% c("ReadsInTSS", "ReadsInPromoter", "nFrags")){
-        tilesk$reads <- tilesk$reads * 10^6 / sum(normBy[cellGroupi, 1])
+        tilesk$reads <- tilesk$reads * 10^4 / sum(normBy[cellGroupi, 1])
       }else if(tolower(normMethod) %in% c("nCells")){
         tilesk$reads <- tilesk$reads / length(cellGroupi)
       }
diff --git a/R/IntegrativeAnalysis.R b/R/IntegrativeAnalysis.R
index e4b6f936..69425cca 100644
--- a/R/IntegrativeAnalysis.R
+++ b/R/IntegrativeAnalysis.R
@@ -668,6 +668,7 @@ correlateTrajectories <- function(
 #' `reducedDims` were originally created during dimensionality reduction. This idea was introduced by Timothy Stuart.
 #' @param corCutOff A numeric cutoff for the correlation of each dimension to the sequencing depth. If the dimension has a correlation to
 #' sequencing depth that is greater than the `corCutOff`, it will be excluded from analysis.
+#' @param cellsToUse A character vector of cellNames to compute coAccessibility on if desired to run on a subset of the total cells.
 #' @param k The number of k-nearest neighbors to use for creating single-cell groups for correlation analyses.
 #' @param knnIteration The number of k-nearest neighbor groupings to test for passing the supplied `overlapCutoff`.
 #' @param overlapCutoff The maximum allowable overlap between the current group and all previous groups to permit the current group be
@@ -688,6 +689,7 @@ addCoAccessibility <- function(
   dimsToUse = 1:30,
   scaleDims = NULL,
   corCutOff = 0.75,
+  cellsToUse = NULL,
   k = 100, 
   knnIteration = 500, 
   overlapCutoff = 0.8, 
@@ -705,6 +707,7 @@ addCoAccessibility <- function(
   .validInput(input = dimsToUse, name = "dimsToUse", valid = c("numeric", "null"))
   .validInput(input = scaleDims, name = "scaleDims", valid = c("boolean", "null"))
   .validInput(input = corCutOff, name = "corCutOff", valid = c("numeric", "null"))
+  .validInput(input = cellsToUse, name = "cellsToUse", valid = c("character", "null"))
   .validInput(input = k, name = "k", valid = c("integer"))
   .validInput(input = knnIteration, name = "knnIteration", valid = c("integer"))
   .validInput(input = overlapCutoff, name = "overlapCutoff", valid = c("numeric"))
@@ -726,6 +729,9 @@ addCoAccessibility <- function(
 
   #Get Reduced Dims
   rD <- getReducedDims(ArchRProj, reducedDims = reducedDims, corCutOff = corCutOff, dimsToUse = dimsToUse)
+  if(!is.null(cellsToUse)){
+    rD <- rD[cellsToUse, ,drop=FALSE]
+  }
 
   #Subsample
   idx <- sample(seq_len(nrow(rD)), knnIteration, replace = !nrow(rD) >= knnIteration)
@@ -762,7 +768,7 @@ addCoAccessibility <- function(
   o$seqnames <- seqnames(peakSet)[o[,1]]
   o$idx1 <- peakSet$idx[o[,1]]
   o$idx2 <- peakSet$idx[o[,2]]
-  o$correlation <- NA
+  o$correlation <- -999
 
   #Peak Matrix ColSums
   cS <- .getColSums(getArrowFiles(ArchRProj), chri, verbose = FALSE, useMatrix = "PeakMatrix")
@@ -795,7 +801,10 @@ addCoAccessibility <- function(
 
     #Correlations
     idx <- BiocGenerics::which(o$seqnames==chri[x])
-    o[idx,]$correlation <- rowCorCpp(idxX = o[idx,]$idx1, idxY = o[idx,]$idx2, X = as.matrix(groupMat), Y = as.matrix(groupMat))
+    corVals <- rowCorCpp(idxX = o[idx,]$idx1, idxY = o[idx,]$idx2, X = as.matrix(groupMat), Y = as.matrix(groupMat))
+    .logThis(head(corVals), paste0("SubsetCorVals-", x), logFile = logFile)
+
+    o[idx,]$correlation <- as.numeric(corVals)
 
     .logThis(groupMat, paste0("SubsetGroupMat-", x), logFile = logFile)
     .logThis(o[idx,], paste0("SubsetCoA-", x), logFile = logFile)
@@ -1425,7 +1434,7 @@ plotPeak2GeneHeatmap <- function(
     KNNx <- KNNList[[x]]
     names(sort(table(ccd[KNNx, 1, drop = TRUE]), decreasing = TRUE))[1]
   }) %>% unlist
-  cD <- DataFrame(row.names=paste0("K", seq_len(ncol(mATAC))), groupBy = KNNGroups)
+  cD <- DataFrame(row.names=paste0("K_", seq_len(ncol(mATAC))), groupBy = KNNGroups)
   pal <- paletteDiscrete(values=gtools::mixedsort(unique(ccd[,1])))
   if(!is.null(palGroup)){
     pal[names(palGroup)[names(palGroup) %in% names(pal)]] <- palGroup[names(palGroup) %in% names(pal)]
diff --git a/R/LoggerUtils.R b/R/LoggerUtils.R
index d593ddc9..4df2e5b3 100644
--- a/R/LoggerUtils.R
+++ b/R/LoggerUtils.R
@@ -547,15 +547,18 @@ createLogFile <- function(
   }
 
   rL <- readLines(logFile)
-  t1 <- gsub("Start Time : ","", grep("Start Time", rL, ignore.case = TRUE, value = TRUE))
-  mn <- as.numeric(difftime(Sys.time(), as.POSIXct(t1), units = "mins"))
-  hr <- as.numeric(difftime(Sys.time(), as.POSIXct(t1), units = "hours"))
-  cat("\n------- Completed\n\n", file = logFile, append = TRUE)
-  cat(paste0("End Time : ",Sys.time(),"\n"), file = logFile, append = TRUE)
-  cat(paste0("Elapsed Time Minutes = ", mn), file = logFile, append = TRUE)
-  cat(paste0("\nElapsed Time Hours = ", hr), file = logFile, append = TRUE)
-  cat("\n\n-------\n\n\n\n", file = logFile, append = TRUE)
-  message("ArchR logging successful to : ", logFile)
+  o <- tryCatch({
+    t1 <- gsub("Start Time : ","", grep("Start Time", rL, ignore.case = TRUE, value = TRUE))
+    mn <- as.numeric(difftime(Sys.time(), as.POSIXct(t1), units = "mins"))
+    hr <- as.numeric(difftime(Sys.time(), as.POSIXct(t1), units = "hours"))
+    cat("\n------- Completed\n\n", file = logFile, append = TRUE)
+    cat(paste0("End Time : ",Sys.time(),"\n"), file = logFile, append = TRUE)
+    cat(paste0("Elapsed Time Minutes = ", mn), file = logFile, append = TRUE)
+    cat(paste0("\nElapsed Time Hours = ", hr), file = logFile, append = TRUE)
+    cat("\n\n-------\n\n\n\n", file = logFile, append = TRUE)
+    message("ArchR logging successful to : ", logFile)
+  }, error = function(x){
+  })
 
   # tryCatch({
   #   R.utils::gzip(logFile, paste0(logFile, ".gz"))
diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R
index 4ce95035..c6e9a38e 100644
--- a/R/MarkerFeatures.R
+++ b/R/MarkerFeatures.R
@@ -251,6 +251,7 @@ getMarkerFeatures <- function(
               Log2FC = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$log2FC)) %>% Reduce("cbind",.),
               Mean = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$mean1)) %>% Reduce("cbind",.),
               FDR = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$fdr)) %>% Reduce("cbind",.),
+              Pval = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$pval)) %>% Reduce("cbind",.),
               MeanDiff = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$mean1 - diffList[[x]]$mean2)) %>% Reduce("cbind",.),
               AUC = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$auc)) %>% Reduce("cbind",.),
               MeanBGD = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$mean2)) %>% Reduce("cbind",.)
@@ -265,6 +266,7 @@ getMarkerFeatures <- function(
               Mean = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$mean1)) %>% Reduce("cbind",.),
               Variance = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$var1)) %>% Reduce("cbind",.),
               FDR = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$fdr)) %>% Reduce("cbind",.),
+              Pval = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$pval)) %>% Reduce("cbind",.),
               MeanDiff = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$mean1 - diffList[[x]]$mean2)) %>% Reduce("cbind",.),
               MeanBGD = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$mean2)) %>% Reduce("cbind",.),
               VarianceBGD = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$var2)) %>% Reduce("cbind",.)
@@ -278,6 +280,7 @@ getMarkerFeatures <- function(
               Log2FC = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$log2FC)) %>% Reduce("cbind",.),
               Mean = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$mean1)) %>% Reduce("cbind",.),
               FDR = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$fdr)) %>% Reduce("cbind",.),
+              Pval = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$pval)) %>% Reduce("cbind",.),
               MeanDiff = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$mean1 - diffList[[x]]$mean2)) %>% Reduce("cbind",.),
               MeanBGD = lapply(seq_along(diffList), function(x) data.frame(x = diffList[[x]]$mean2)) %>% Reduce("cbind",.)
             ),
@@ -863,8 +866,8 @@ plotMarkerHeatmap <- function(
   .logThis(mat, "mat", logFile = logFile) 
 
   idx <- which(rowSums(passMat, na.rm = TRUE) > 0 & matrixStats::rowVars(mat) != 0 & !is.na(matrixStats::rowVars(mat)))
-  mat <- mat[idx,]
-  passMat <- passMat[idx,]
+  mat <- mat[idx,,drop=FALSE]
+  passMat <- passMat[idx,,drop=FALSE]
 
   if(nrow(mat) == 0){
     stop("No Makers Found!")
@@ -887,7 +890,7 @@ plotMarkerHeatmap <- function(
   #identify to remove
   if(!is.null(grepExclude) & !is.null(rownames(mat))){
     idx2 <- which(!grepl(grepExclude, rownames(mat)))
-    mat <- mat[idx2,]
+    mat <- mat[idx2,,drop=FALSE]
   }
 
   if(nrow(mat)==0){
@@ -906,11 +909,11 @@ plotMarkerHeatmap <- function(
 
   if(binaryClusterRows){
     if(invert){
-      bS <- .binarySort(-mat, lmat = passMat[rownames(mat), colnames(mat)], clusterCols = clusterCols)
-      mat <- -bS[[1]][,colnames(mat)]
+      bS <- .binarySort(-mat, lmat = passMat[rownames(mat), colnames(mat),drop=FALSE], clusterCols = clusterCols)
+      mat <- -bS[[1]][,colnames(mat),drop=FALSE]
     }else{
-      bS <- .binarySort(mat, lmat = passMat[rownames(mat), colnames(mat)], clusterCols = clusterCols)
-      mat <- bS[[1]][,colnames(mat)]
+      bS <- .binarySort(mat, lmat = passMat[rownames(mat), colnames(mat),drop=FALSE], clusterCols = clusterCols)
+      mat <- bS[[1]][,colnames(mat),drop=FALSE]
     }
     clusterRows <- FALSE
     clusterCols <- bS[[2]]
@@ -945,9 +948,9 @@ plotMarkerHeatmap <- function(
 
     #mat <- t(mat[rev(seq_len(nrow(mat))), rev(clusterCols$order)])
     if(!is.null(clusterCols)){
-      mat <- t(mat[seq_len(nrow(mat)), clusterCols$order])
+      mat <- t(mat[seq_len(nrow(mat)), clusterCols$order, drop = FALSE])
     }else{
-      mat <- t(mat[seq_len(nrow(mat)), ])
+      mat <- t(mat[seq_len(nrow(mat)), , drop = FALSE])
     }
 
     if(!is.null(labelMarkers)){
diff --git a/R/MatrixGeneScores.R b/R/MatrixGeneScores.R
index 10aca1ef..5231d596 100644
--- a/R/MatrixGeneScores.R
+++ b/R/MatrixGeneScores.R
@@ -14,12 +14,21 @@
 #' @param matrixName The name to be used for storage of the gene activity score matrix in the provided `ArchRProject` or ArrowFiles.
 #' @param extendUpstream The minimum and maximum number of basepairs upstream of the transcription start site to consider for gene
 #' activity score calculation.
-#' @param extendDownstream The minimum and maximum number of basepairs downstream of the transcription start site to consider for gene activity score calculation.
-#' @param tileSize The size of the tiles used for binning counts prior to gene activity score calculation.
-#' @param ceiling The maximum counts per tile allowed. This is used to prevent large biases in tile counts.
+#' @param extendDownstream The minimum and maximum number of basepairs downstream of the transcription start site or transcription termination site 
+#' (based on 'useTSS') to consider for gene activity score calculation.
 #' @param useGeneBoundaries A boolean value indicating whether gene boundaries should be employed during gene activity score
 #' calculation. Gene boundaries refers to the process of preventing tiles from contributing to the gene score of a given gene
 #' if there is a second gene's transcription start site between the tile and the gene of interest.
+#' @param geneUpstream An integer describing the number of bp upstream the gene to extend the gene body. This effectively makes the gene body larger as there
+#' are proximal peaks that should be weighted equally to the gene body. This parameter is used if 'useTSS=FALSE'.
+#' @param geneDownstream An integer describing the number of bp downstream of the gene to extend the gene body. This effectively makes the gene body larger as there
+#' are proximal peaks that should be weighted equally to the gene body. This parameter is used if 'useTSS=FALSE'.
+#' @param useTSS A boolean describing whether to build gene model based on gene TSS or the gene body.
+#' @param tileSize The size of the tiles used for binning counts prior to gene activity score calculation.
+#' @param ceiling The maximum counts per tile allowed. This is used to prevent large biases in tile counts.
+#' @param geneScaleFactor A numeric scaling factor to weight genes based on the inverse of their length i.e. [(Scale Factor)/(Gene Length)]. This
+#' is scaled from 1 to the scale factor. Small genes will be the scale factor while extremely large genes will be closer to 1. This scaling helps with
+#' the relative gene score value.
 #' @param scaleTo Each column in the calculated gene score matrix will be normalized to a column sum designated by `scaleTo`.
 #' @param excludeChr A character vector containing the `seqnames` of the chromosomes that should be excluded from this analysis.
 #' @param blacklist A `GRanges` object containing genomic regions to blacklist that may be extremeley over-represented and thus
diff --git a/R/MatrixTiles.R b/R/MatrixTiles.R
index a4f8be2d..b00901fd 100644
--- a/R/MatrixTiles.R
+++ b/R/MatrixTiles.R
@@ -96,6 +96,7 @@ addTileMatrix <- function(
   excludeChr = c("chrM", "chrY"), 
   blacklist = NULL, 
   chromLengths = NULL, 
+  chromSizes = NULL,
   force = FALSE,
   subThreads = 1,
   tstart = NULL,
diff --git a/R/RNAIntegration.R b/R/RNAIntegration.R
index 7c4b1be4..f9773a29 100644
--- a/R/RNAIntegration.R
+++ b/R/RNAIntegration.R
@@ -160,8 +160,8 @@ addGeneIntegrationMatrix <- function(
   }
 
   if(!all(nCell == 1)){
-    .logMessage(paste0("Missing ", length(which(nCell == 0)), " Overlapping ", length(which(nCell > 1))," cells from ArchRProj in groupList!"), logFile = logFile)
-    stop("Missing ", length(which(nCell == 0)), " Overlapping ", length(which(nCell > 1))," cells from ArchRProj in groupList!")
+    .logMessage(paste0("Missing ", length(which(nCell == 0)), " cells. Found ", length(which(nCell > 1))," overlapping cells from ArchRProj in groupList! Cannot have overlapping/missing cells in ATAC input, check 'groupList' argument!"), logFile = logFile)
+    stop("Missing ", length(which(nCell == 0)), " cells. Found ", length(which(nCell > 1))," overlapping cells from ArchRProj in groupList! Cannot have overlapping/missing cells in ATAC input, check 'groupList' argument!")
   }
 
   #########################################################################################