Accept covariance matrix as input

IlyaLab · Dec 19, 2017 · 7ab9c6e · 7ab9c6e
1 parent e52c4be
commit 7ab9c6e
Show file tree

Hide file tree

Showing 7 changed files with 98 additions and 18 deletions.
diff --git a/R/EmpiricalBrownsMethod/NAMESPACE b/R/EmpiricalBrownsMethod/NAMESPACE
@@ -1,2 +1,4 @@
 exportPattern("empiricalBrownsMethod")
 exportPattern("kostsMethod")
+export("calculateCovariances")
+export("calculateKostCovariance")
diff --git a/R/EmpiricalBrownsMethod/R/ebm.R b/R/EmpiricalBrownsMethod/R/ebm.R
@@ -77,24 +77,24 @@ combinePValues <- function(covar_matrix, p_values, extra_info = FALSE){
     }
 }
 
-#Input: An m x n data matrix with each of m rows representing a variable and each of n columns representing a sample. Should be of type numpy.array
+#Input: Either an m x n data matrix (data_matrix) with each of m rows representing a variable and each of n columns representing a sample, or a precomputed m x m covariance matrix (covar_matrix).
 #       A vector of m P-values to combine. May be a list or of type numpy.array.
 #Output: A combined P-value.
 #        If extra_info == True: also returns the p-value from Fisher's method, the scale factor c, and the new degrees of freedom from Brown's Method
-empiricalBrownsMethod <- function(data_matrix, p_values, extra_info = FALSE) {
+empiricalBrownsMethod <- function(p_values, data_matrix, covar_matrix, extra_info = FALSE) {
   # inputs must be numeric
-    covar_matrix = calculateCovariances(data_matrix)
+    if (missing(covar_matrix)) covar_matrix <- calculateCovariances(data_matrix)
     return(combinePValues(covar_matrix, p_values, extra_info))
 }
 
 #
 
-#Input: An m x n data matrix with each of m rows representing a variable and each of n columns representing a sample. Should be of type numeric matrix
+#Input: Either an m x n numeric data matrix (data_matrix) with each of m rows representing a variable and each of n columns representing a sample, or a precomputed m x m numeric covariance matrix (covar_matrix).
 #       A numeric vector of m P-values to combine.
 #Output: A combined P-value using Kost's Method.
 #        If extra_info == True: also returns the p-value from Fisher's method, the scale factor c, and the new degrees of freedom from Brown's Method
-kostsMethod <- function(data_matrix, p_values, extra_info = FALSE) {
-    covar_matrix <- calculateKostCovariance(data_matrix)
+kostsMethod <- function(p_values, data_matrix, covar_matrix, extra_info = FALSE) {
+    if (missing(covar_matrix)) covar_matrix <- calculateKostCovariance(data_matrix)
     combinePValues(covar_matrix, p_values, extra_info = extra_info)
 }
 

diff --git a/R/EmpiricalBrownsMethod/man/calculateCovariances.Rd b/R/EmpiricalBrownsMethod/man/calculateCovariances.Rd
@@ -0,0 +1,20 @@
+\name{calculateCovariances}
+\alias{calculateCovariances}
+\title{Calculate pairwise covariances between transformed raw data vectors}
+\description{Calculate pairwise covariances between transformed raw data vectors}
+\usage{
+  calculateCovariances(data_matrix)
+}
+\arguments{
+  \item{data_matrix}{An m x n numeric matrix with m variables in rows and n
+                      samples in columns.}
+}
+\value{
+  The output is an m x m matrix of calculated covariances
+}
+\examples{
+  data(ebmTestdata)
+  glypGenes <- pathways$gene[pathways$pathway == "GLYPICAN 3 NETWORK"]
+  glypDat   <- as.matrix(dat[match(glypGenes, dat$V1), 2:ncol(dat)])
+  calculateCovariances(glypDat)
+}
diff --git a/R/EmpiricalBrownsMethod/man/calculateKostCovariance.Rd b/R/EmpiricalBrownsMethod/man/calculateKostCovariance.Rd
@@ -0,0 +1,21 @@
+\name{calculateKostCovariance}
+\alias{calculateKostCovariance}
+\title{Calculate pairwise covariances between transformed raw data vectors}
+\description{Calculate pairwise covariances between transformed raw data vectors
+             using Kost's polynomial fit and the Pearson correlation function}
+\usage{
+    calculateKostCovariance(data_matrix)
+}
+\arguments{
+    \item{data_matrix}{An m x n numeric matrix with m variables in rows and n
+        samples in columns.}
+}
+\value{
+    The output is an m x m matrix of calculated covariances
+}
+\examples{
+    data(ebmTestdata)
+    glypGenes <- pathways$gene[pathways$pathway == "GLYPICAN 3 NETWORK"]
+    glypDat   <- as.matrix(dat[match(glypGenes, dat$V1), 2:ncol(dat)])
+    calculateKostCovariance(glypDat)
+}
diff --git a/R/EmpiricalBrownsMethod/man/empiricalBrownsMethod.Rd b/R/EmpiricalBrownsMethod/man/empiricalBrownsMethod.Rd
@@ -8,12 +8,15 @@ This package provides an empirical adaptation of Brown’s Method (an extension
 of Fisher’s Method) for combining dependent P-values which is appropriate for
 highly correlated data sets, like those found in high-throughput biological experiments.}
 \usage{
-  empiricalBrownsMethod(data_matrix, p_values, extra_info)
+  empiricalBrownsMethod(p_values, data_matrix, covar_matrix, extra_info = FALSE)
 }
 \arguments{
+  \item{p_values}{A numeric vector of p-values with length m.}
   \item{data_matrix}{An m x n numeric matrix with m variables in rows and n
                       samples in columns.}
-  \item{p_values}{A numeric vector of p-values with length m.}
+  \item{covar_matrix}{An m x m matrix of pairwise covariances between transformed
+                      raw data vectors, as returned by calculateCovariances. Only
+                      one of data_matrix and covar_matrix is required; if
+                      covar_matrix is supplied, data_matrix is not used.}
   \item{extra_info}{boolean, TRUE additionally returns the p-value from
                     Fisher's method, the scale factor c, and the new degrees of
                     freedom from Brown's Method }
@@ -33,5 +36,10 @@ highly correlated data sets, like those found in high-throughput biological expe
   glypPvals <- allPvals$pvalue.with.CHD4[match(glypGenes, allPvals$gene)];
   glypDat   <- dat[match(glypGenes, dat$V1), 2:ncol(dat)];
   empiricalBrownsMethod(data_matrix=glypDat, p_values=glypPvals, extra_info=TRUE);
+  
+## The covariance matrix can also be calculated beforehand, which is useful
+## if you need to run empiricalBrownsMethod multiple times
+  glypCovs <- calculateCovariances(glypDat)
+  empiricalBrownsMethod(covar_matrix=glypCovs, p_values=glypPvals, extra_info=TRUE)
 }
 \keyword{multivariate}
diff --git a/R/EmpiricalBrownsMethod/man/kostsMethod.Rd b/R/EmpiricalBrownsMethod/man/kostsMethod.Rd
@@ -8,12 +8,15 @@ This package provides an implementation of Kost's Method for combining dependent
 P-values which is appropriate for highly correlated data sets, like those found in
 high-throughput biological experiments.}
 \usage{
-  kostsMethod(data_matrix, p_values, extra_info)
+  kostsMethod(p_values, data_matrix, covar_matrix, extra_info = FALSE)
 }
 \arguments{
+  \item{p_values}{A numeric vector of p-values with length m.}
   \item{data_matrix}{An m x n numeric matrix with m variables in rows and n
                       samples in columns.}
-  \item{p_values}{A numeric vector of p-values with length m.}
+  \item{covar_matrix}{An m x m matrix of pairwise covariances between transformed
+                      raw data vectors, as returned by calculateKostCovariance.
+                      Only one of data_matrix and covar_matrix is required; if
+                      covar_matrix is supplied, data_matrix is not used.}
   \item{extra_info}{boolean, TRUE additionally returns the p-value from
                     Fisher's method, the scale factor c, and the new degrees of
                     freedom from Brown's Method }
@@ -33,5 +36,11 @@ high-throughput biological experiments.}
   glypPvals <- allPvals$pvalue.with.CHD4[match(glypGenes, allPvals$gene)]
   glypDat   <- as.matrix(dat[match(glypGenes, dat$V1), 2:ncol(dat)])
   kostsMethod(data_matrix=glypDat, p_values=glypPvals, extra_info=TRUE);
+  
+## The covariance matrix can also be calculated beforehand, which is useful
+## if you need to run kostsMethod multiple times
+  glypCovs <- calculateKostCovariance(glypDat)
+  kostsMethod(covar_matrix=glypCovs, p_values=glypPvals, extra_info=TRUE)
 }
+
 \keyword{multivariate}
diff --git a/R/EmpiricalBrownsMethod/tests/testthat/test_ebm_main.R b/R/EmpiricalBrownsMethod/tests/testthat/test_ebm_main.R
@@ -52,7 +52,7 @@ test_that("random results are correct", {
   rd <- as.matrix(randData[-1,-1])
   pvals <- sapply(1:10, function(i) cor.test(a, as.numeric(rd[i,]))$p.value)
   res0 <- empiricalBrownsMethod(data_matrix=rd, p_values=pvals, extra_info=T)
-  expected = list(P_Brown=0.7228817373295435, P_Fisher=0.8613842570343421, Scale_Factor_C=2.45800963585645, DF_Brown=8.136664603851868)
+  expected = list(P_test=0.7228817373295435, P_Fisher=0.8613842570343421, Scale_Factor_C=2.45800963585645, DF=8.136664603851868)
   expect_equal(res0, expected)
 })
 
@@ -62,7 +62,7 @@ test_that("Kost method, random results are correct", {
   pvals <- sapply(1:10, function(i) cor.test(a, as.numeric(rd[i,]))$p.value)
   res0 <- kostsMethod(data_matrix=rd, p_values=pvals, extra_info=T)
   #expected = list(P_Brown=0.70175288327251462, P_Fisher=0.86138425703434118, Scale_Factor_C=2.814405567447344, DF_Brown=7.1062963459598079)
-  expected = list(P_Brown=0.70175288327251462, P_Fisher=0.86138425703434118, Scale_Factor_C=2.814405567447344, DF_Brown=7.1062963459598079)
+  expected = list(P_test=0.70175288327251462, P_Fisher=0.86138425703434118, Scale_Factor_C=2.814405567447344, DF=7.1062963459598079)
   expect_equal(res0, expected)
 })
 
@@ -71,7 +71,7 @@ test_that("the glypican 3 network example works", {
   glypPvals <- as.matrix(allPvals$pvalue.with.CHD4[allPvals$gene %in% glypGenes])
   glypDat <- dat[dat$V1 %in% glypGenes, 2:ncol(dat)]
   res0 <- empiricalBrownsMethod(data_matrix=glypDat, p_values=glypPvals, extra_info=T)
-  expected = list(P_Brown=4.821679406409943e-07, P_Fisher=1.438732140605804e-08, Scale_Factor_C=1.297692749787417, DF_Brown=10.78837806737645)
+  expected = list(P_test=4.821679406409943e-07, P_Fisher=1.438732140605804e-08, Scale_Factor_C=1.297692749787417, DF=10.78837806737645)
   expect_equal(res0, expected)
 })
 
@@ -80,7 +80,7 @@ test_that("Kost method, the glypican 3 network example works", {
   glypPvals <- allPvals$pvalue.with.CHD4[allPvals$gene %in% glypGenes]
   glypDat <- as.matrix(dat[dat$V1 %in% glypGenes, 2:ncol(dat)])
   res0 <- kostsMethod(data_matrix=glypDat, p_values=glypPvals, extra_info=T)
-  expected = list(P_Brown=7.570776008807226e-07, P_Fisher=1.4387321406058163e-08, Scale_Factor_C=1.349048766471012, DF_Brown=10.377682666448537)
+  expected = list(P_test=7.570776008807226e-07, P_Fisher=1.4387321406058163e-08, Scale_Factor_C=1.349048766471012, DF=10.377682666448537)
   expect_equal(res0, expected)
 })
 
@@ -89,7 +89,7 @@ test_that("the SUMO pathway example works", {
   sumoPvals <- allPvals$pvalue.with.CHD4[allPvals$gene %in% sumoGenes]
   sumoDat <- as.matrix(dat[dat$V1 %in% sumoGenes, 2:ncol(dat)])
   res0 <- empiricalBrownsMethod(data_matrix=sumoDat, p_values=sumoPvals, extra_info=T)
-  expected = list(P_Brown=1.698056395040471e-41, P_Fisher=6.443838824431309e-45, Scale_Factor_C=1.087731057365708, DF_Brown=18.38689799704392)
+  expected = list(P_test=1.698056395040471e-41, P_Fisher=6.443838824431309e-45, Scale_Factor_C=1.087731057365708, DF=18.38689799704392)
   expect_equal(res0, expected)
 })
 
@@ -98,7 +98,7 @@ test_that("Kost method, the SUMO pathway example works", {
   sumoPvals <- allPvals$pvalue.with.CHD4[allPvals$gene %in% sumoGenes]
   sumoDat <- as.matrix(dat[dat$V1 %in% sumoGenes, 2:ncol(dat)])
   res0 <- kostsMethod(data_matrix=sumoDat, p_values=sumoPvals, extra_info=T)
-  expected = list(P_Brown=2.0949187511965534e-39, P_Fisher=6.4438388244313223e-45, Scale_Factor_C=1.1493509132040536, DF_Brown=17.40112594877213)
+  expected = list(P_test=2.0949187511965534e-39, P_Fisher=6.4438388244313223e-45, Scale_Factor_C=1.1493509132040536, DF=17.40112594877213)
   expect_equal(res0, expected)
 })
 
@@ -107,7 +107,7 @@ test_that("the FOXA1 network example works", {
   foxPvals <- allPvals$pvalue.with.CHD4[allPvals$gene %in% foxGenes]
   foxDat <- as.matrix(dat[dat$V1 %in% foxGenes, 2:ncol(dat)])
   res0 <- empiricalBrownsMethod(data_matrix=foxDat, p_values=foxPvals, extra_info=T)
-  expected = list(P_Brown=7.777896979417795e-53, P_Fisher=4.043406925735124e-139, Scale_Factor_C=2.719366560758496, DF_Brown=21.32849643625184)
+  expected = list(P_test=7.777896979417795e-53, P_Fisher=4.043406925735124e-139, Scale_Factor_C=2.719366560758496, DF=21.32849643625184)
   expect_equal(res0, expected)
 })
 
@@ -116,6 +116,26 @@ test_that("Kost method, the FOXA1 network example works", {
   foxPvals <- allPvals$pvalue.with.CHD4[allPvals$gene %in% foxGenes]
   foxDat <- as.matrix(dat[dat$V1 %in% foxGenes, 2:ncol(dat)])
   res0 <- kostsMethod(data_matrix=foxDat, p_values=foxPvals, extra_info=T)
-  expected = list(P_Brown=3.1576893819385723e-57, P_Fisher=4.043406925735029e-139, Scale_Factor_C=2.5008607800331659, DF_Brown=23.192014710723246)
+  expected = list(P_test=3.1576893819385723e-57, P_Fisher=4.043406925735029e-139, Scale_Factor_C=2.5008607800331659, DF=23.192014710723246)
   expect_equal(res0, expected)
 })
+
+test_that("random results are correct with covar_matrix", {
+    a <- as.numeric(randData[1,-1])
+    rd <- as.matrix(randData[-1,-1])
+    pvals <- sapply(1:10, function(i) cor.test(a, as.numeric(rd[i,]))$p.value)
+    covar_matrix <- calculateCovariances(rd)
+    res0 <- empiricalBrownsMethod(covar_matrix=covar_matrix, p_values=pvals, extra_info=T)
+    expected = list(P_test=0.7228817373295435, P_Fisher=0.8613842570343421, Scale_Factor_C=2.45800963585645, DF=8.136664603851868)
+    expect_equal(res0, expected)
+})
+
+test_that("Kost method, random results are correct with covar_matrix", {
+    a <- as.numeric(randData[1,-1])
+    rd <- as.matrix(randData[-1,-1])
+    pvals <- sapply(1:10, function(i) cor.test(a, as.numeric(rd[i,]))$p.value)
+    covar_matrix <- calculateKostCovariance(rd)
+    res0 <- kostsMethod(covar=covar_matrix, p_values=pvals, extra_info=T)
+    expected = list(P_test=0.70175288327251462, P_Fisher=0.86138425703434118, Scale_Factor_C=2.814405567447344, DF=7.1062963459598079)
+    expect_equal(res0, expected)
+})