Skip to content

Commit

Permalink
Accept covariance matrix as input
Browse files Browse the repository at this point in the history
  • Loading branch information
JonBarenboim committed Dec 19, 2017
1 parent e52c4be commit 7ab9c6e
Show file tree
Hide file tree
Showing 7 changed files with 98 additions and 18 deletions.
2 changes: 2 additions & 0 deletions R/EmpiricalBrownsMethod/NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
exportPattern("empiricalBrownsMethod")
exportPattern("kostsMethod")
export("calculateCovariances")
export("calculateKostCovariance")
12 changes: 6 additions & 6 deletions R/EmpiricalBrownsMethod/R/ebm.R
Original file line number Diff line number Diff line change
Expand Up @@ -77,24 +77,24 @@ combinePValues <- function(covar_matrix, p_values, extra_info = FALSE){
}
}

# Combine dependent P-values with the empirical adaptation of Brown's Method.
#Input: p_values: a numeric vector of m P-values to combine.
#       data_matrix: an m x n numeric matrix with each of m rows representing
#         a variable and each of n columns representing a sample. Only needed
#         when covar_matrix is not supplied.
#       covar_matrix: an m x m covariance matrix, e.g. the result of
#         calculateCovariances(data_matrix). When supplied, data_matrix is
#         not used.
#       extra_info: see Output.
#Output: A combined P-value.
#        If extra_info == TRUE: also returns the p-value from Fisher's method,
#        the scale factor c, and the new degrees of freedom from Brown's Method
empiricalBrownsMethod <- function(p_values, data_matrix, covar_matrix, extra_info = FALSE) {
  # inputs must be numeric
  if (missing(covar_matrix)) {
    # Fail with a clear message instead of an obscure "argument is missing"
    # error raised from inside calculateCovariances().
    if (missing(data_matrix)) {
      stop("one of data_matrix or covar_matrix must be supplied", call. = FALSE)
    }
    covar_matrix <- calculateCovariances(data_matrix)
  }
  combinePValues(covar_matrix, p_values, extra_info)
}

#

# Combine dependent P-values with Kost's Method.
#Input: p_values: a numeric vector of m P-values to combine.
#       data_matrix: an m x n numeric matrix with each of m rows representing
#         a variable and each of n columns representing a sample. Only needed
#         when covar_matrix is not supplied.
#       covar_matrix: an m x m covariance matrix, e.g. the result of
#         calculateKostCovariance(data_matrix). When supplied, data_matrix is
#         not used.
#       extra_info: see Output.
#Output: A combined P-value using Kost's Method.
#        If extra_info == TRUE: also returns the p-value from Fisher's method,
#        the scale factor c, and the new degrees of freedom
kostsMethod <- function(p_values, data_matrix, covar_matrix, extra_info = FALSE) {
  if (missing(covar_matrix)) {
    # Fail with a clear message instead of an obscure "argument is missing"
    # error raised from inside calculateKostCovariance().
    if (missing(data_matrix)) {
      stop("one of data_matrix or covar_matrix must be supplied", call. = FALSE)
    }
    covar_matrix <- calculateKostCovariance(data_matrix)
  }
  combinePValues(covar_matrix, p_values, extra_info = extra_info)
}

Expand Down
20 changes: 20 additions & 0 deletions R/EmpiricalBrownsMethod/man/calculateCovariances.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
\name{calculateCovariances}
\alias{calculateCovariances}
\title{Calculate pairwise covariances between transformed raw data vectors}
\description{Calculate pairwise covariances between transformed raw data vectors}
\usage{
calculateCovariances(data_matrix)
}
\arguments{
\item{data_matrix}{An m x n numeric matrix with m variables in rows and n
samples in columns.}
}
\value{
The output is an m x m matrix of calculated covariances
}
\examples{
data(ebmTestdata)
glypGenes <- pathways$gene[pathways$pathway == "GLYPICAN 3 NETWORK"]
glypDat <- as.matrix(dat[match(glypGenes, dat$V1), 2:ncol(dat)])
calculateCovariances(glypDat)
}
21 changes: 21 additions & 0 deletions R/EmpiricalBrownsMethod/man/calculateKostCovariance.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
\name{calculateKostCovariance}
\alias{calculateKostCovariance}
\title{Calculate pairwise covariances between transformed raw data vectors}
\description{Calculate pairwise covariances between transformed raw data vectors
using Kost's polynomial fit and the Pearson correlation function}
\usage{
calculateKostCovariance(data_matrix)
}
\arguments{
\item{data_matrix}{An m x n numeric matrix with m variables in rows and n
samples in columns.}
}
\value{
The output is an m x m matrix of calculated covariances
}
\examples{
data(ebmTestdata)
glypGenes <- pathways$gene[pathways$pathway == "GLYPICAN 3 NETWORK"]
glypDat <- as.matrix(dat[match(glypGenes, dat$V1), 2:ncol(dat)])
calculateKostCovariance(glypDat)
}
12 changes: 10 additions & 2 deletions R/EmpiricalBrownsMethod/man/empiricalBrownsMethod.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,15 @@ This package provides an empirical adaptation of Brown’s Method (an extension
of Fisher's Method) for combining dependent P-values which is appropriate for
highly correlated data sets, like those found in high-throughput biological experiments.}
\usage{
empiricalBrownsMethod(data_matrix, p_values, extra_info)
empiricalBrownsMethod(p_values, data_matrix, covar_matrix, extra_info)
}
\arguments{
\item{p_values}{A numeric vector of p-values with length m.}
\item{data_matrix}{An m x n numeric matrix with m variables in rows and n
samples in columns.}
\item{p_values}{A numeric vector of p-values with length m.}
\item{covar_matrix}{An m x m matrix of pairwise covariances between transformed
raw data vectors. Only one of data_matrix and covar_matrix is
required.}
\item{extra_info}{boolean, TRUE additionally returns the p-value from
Fisher's method, the scale factor c, and the new degrees of
freedom from Brown's Method }
Expand All @@ -33,5 +36,10 @@ highly correlated data sets, like those found in high-throughput biological expe
glypPvals <- allPvals$pvalue.with.CHD4[match(glypGenes, allPvals$gene)];
glypDat <- dat[match(glypGenes, dat$V1), 2:ncol(dat)];
empiricalBrownsMethod(data_matrix=glypDat, p_values=glypPvals, extra_info=TRUE);
## The covariance matrix can also be calculated beforehand, which is useful
## if you need to run empiricalBrownsMethod multiple times
glypCovs <- calculateCovariances(glypDat)
empiricalBrownsMethod(covar_matrix=glypCovs, p_values=glypPvals, extra_info=TRUE)
}
\keyword{multivariate}
13 changes: 11 additions & 2 deletions R/EmpiricalBrownsMethod/man/kostsMethod.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,15 @@ This package provides an implementation of Kost's Method for combining dependent
P-values which is appropriate for highly correlated data sets, like those found in
high-throughput biological experiments.}
\usage{
kostsMethod(data_matrix, p_values, extra_info)
kostsMethod(p_values, data_matrix, covar_matrix, extra_info)
}
\arguments{
\item{p_values}{A numeric vector of p-values with length m.}
\item{data_matrix}{An m x n numeric matrix with m variables in rows and n
samples in columns.}
\item{p_values}{A numeric vector of p-values with length m.}
\item{covar_matrix}{An m x m matrix of pairwise covariances between transformed
raw data vectors. Only one of data_matrix and covar_matrix is
required.}
\item{extra_info}{boolean, TRUE additionally returns the p-value from
Fisher's method, the scale factor c, and the new degrees of
freedom from Brown's Method }
Expand All @@ -33,5 +36,11 @@ high-throughput biological experiments.}
glypPvals <- allPvals$pvalue.with.CHD4[match(glypGenes, allPvals$gene)]
glypDat <- as.matrix(dat[match(glypGenes, dat$V1), 2:ncol(dat)])
kostsMethod(data_matrix=glypDat, p_values=glypPvals, extra_info=TRUE);
## The covariance matrix can also be calculated beforehand, which is useful
## if you need to run kostsMethod multiple times
glypCovs <- calculateKostCovariance(glypDat)
kostsMethod(covar_matrix=glypCovs, p_values=glypPvals, extra_info=TRUE)
}
\keyword{multivariate}
36 changes: 28 additions & 8 deletions R/EmpiricalBrownsMethod/tests/testthat/test_ebm_main.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ test_that("random results are correct", {
rd <- as.matrix(randData[-1,-1])
pvals <- sapply(1:10, function(i) cor.test(a, as.numeric(rd[i,]))$p.value)
res0 <- empiricalBrownsMethod(data_matrix=rd, p_values=pvals, extra_info=T)
expected = list(P_Brown=0.7228817373295435, P_Fisher=0.8613842570343421, Scale_Factor_C=2.45800963585645, DF_Brown=8.136664603851868)
expected = list(P_test=0.7228817373295435, P_Fisher=0.8613842570343421, Scale_Factor_C=2.45800963585645, DF=8.136664603851868)
expect_equal(res0, expected)
})

Expand All @@ -62,7 +62,7 @@ test_that("Kost method, random results are correct", {
pvals <- sapply(1:10, function(i) cor.test(a, as.numeric(rd[i,]))$p.value)
res0 <- kostsMethod(data_matrix=rd, p_values=pvals, extra_info=T)
#expected = list(P_Brown=0.70175288327251462, P_Fisher=0.86138425703434118, Scale_Factor_C=2.814405567447344, DF_Brown=7.1062963459598079)
expected = list(P_Brown=0.70175288327251462, P_Fisher=0.86138425703434118, Scale_Factor_C=2.814405567447344, DF_Brown=7.1062963459598079)
expected = list(P_test=0.70175288327251462, P_Fisher=0.86138425703434118, Scale_Factor_C=2.814405567447344, DF=7.1062963459598079)
expect_equal(res0, expected)
})

Expand All @@ -71,7 +71,7 @@ test_that("the glypican 3 network example works", {
glypPvals <- as.matrix(allPvals$pvalue.with.CHD4[allPvals$gene %in% glypGenes])
glypDat <- dat[dat$V1 %in% glypGenes, 2:ncol(dat)]
res0 <- empiricalBrownsMethod(data_matrix=glypDat, p_values=glypPvals, extra_info=T)
expected = list(P_Brown=4.821679406409943e-07, P_Fisher=1.438732140605804e-08, Scale_Factor_C=1.297692749787417, DF_Brown=10.78837806737645)
expected = list(P_test=4.821679406409943e-07, P_Fisher=1.438732140605804e-08, Scale_Factor_C=1.297692749787417, DF=10.78837806737645)
expect_equal(res0, expected)
})

Expand All @@ -80,7 +80,7 @@ test_that("Kost method, the glypican 3 network example works", {
glypPvals <- allPvals$pvalue.with.CHD4[allPvals$gene %in% glypGenes]
glypDat <- as.matrix(dat[dat$V1 %in% glypGenes, 2:ncol(dat)])
res0 <- kostsMethod(data_matrix=glypDat, p_values=glypPvals, extra_info=T)
expected = list(P_Brown=7.570776008807226e-07, P_Fisher=1.4387321406058163e-08, Scale_Factor_C=1.349048766471012, DF_Brown=10.377682666448537)
expected = list(P_test=7.570776008807226e-07, P_Fisher=1.4387321406058163e-08, Scale_Factor_C=1.349048766471012, DF=10.377682666448537)
expect_equal(res0, expected)
})

Expand All @@ -89,7 +89,7 @@ test_that("the SUMO pathway example works", {
sumoPvals <- allPvals$pvalue.with.CHD4[allPvals$gene %in% sumoGenes]
sumoDat <- as.matrix(dat[dat$V1 %in% sumoGenes, 2:ncol(dat)])
res0 <- empiricalBrownsMethod(data_matrix=sumoDat, p_values=sumoPvals, extra_info=T)
expected = list(P_Brown=1.698056395040471e-41, P_Fisher=6.443838824431309e-45, Scale_Factor_C=1.087731057365708, DF_Brown=18.38689799704392)
expected = list(P_test=1.698056395040471e-41, P_Fisher=6.443838824431309e-45, Scale_Factor_C=1.087731057365708, DF=18.38689799704392)
expect_equal(res0, expected)
})

Expand All @@ -98,7 +98,7 @@ test_that("Kost method, the SUMO pathway example works", {
sumoPvals <- allPvals$pvalue.with.CHD4[allPvals$gene %in% sumoGenes]
sumoDat <- as.matrix(dat[dat$V1 %in% sumoGenes, 2:ncol(dat)])
res0 <- kostsMethod(data_matrix=sumoDat, p_values=sumoPvals, extra_info=T)
expected = list(P_Brown=2.0949187511965534e-39, P_Fisher=6.4438388244313223e-45, Scale_Factor_C=1.1493509132040536, DF_Brown=17.40112594877213)
expected = list(P_test=2.0949187511965534e-39, P_Fisher=6.4438388244313223e-45, Scale_Factor_C=1.1493509132040536, DF=17.40112594877213)
expect_equal(res0, expected)
})

Expand All @@ -107,7 +107,7 @@ test_that("the FOXA1 network example works", {
foxPvals <- allPvals$pvalue.with.CHD4[allPvals$gene %in% foxGenes]
foxDat <- as.matrix(dat[dat$V1 %in% foxGenes, 2:ncol(dat)])
res0 <- empiricalBrownsMethod(data_matrix=foxDat, p_values=foxPvals, extra_info=T)
expected = list(P_Brown=7.777896979417795e-53, P_Fisher=4.043406925735124e-139, Scale_Factor_C=2.719366560758496, DF_Brown=21.32849643625184)
expected = list(P_test=7.777896979417795e-53, P_Fisher=4.043406925735124e-139, Scale_Factor_C=2.719366560758496, DF=21.32849643625184)
expect_equal(res0, expected)
})

Expand All @@ -116,6 +116,26 @@ test_that("Kost method, the FOXA1 network example works", {
foxPvals <- allPvals$pvalue.with.CHD4[allPvals$gene %in% foxGenes]
foxDat <- as.matrix(dat[dat$V1 %in% foxGenes, 2:ncol(dat)])
res0 <- kostsMethod(data_matrix=foxDat, p_values=foxPvals, extra_info=T)
expected = list(P_Brown=3.1576893819385723e-57, P_Fisher=4.043406925735029e-139, Scale_Factor_C=2.5008607800331659, DF_Brown=23.192014710723246)
expected = list(P_test=3.1576893819385723e-57, P_Fisher=4.043406925735029e-139, Scale_Factor_C=2.5008607800331659, DF=23.192014710723246)
expect_equal(res0, expected)
})

test_that("random results are correct with covar_matrix", {
  # First row of randData (minus its first column) is correlated against each
  # of the remaining rows to produce the P-values to combine.
  a <- as.numeric(randData[1, -1])
  rd <- as.matrix(randData[-1, -1])
  pvals <- vapply(
    1:10,
    function(i) cor.test(a, as.numeric(rd[i, ]))$p.value,
    numeric(1)
  )
  # Pre-compute the covariance matrix and pass it instead of the raw data.
  covar_matrix <- calculateCovariances(rd)
  res0 <- empiricalBrownsMethod(covar_matrix = covar_matrix, p_values = pvals, extra_info = TRUE)
  # Same expected values as the data_matrix-based "random results" test.
  expected <- list(P_test = 0.7228817373295435, P_Fisher = 0.8613842570343421, Scale_Factor_C = 2.45800963585645, DF = 8.136664603851868)
  expect_equal(res0, expected)
})

test_that("Kost method, random results are correct with covar_matrix", {
  # First row of randData (minus its first column) is correlated against each
  # of the remaining rows to produce the P-values to combine.
  a <- as.numeric(randData[1, -1])
  rd <- as.matrix(randData[-1, -1])
  pvals <- vapply(
    1:10,
    function(i) cor.test(a, as.numeric(rd[i, ]))$p.value,
    numeric(1)
  )
  # Pre-compute the covariance matrix and pass it instead of the raw data.
  covar_matrix <- calculateKostCovariance(rd)
  # Spell out covar_matrix in full rather than relying on partial argument
  # matching of `covar`.
  res0 <- kostsMethod(covar_matrix = covar_matrix, p_values = pvals, extra_info = TRUE)
  # Same expected values as the data_matrix-based Kost "random results" test.
  expected <- list(P_test = 0.70175288327251462, P_Fisher = 0.86138425703434118, Scale_Factor_C = 2.814405567447344, DF = 7.1062963459598079)
  expect_equal(res0, expected)
})

0 comments on commit 7ab9c6e

Please sign in to comment.