-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Theo Knijnenburg
committed
Feb 27, 2016
1 parent
ee1d6d2
commit f818a50
Showing
6 changed files
with
249 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
% Copyright 2015, Institute for Systems Biology. | ||
% | ||
% Licensed under the Apache License, Version 2.0 (the "License"); | ||
% you may not use this file except in compliance with the License. | ||
% You may obtain a copy of the License at | ||
% | ||
% http://www.apache.org/licenses/LICENSE-2.0 | ||
% | ||
% Unless required by applicable law or agreed to in writing, software | ||
% distributed under the License is distributed on an "AS IS" BASIS, | ||
% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
% See the License for the specific language governing permissions and | ||
% limitations under the License. | ||
% | ||
% Author: William Poole | ||
% Ported to Matlab: Theo Knijnenburg | ||
% Email: [email protected] / [email protected] | ||
% Created: February 2016 | ||
|
||
function [Pkost,Pfisher,Ckost,DFkost] = KostsMethod(data_matrix, p_values) | ||
|
||
%license, author, date | ||
|
||
%% Inputs and outputs | ||
|
||
% Input: An m x n data matrix with each of m rows representing a variable and each of n columns representing a sample. Should be a numeric matrix. | ||
% A vector of m P-values to combine. Should be a numeric vector. | ||
% Output: Pkost, the combined P-value from Kost's method. | ||
% Also returns Pfisher (the P-value from Fisher's method), Ckost (the scale factor c), and DFkost (the new degrees of freedom from Kost's method). | ||
|
||
%% Compute P-values | ||
covar_matrix = CalculateCovariances(data_matrix); | ||
[Pkost,Pfisher,Ckost,DFkost] = CombinePValues(covar_matrix, p_values); | ||
|
||
%% Functions | ||
|
||
%Calculate Covariances | ||
function covar_matrix = CalculateCovariances(data_matrix); | ||
[m,n] = size(data_matrix); | ||
cor = corr(data_matrix'); | ||
%Kost's polynomial fit | ||
a1 = 3.263; | ||
a2 = 0.710; | ||
a3 = 0.027; | ||
covar_matrix = a1.*cor+a2.*cor.^2+a3.*cor.^3; | ||
end | ||
|
||
% Combining P-values | ||
function [p_kost,p_fisher,c,df_kost] = CombinePValues(covar_matrix, p_values); | ||
|
||
m = size(covar_matrix,1); | ||
df_fisher = 2.0*m; | ||
Expected = 2.0*m; | ||
cov_sum = sum(sum(covar_matrix))-sum(diag(covar_matrix)); | ||
Var = 4.0*m+cov_sum; | ||
c = Var/(2*Expected); | ||
df_kost = 2*(Expected^2)/Var; | ||
if df_kost > df_fisher; | ||
df_kost = df_fisher; | ||
c = 1; | ||
end | ||
|
||
x = 2*sum(-log(p_values)); | ||
p_kost = chi2cdf(1.0*x/c,df_kost,'upper'); | ||
p_fisher = chi2cdf(1.0*x,df_fisher,'upper'); | ||
end | ||
|
||
end | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,7 @@ | |
% Ported to Matlab: Theo Knijnenburg | ||
% Email: [email protected] / [email protected] | ||
% Created: June 2015 | ||
% Updated: February 2016 | ||
|
||
%% Initialize | ||
Init | ||
|
@@ -28,7 +29,7 @@ | |
%% Pearson correlation | ||
[R,P] = corr(D(1,:)',D(2:end,:)'); | ||
|
||
%% Emperical Browns Methods | ||
%% Empirical Browns Method | ||
data_matrix = D(2:end,:); | ||
p_values = P; | ||
[Pbrown,Pfisher,Cbrown,DFbrown] = EmpiricalBrownsMethod(data_matrix, p_values) | ||
|
@@ -54,6 +55,30 @@ | |
% | ||
% 8.1367 | ||
|
||
%% Kosts Method | ||
[Pkost,Pfisher,Ckost,DFkost] = KostsMethod(data_matrix, p_values) | ||
|
||
%% Should give... | ||
|
||
% Pkost = | ||
% | ||
% 0.7018 | ||
% | ||
% | ||
% Pfisher = | ||
% | ||
% 0.8614 | ||
% | ||
% | ||
% Ckost = | ||
% | ||
% 2.8144 | ||
% | ||
% | ||
% DFkost = | ||
% | ||
% 7.1063 | ||
|
||
|
||
%% Load TCGA dataset | ||
% parseTCGAdata | ||
|
@@ -70,11 +95,12 @@ | |
|
||
display(Pathways{p,1}); | ||
[Pbrown,Pfisher,Cbrown,DFbrown] = EmpiricalBrownsMethod(data_matrix, p_values) | ||
|
||
[Pkost,Pfisher,Ckost,DFkost] = KostsMethod(data_matrix, p_values) | ||
end | ||
|
||
|
||
%% Should give... | ||
|
||
% FOXA1 TRANSCRIPTION FACTOR NETWORK | ||
% | ||
% Pbrown = | ||
|
@@ -96,6 +122,26 @@ | |
% | ||
% 21.3285 | ||
% | ||
% | ||
% Pkost = | ||
% | ||
% 3.1577e-57 | ||
% | ||
% | ||
% Pfisher = | ||
% | ||
% 4.0434e-139 | ||
% | ||
% | ||
% Ckost = | ||
% | ||
% 2.5009 | ||
% | ||
% | ||
% DFkost = | ||
% | ||
% 23.1920 | ||
% | ||
% GLYPICAN 3 NETWORK | ||
% | ||
% Pbrown = | ||
|
@@ -117,6 +163,26 @@ | |
% | ||
% 10.7884 | ||
% | ||
% | ||
% Pkost = | ||
% | ||
% 7.5708e-07 | ||
% | ||
% | ||
% Pfisher = | ||
% | ||
% 1.4387e-08 | ||
% | ||
% | ||
% Ckost = | ||
% | ||
% 1.3490 | ||
% | ||
% | ||
% DFkost = | ||
% | ||
% 10.3777 | ||
% | ||
% SUMOYLATION BY RANBP2 REGULATES TRANSCRIPTIONAL REPRESSION | ||
% | ||
% Pbrown = | ||
|
@@ -137,7 +203,26 @@ | |
% DFbrown = | ||
% | ||
% 18.3869 | ||
|
||
% | ||
% | ||
% Pkost = | ||
% | ||
% 2.0949e-39 | ||
% | ||
% | ||
% Pfisher = | ||
% | ||
% 6.4438e-45 | ||
% | ||
% | ||
% Ckost = | ||
% | ||
% 1.1494 | ||
% | ||
% | ||
% DFkost = | ||
% | ||
% 17.4011 | ||
|
||
|
||
|
||
|
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
source("EmpericalBrownsMethod/R/ebm.R") | ||
options(digits=16) | ||
# Test with artificial data | ||
print("****************************************************************") | ||
print("RANDOM DATA") | ||
randData <- read.table("../Data/RandomData.tsv", sep="\t", header=F, stringsAsFactors=F) | ||
a <- as.numeric(randData[1,-1]) | ||
rd <- randData[-1,-1] | ||
pvals <- sapply(1:10, function(i) cor.test(a, as.numeric(rd[i,]))$p.value) | ||
print(empiricalBrownsMethod(data_matrix=rd, p_values=pvals, extra_info=T)) | ||
print("****************************************************************") | ||
source("EmpericalBrownsMethod/R/ebm.R") | ||
source("/EmpericalBrownsMethod/R/ebm.R") | ||
source("EmpiricalBrownsMethod/R/ebm.R") | ||
options(digits=16) | ||
print("****************************************************************") | ||
print("RANDOM DATA") | ||
randData <- read.table("../Data/RandomData.tsv", sep="\t", header=F, stringsAsFactors=F) | ||
a <- as.numeric(randData[1,-1]) | ||
rd <- randData[-1,-1] | ||
pvals <- sapply(1:10, function(i) cor.test(a, as.numeric(rd[i,]))$p.value) | ||
print(empiricalBrownsMethod(data_matrix=rd, p_values=pvals, extra_info=T)) | ||
print("****************************************************************") | ||
print(KostsMethod(data_matrix=rd, p_values=pvals, extra_info=T)) | ||
print("****************************************************************") | ||
print(kostsMethod(data_matrix=rd, p_values=pvals, extra_info=T)) | ||
print("****************************************************************") | ||
print(kostsMethod(data_matrix=as.matrix(rd), p_values=pvals, extra_info=T)) | ||
print("****************************************************************") | ||
pathways <- read.table("../Data/pathways.tsv", sep="\t", header=T, stringsAsFactors=F) | ||
allPvals <- read.table("../Data/CDH4_Pvalues.tsv", sep="\t", stringsAsFactors=F, header=T ) | ||
dat <- read.table("../Data/ReducedFeatureMatrix.tsv", sep="\t", stringsAsFactors=F, header=F) | ||
allPvals <- (unique(allPvals)) | ||
print("Glypican 3 Network") | ||
glypGenes <- pathways$gene[pathways$pathway == "GLYPICAN 3 NETWORK"] | ||
glypPvals <- allPvals$pvalue.with.CHD4[allPvals$gene %in% glypGenes] | ||
glypDat <- dat[dat$V1 %in% glypGenes, 2:ncol(dat)] | ||
print(empiricalBrownsMethod(data_matrix=glypDat, p_values=glypPvals, extra_info=T)) | ||
print("****************************************************************") | ||
print(kostsMethod(data_matrix=glypDat, p_values=glypPvals, extra_info=T)) | ||
print("****************************************************************") | ||
print(kostsMethod(data_matrix=as.matrix(glypDat), p_values=glypPvals, extra_info=T)) | ||
print("****************************************************************") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,8 +16,9 @@ | |
# Ported to R: David L Gibbs | ||
# Email: [email protected] / [email protected] / [email protected] | ||
# Created: June 2015 | ||
# Updated: February 2016 | ||
|
||
source("EmpericalBrownsMethod/R/ebm.R") | ||
source("EmpiricalBrownsMethod/R/ebm.R") | ||
options(digits=16) | ||
|
||
# Test with artificial data | ||
|
@@ -44,6 +45,23 @@ print("****************************************************************") | |
#$DF_Brown | ||
#[1] 8.136664603851868 | ||
|
||
print(kostsMethod(data_matrix=as.matrix(rd), p_values=pvals, extra_info=T)) | ||
print("****************************************************************") | ||
|
||
#Should give: | ||
# $P_test | ||
# [1] 0.701752883272515 | ||
# | ||
# $P_Fisher | ||
# [1] 0.8613842570343421 | ||
# | ||
# $Scale_Factor_C | ||
# [1] 2.814405567447344 | ||
# | ||
# $DF | ||
# [1] 7.106296345959808 | ||
|
||
|
||
# Test with cancer data # | ||
|
||
pathways <- read.table("../Data/pathways.tsv", sep="\t", header=T, stringsAsFactors=F) | ||
|
@@ -71,6 +89,22 @@ print("****************************************************************") | |
#$DF_Brown | ||
#[1] 10.78837806737645 | ||
|
||
print(kostsMethod(data_matrix=as.matrix(glypDat), p_values=glypPvals, extra_info=T)) | ||
print("****************************************************************") | ||
|
||
#Should give: | ||
# $P_test | ||
# [1] 7.570776008807138e-07 | ||
# | ||
# $P_Fisher | ||
# [1] 1.438732140605804e-08 | ||
# | ||
# $Scale_Factor_C | ||
# [1] 1.349048766471012 | ||
# | ||
# $DF | ||
# [1] 10.3776826664485 | ||
|
||
print("SUMO Reg") | ||
sumoGenes <- pathways$gene[pathways$pathway == "SUMOYLATION BY RANBP2 REGULATES TRANSCRIPTIONAL REPRESSION"] | ||
sumoPvals <- allPvals$pvalue.with.CHD4[allPvals$gene %in% sumoGenes] | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
(on remus) | ||
|
||
git init | ||
git pull https://github.com/IlyaLab/CombiningDependentPvaluesUsingEBM | ||
|
||
git status | ||
git add * | ||
git status | ||
git commit -m 'Initial commit' | ||
git push https://github.com/IlyaLab/CombiningDependentPvaluesUsingEBM | ||
|
||
|
||
|
||
|