Skip to content

Commit

Permalink
Added Kosts Method for Matlab
Browse files Browse the repository at this point in the history
  • Loading branch information
Theo Knijnenburg committed Feb 27, 2016
1 parent ee1d6d2 commit f818a50
Show file tree
Hide file tree
Showing 6 changed files with 249 additions and 4 deletions.
69 changes: 69 additions & 0 deletions Matlab/KostsMethod.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
% Copyright 2015, Institute for Systems Biology.
%
% Licensed under the Apache License, Version 2.0 (the "License");
% you may not use this file except in compliance with the License.
% You may obtain a copy of the License at
%
% http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS,
% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
% See the License for the specific language governing permissions and
% limitations under the License.
%
% Author: William Poole
% Ported to Matlab: Theo Knijnenburg
% Email: [email protected] / [email protected]
% Created: February 2016

function [Pkost,Pfisher,Ckost,DFkost] = KostsMethod(data_matrix, p_values)
%KOSTSMETHOD Combine dependent P-values using Kost's method.
%
%   [Pkost,Pfisher,Ckost,DFkost] = KostsMethod(data_matrix, p_values)
%
%   Inputs:
%     data_matrix - m x n numeric matrix. Each of the m rows is a variable
%                   and each of the n columns is a sample.
%     p_values    - Vector (row or column) of m P-values to combine, one
%                   per row of data_matrix.
%
%   Outputs:
%     Pkost   - Combined P-value from the scaled chi-square approximation.
%     Pfisher - Combined P-value from Fisher's method (unscaled, 2*m
%               degrees of freedom).
%     Ckost   - Scale factor c applied to the Fisher statistic.
%     DFkost  - Degrees of freedom of the scaled chi-square distribution.
%
%   Uses corr and chi2cdf (Statistics and Machine Learning Toolbox).

%% Validate inputs
% Each row of data_matrix must be paired with exactly one P-value;
% otherwise the covariance correction and the Fisher statistic would be
% computed over different numbers of tests.
num_variables = size(data_matrix, 1);
if numel(p_values) ~= num_variables
    error('KostsMethod:sizeMismatch', ...
        'p_values must contain one P-value per row of data_matrix (%d expected, %d given).', ...
        num_variables, numel(p_values));
end

%% Compute P-values
covar_matrix = CalculateCovariances(data_matrix);
[Pkost,Pfisher,Ckost,DFkost] = CombinePValues(covar_matrix, p_values);

end

%% Local functions

function covar_matrix = CalculateCovariances(data_matrix)
% Approximate the covariances used by the combination step from the
% Pearson correlations between the variables (rows of data_matrix).

% corr correlates columns, so transpose to correlate the row variables.
rho = corr(data_matrix');

% Kost's polynomial fit
a1 = 3.263;
a2 = 0.710;
a3 = 0.027;
covar_matrix = a1.*rho + a2.*rho.^2 + a3.*rho.^3;
end

function [p_kost,p_fisher,c,df_kost] = CombinePValues(covar_matrix, p_values)
% Combine the P-values using the covariance matrix of the variables.

m = size(covar_matrix,1);
df_fisher = 2.0*m;
Expected = 2.0*m;

% Sum of all off-diagonal entries (every pair is counted twice).
cov_sum = sum(covar_matrix(:)) - sum(diag(covar_matrix));
Var = 4.0*m + cov_sum;

c = Var/(2*Expected);
df_kost = 2*(Expected^2)/Var;

% Never use more degrees of freedom than Fisher's method; in that case
% fall back to the unscaled statistic.
if df_kost > df_fisher
    df_kost = df_fisher;
    c = 1;
end

% Fisher statistic; (:) guarantees a scalar for row or column input.
x = 2*sum(-log(p_values(:)));
p_kost = chi2cdf(1.0*x/c,df_kost,'upper');
p_fisher = chi2cdf(1.0*x,df_fisher,'upper');
end

91 changes: 88 additions & 3 deletions Matlab/WorkFlow.m
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
% Ported to Matlab: Theo Knijnenburg
% Email: [email protected] / [email protected]
% Created: June 2015
% Updated: February 2016

%% Initialize
Init
Expand All @@ -28,7 +29,7 @@
%% Pearson correlation
[R,P] = corr(D(1,:)',D(2:end,:)');

%% Emperical Browns Methods
%% Empirical Browns Method
data_matrix = D(2:end,:);
p_values = P;
[Pbrown,Pfisher,Cbrown,DFbrown] = EmpiricalBrownsMethod(data_matrix, p_values)
Expand All @@ -54,6 +55,30 @@
%
% 8.1367

%% Kosts Method
[Pkost,Pfisher,Ckost,DFkost] = KostsMethod(data_matrix, p_values)

%% Should give...

% Pkost =
%
% 0.7018
%
%
% Pfisher =
%
% 0.8614
%
%
% Ckost =
%
% 2.8144
%
%
% DFkost =
%
% 7.1063


%% Load TCGA dataset
% parseTCGAdata
Expand All @@ -70,11 +95,12 @@

display(Pathways{p,1});
[Pbrown,Pfisher,Cbrown,DFbrown] = EmpiricalBrownsMethod(data_matrix, p_values)

[Pkost,Pfisher,Ckost,DFkost] = KostsMethod(data_matrix, p_values)
end


%% Should give...

% FOXA1 TRANSCRIPTION FACTOR NETWORK
%
% Pbrown =
Expand All @@ -96,6 +122,26 @@
%
% 21.3285
%
%
% Pkost =
%
% 3.1577e-57
%
%
% Pfisher =
%
% 4.0434e-139
%
%
% Ckost =
%
% 2.5009
%
%
% DFkost =
%
% 23.1920
%
% GLYPICAN 3 NETWORK
%
% Pbrown =
Expand All @@ -117,6 +163,26 @@
%
% 10.7884
%
%
% Pkost =
%
% 7.5708e-07
%
%
% Pfisher =
%
% 1.4387e-08
%
%
% Ckost =
%
% 1.3490
%
%
% DFkost =
%
% 10.3777
%
% SUMOYLATION BY RANBP2 REGULATES TRANSCRIPTIONAL REPRESSION
%
% Pbrown =
Expand All @@ -137,7 +203,26 @@
% DFbrown =
%
% 18.3869

%
%
% Pkost =
%
% 2.0949e-39
%
%
% Pfisher =
%
% 6.4438e-45
%
%
% Ckost =
%
% 1.1494
%
%
% DFkost =
%
% 17.4011



Expand Down
Binary file modified Python/EmpiricalBrownsMethod.pyc
100644 → 100755
Binary file not shown.
43 changes: 43 additions & 0 deletions R/.Rhistory
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# NOTE(review): this file is R/.Rhistory, i.e. a log of commands typed in an
# interactive session rather than a curated script. Commands are kept exactly
# as recorded; comments below are reviewer annotations only.
source("EmpericalBrownsMethod/R/ebm.R")
options(digits=16)
# Test with artificial data
print("****************************************************************")
print("RANDOM DATA")
randData <- read.table("../Data/RandomData.tsv", sep="\t", header=F, stringsAsFactors=F)
# First row (minus its first column) is the reference vector; the remaining
# rows form the data matrix.
a <- as.numeric(randData[1,-1])
rd <- randData[-1,-1]
# P-value of the correlation test between the reference vector and each of the
# first 10 rows of rd.
pvals <- sapply(1:10, function(i) cor.test(a, as.numeric(rd[i,]))$p.value)
print(empiricalBrownsMethod(data_matrix=rd, p_values=pvals, extra_info=T))
print("****************************************************************")
# The same file is sourced three times with different path spellings --
# presumably the earlier attempts failed; TODO confirm which path is valid.
source("EmpericalBrownsMethod/R/ebm.R")
source("/EmpericalBrownsMethod/R/ebm.R")
source("EmpiricalBrownsMethod/R/ebm.R")
options(digits=16)
print("****************************************************************")
print("RANDOM DATA")
randData <- read.table("../Data/RandomData.tsv", sep="\t", header=F, stringsAsFactors=F)
a <- as.numeric(randData[1,-1])
rd <- randData[-1,-1]
pvals <- sapply(1:10, function(i) cor.test(a, as.numeric(rd[i,]))$p.value)
print(empiricalBrownsMethod(data_matrix=rd, p_values=pvals, extra_info=T))
print("****************************************************************")
# The Kost call is repeated with different capitalisation and then with
# as.matrix() around the data -- presumably retried until one form worked;
# verify against ebm.R.
print(KostsMethod(data_matrix=rd, p_values=pvals, extra_info=T))
print("****************************************************************")
print(kostsMethod(data_matrix=rd, p_values=pvals, extra_info=T))
print("****************************************************************")
print(kostsMethod(data_matrix=as.matrix(rd), p_values=pvals, extra_info=T))
print("****************************************************************")
# Pathway-based test: load pathway membership, per-gene P-values and the
# feature matrix from tab-separated files.
pathways <- read.table("../Data/pathways.tsv", sep="\t", header=T, stringsAsFactors=F)
allPvals <- read.table("../Data/CDH4_Pvalues.tsv", sep="\t", stringsAsFactors=F, header=T )
dat <- read.table("../Data/ReducedFeatureMatrix.tsv", sep="\t", stringsAsFactors=F, header=F)
# Drop duplicated rows from the P-value table.
allPvals <- (unique(allPvals))
print("Glypican 3 Network")
# Select the genes of one pathway, then their P-values and data rows
# (the first column of dat holds the gene identifier used for matching).
glypGenes <- pathways$gene[pathways$pathway == "GLYPICAN 3 NETWORK"]
glypPvals <- allPvals$pvalue.with.CHD4[allPvals$gene %in% glypGenes]
glypDat <- dat[dat$V1 %in% glypGenes, 2:ncol(dat)]
print(empiricalBrownsMethod(data_matrix=glypDat, p_values=glypPvals, extra_info=T))
print("****************************************************************")
# Same call with and without as.matrix(), as in the random-data section above.
print(kostsMethod(data_matrix=glypDat, p_values=glypPvals, extra_info=T))
print("****************************************************************")
print(kostsMethod(data_matrix=as.matrix(glypDat), p_values=glypPvals, extra_info=T))
print("****************************************************************")
36 changes: 35 additions & 1 deletion R/WorkFlow.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@
# Ported to R: David L Gibbs
# Email: [email protected] / [email protected] / [email protected]
# Created: June 2015
# Updated: February 2016

source("EmpericalBrownsMethod/R/ebm.R")
source("EmpiricalBrownsMethod/R/ebm.R")
options(digits=16)

# Test with artificial data
Expand All @@ -44,6 +45,23 @@ print("****************************************************************")
#$DF_Brown
#[1] 8.136664603851868

print(kostsMethod(data_matrix=as.matrix(rd), p_values=pvals, extra_info=T))
print("****************************************************************")

#Should give:
# $P_test
# [1] 0.701752883272515
#
# $P_Fisher
# [1] 0.8613842570343421
#
# $Scale_Factor_C
# [1] 2.814405567447344
#
# $DF
# [1] 7.106296345959808


# Test with cancer data #

pathways <- read.table("../Data/pathways.tsv", sep="\t", header=T, stringsAsFactors=F)
Expand Down Expand Up @@ -71,6 +89,22 @@ print("****************************************************************")
#$DF_Brown
#[1] 10.78837806737645

print(kostsMethod(data_matrix=as.matrix(glypDat), p_values=glypPvals, extra_info=T))
print("****************************************************************")

#Should give:
# $P_test
# [1] 7.570776008807138e-07
#
# $P_Fisher
# [1] 1.438732140605804e-08
#
# $Scale_Factor_C
# [1] 1.349048766471012
#
# $DF
# [1] 10.3776826664485

print("SUMO Reg")
sumoGenes <- pathways$gene[pathways$pathway == "SUMOYLATION BY RANBP2 REGULATES TRANSCRIPTIONAL REPRESSION"]
sumoPvals <- allPvals$pvalue.with.CHD4[allPvals$gene %in% sumoGenes]
Expand Down
14 changes: 14 additions & 0 deletions gitsteps.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
(on remus)

git init
git pull https://github.com/IlyaLab/CombiningDependentPvaluesUsingEBM

git status
git add *
git status
git commit -m 'Initial commit'
git push https://github.com/IlyaLab/CombiningDependentPvaluesUsingEBM




0 comments on commit f818a50

Please sign in to comment.