Skip to content

Adding test construct prop kernel #3

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 60 additions & 54 deletions network_evaluation_tools/network_propagation.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,70 +8,76 @@
import pandas as pd
import copy


# Normalize network (or network subgraph) for random walk propagation
def normalize_network(network, symmetric_norm=False):
    """Return the degree-normalized adjacency matrix of `network` as a dense array.

    :param network: graph accepted by ``nx.adjacency_matrix``
    :param symmetric_norm: if True, return D^-1/2 * A * D^-1/2; otherwise return
        D^-1 * A, where D holds the column sums of the adjacency matrix A
    :return: 2-D numpy float array with one row/column per node, in the node
        order used by ``nx.adjacency_matrix``
    """
    adj_array = nx.adjacency_matrix(network).toarray().astype(float)
    degree = adj_array.sum(axis=0)
    # Nodes with zero degree (e.g. a single-node component) would otherwise give
    # 1/0 = inf and poison the result with NaN; leave their scale factor at 0.
    has_degree = degree != 0
    scale = np.zeros_like(degree)
    if symmetric_norm:
        scale[has_degree] = 1.0 / np.sqrt(degree[has_degree])
        # Row-scale then column-scale: equivalent to D . A . D with D diagonal
        return scale[:, np.newaxis] * adj_array * scale[np.newaxis, :]
    scale[has_degree] = 1.0 / degree[has_degree]
    # Row-scale only: equivalent to D^-1 . A
    return scale[:, np.newaxis] * adj_array


# Note on degree normalization: when the inverse degree matrix is multiplied first (D^-1 * A), the result is
# already in the correct orientation, so the normalized adjacency array does not need to be transposed.

# Calculate optimal propagation coefficient (updated model)
def calculate_alpha(network, m=-0.02935302, b=0.74842057):
    """Calculate the propagation coefficient alpha from the network's edge count.

    Alpha is the linear model ``m * log10(edge_count) + b`` rounded to 3 decimals.

    :param network: object exposing ``edges()``; only the number of edges is used
    :param m: slope of the linear model
    :param b: intercept of the linear model
    :return: alpha value (always > 0)
    :raises ValueError: if the network has no edges, or if the computed alpha is <= 0
    """
    edge_count = len(network.edges())
    if edge_count == 0:
        # log10(0) is -inf, which would silently produce an infinite alpha
        raise ValueError('Network has no edges - cannot calculate alpha')
    alpha_val = round(m * np.log10(edge_count) + b, 3)
    if alpha_val <= 0:
        raise ValueError('Alpha <= 0 - Network Edge Count is too high')
    return alpha_val


# Closed form random-walk propagation (as seen in HotNet2) for each subgraph: Ft = (1-alpha)*Fo * (I-alpha*norm_adj_mat)^-1
# Concatenate to previous set of subgraphs
def fast_random_walk(alpha, binary_mat, subgraph_norm, prop_data):
    """Closed-form random-walk propagation of `binary_mat` over one subgraph.

    Computes ``(1 - alpha) * binary_mat . (I - alpha * subgraph_norm)^-1`` and
    appends the result column-wise to `prop_data`.

    :param alpha: propagation coefficient
    :param binary_mat: 2-D array with one column per subgraph node
    :param subgraph_norm: square normalized adjacency array of the subgraph
    :param prop_data: 2-D array of previously propagated columns (same row count)
    :return: `prop_data` with the newly propagated columns concatenated on axis 1
    """
    seed_term = (1 - alpha) * binary_mat
    system = np.identity(binary_mat.shape[1]) - alpha * subgraph_norm
    # X = seed_term . system^-1 is the solution of system^T . X^T = seed_term^T;
    # solving the linear system avoids forming the explicit inverse.
    subgraph_prop = np.linalg.solve(system.T, seed_term.T).T
    return np.concatenate((prop_data, subgraph_prop), axis=1)


# Wrapper for random walk propagation of full network by subgraphs
def closed_form_network_propagation(network, binary_matrix, network_alpha, symmetric_norm=False, verbose=False,
                                    save_path=None):
    """Random-walk propagation of `binary_matrix` over `network`, one connected component at a time.

    :param network: networkx graph to propagate over
    :param binary_matrix: pandas DataFrame whose columns are looked up by node label; nodes
        without a column (and any missing values) are treated as 0
    :param network_alpha: propagation coefficient passed to ``fast_random_walk``
    :param symmetric_norm: forwarded to ``normalize_network``
    :param verbose: if True, print alpha and the elapsed time
    :param save_path: if not None, the result is also written to this path as CSV
    :return: DataFrame indexed like `binary_matrix` with one column per network node,
        ordered component by component
    """
    starttime = time.time()
    if verbose:
        print('Alpha: {}'.format(network_alpha))
    prop_data_node_order = []
    # Start from a zero-column array so every component is appended the same way
    prop_data = np.zeros((binary_matrix.shape[0], 0))
    # Propagate each sub-sample separately on each connected component
    for component in nx.connected_components(network):
        subgraph = network.subgraph(component)
        subgraph_nodes = list(subgraph.nodes())
        prop_data_node_order.extend(subgraph_nodes)
        # Select this component's columns; nodes absent from binary_matrix become 0
        binary_matrix_filt = np.array(binary_matrix.reindex(columns=subgraph_nodes).fillna(0))
        subgraph_norm = normalize_network(subgraph, symmetric_norm=symmetric_norm)
        prop_data = fast_random_walk(network_alpha, binary_matrix_filt, subgraph_norm, prop_data)
    # Return propagated result as dataframe
    prop_data_df = pd.DataFrame(data=prop_data, index=binary_matrix.index, columns=prop_data_node_order)
    if save_path is not None:
        prop_data_df.to_csv(save_path)
    if verbose:
        print('Network Propagation Complete: {} seconds'.format(time.time() - starttime))
    return prop_data_df
Empty file added test_suite/__init__.py
Empty file.
52 changes: 52 additions & 0 deletions test_suite/test_network_evaluation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import math
import networkx as nx

import pytest
from network_evaluation_tools import data_import_tools as dit
from network_evaluation_tools import network_evaluation_functions as nef
from network_evaluation_tools import network_propagation as prop
import pandas as pd
import numpy as np
import pickle

# Input files used by the tests (paths are relative to the working directory)
network_test_file = '../Data/Networks/YoungvsOld_UP.csv'
disease_test_file = '../Data/Evaluations/DisGeNET_genesets.txt'
networkx_test_file = '../Data/NetworkCYJS/graph1_Young_Old_Fuzzy_95.pkl'

# Reference AUPRC value per gene set
AUPRC_values = {'Carcinoma, Lewis Lung': 0.5136054421768708, 'Fanconi Anemia': 0.5048184241212726,
                'Endometrial adenocarcinoma': 0.5036461554318696, 'Follicular adenoma': -1.0,
                'Intracranial Aneurysm': -1.0}

# Shared fixtures, loaded from the same files named by the constants above
network = dit.load_network_file(network_test_file, delimiter=',', verbose=True)
genesets = dit.load_node_sets(disease_test_file)
# Keep only the gene sets exercised by the tests
genesets = {name: genesets[name] for name in ('Carcinoma, Lewis Lung',
                                               'Fanconi Anemia',
                                               'Endometrial adenocarcinoma',
                                               'Follicular adenoma',
                                               'Intracranial Aneurysm',
                                               'Muscle Weakness')}
# Reference p value per gene set
genesets_p = {'Carcinoma, Lewis Lung': 0.5921,
              'Fanconi Anemia': 0.5589,
              'Endometrial adenocarcinoma': 0.5921,
              'Follicular adenoma': 0.649,
              'Intracranial Aneurysm': float('inf'),
              'Muscle Weakness': float('inf')}
alpha = 0.684


def test_construct_prop_kernel():
    """
    Build the propagation kernel for the test network and check that it is a
    square DataFrame with one row and one column per network node.

    :return:
    """
    _network = dit.load_network_file(network_test_file, delimiter=',', verbose=True)
    _alpha = prop.calculate_alpha(_network)  # Calculate the Network Alpha
    # Propagate using the random walk model
    kernel = nef.construct_prop_kernel(_network, alpha=_alpha, verbose=True)
    assert isinstance(kernel, pd.DataFrame)
    node_count = _network.number_of_nodes()
    assert kernel.shape == (node_count, node_count)