idekerlab · GuerreroVazquez · Mar 24, 2023 · Mar 24, 2023 · Mar 24, 2023 · Mar 24, 2023
diff --git a/network_evaluation_tools/network_propagation.py b/network_evaluation_tools/network_propagation.py
@@ -8,70 +8,76 @@
 import pandas as pd
 import copy
 
+
 # Normalize network (or network subgraph) for random walk propagation
 def normalize_network(network, symmetric_norm=False):
-	adj_mat = nx.adjacency_matrix(network)
-	adj_array = np.array(adj_mat.todense())
-	if symmetric_norm:
-		D = np.diag(1/np.sqrt(sum(adj_array)))
-		adj_array_norm = np.dot(np.dot(D, adj_array), D)
-	else:
-		degree_norm_array = np.diag(1/sum(adj_array).astype(float))
-		sparse_degree_norm_array = scipy.sparse.csr_matrix(degree_norm_array)
-		adj_array_norm = sparse_degree_norm_array.dot(adj_mat).toarray()
-	return adj_array_norm
+    adj_mat = nx.adjacency_matrix(network)
+    adj_array = np.array(adj_mat.todense())
+    if symmetric_norm:
+        D = np.diag(1 / np.sqrt(sum(adj_array)))
+        adj_array_norm = np.dot(np.dot(D, adj_array), D)
+    else:
+        degree_norm_array = np.diag(1 / sum(adj_array).astype(float))
+        sparse_degree_norm_array = scipy.sparse.csr_matrix(degree_norm_array)
+        adj_array_norm = sparse_degree_norm_array.dot(adj_mat).toarray()
+    return adj_array_norm
+
+
 # Note about normalizing by degree, if multiply by degree_norm_array first (D^-1 * A), then do not need to return
 # transposed adjacency array, it is already in the correct orientation
 
 # Calculate optimal propagation coefficient (updated model)
 def calculate_alpha(network, m=-0.02935302, b=0.74842057):
-	log_edge_count = np.log10(len(network.edges()))
-	alpha_val = round(m*log_edge_count+b,3)
-	if alpha_val <=0:
-		raise ValueError('Alpha <= 0 - Network Edge Count is too high')
-		# There should never be a case where Alpha >= 1, as avg node degree will never be negative
-	else:
-		return alpha_val
+    log_edge_count = np.log10(len(network.edges()))
+    alpha_val = round(m * log_edge_count + b, 3)
+    if alpha_val <= 0:
+        raise ValueError('Alpha <= 0 - Network Edge Count is too high')
+    # There should never be a case where Alpha >= 1, as avg node degree will never be negative
+    else:
+        return alpha_val
+
 
 # Closed form random-walk propagation (as seen in HotNet2) for each subgraph: Ft = (1-alpha)*Fo * (I-alpha*norm_adj_mat)^-1
 # Concatenate to previous set of subgraphs
 def fast_random_walk(alpha, binary_mat, subgraph_norm, prop_data):
-	term1=(1-alpha)*binary_mat
-	term2=np.identity(binary_mat.shape[1])-alpha*subgraph_norm
-	term2_inv = np.linalg.inv(term2)
-	subgraph_prop = np.dot(term1, term2_inv)
-	return np.concatenate((prop_data, subgraph_prop), axis=1)
+    term1 = (1 - alpha) * binary_mat
+    term2 = np.identity(binary_mat.shape[1]) - alpha * subgraph_norm
+    term2_inv = np.linalg.inv(term2)
+    subgraph_prop = np.dot(term1, term2_inv)
+    return np.concatenate((prop_data, subgraph_prop), axis=1)
+
 
 # Wrapper for random walk propagation of full network by subgraphs
-def closed_form_network_propagation(network, binary_matrix, network_alpha, symmetric_norm=False,  verbose=False, save_path=None):
-	starttime=time.time()
-	if verbose:
-		print 'Alpha:', network_alpha
-	# Separate network into connected components and calculate propagation values of each sub-sample on each connected component
-	subgraphs = list(nx.connected_component_subgraphs(network))
-	# Initialize propagation results by propagating first subgraph
-	subgraph = subgraphs[0]
-	subgraph_nodes = list(subgraph.nodes)
-	prop_data_node_order = list(subgraph_nodes)
-	binary_matrix_filt = np.array(binary_matrix.T.ix[subgraph_nodes].fillna(0).T)
-	subgraph_norm = normalize_network(subgraph, symmetric_norm=symmetric_norm)
-	prop_data_empty = np.zeros((binary_matrix_filt.shape[0], 1))
-	prop_data = fast_random_walk(network_alpha, binary_matrix_filt, subgraph_norm, prop_data_empty)
-	# Get propagated results for remaining subgraphs
-	for subgraph in subgraphs[1:]:
-		subgraph_nodes = list(subgraph.nodes)
-		prop_data_node_order = prop_data_node_order + subgraph_nodes
-		binary_matrix_filt = np.array(binary_matrix.T.ix[subgraph_nodes].fillna(0).T)
-		subgraph_norm = normalize_network(subgraph, symmetric_norm=symmetric_norm)
-		prop_data = fast_random_walk(network_alpha, binary_matrix_filt, subgraph_norm, prop_data)
-	# Return propagated result as dataframe
-	prop_data_df = pd.DataFrame(data=prop_data[:,1:], index = binary_matrix.index, columns=prop_data_node_order)
-	if save_path is None:
-		if verbose:
-			print 'Network Propagation Complete:', time.time()-starttime, 'seconds'		
-		return prop_data_df
-	else:
-		prop_data_df.to_csv(save_path)
-		if verbose:
-			print 'Network Propagation Complete:', time.time()-starttime, 'seconds'				
-		return prop_data_df
+def closed_form_network_propagation(network, binary_matrix, network_alpha, symmetric_norm=False, verbose=False,
+                                    save_path=None):
+    starttime = time.time()
+    if verbose:
+        print 'Alpha:', network_alpha
+    # Separate network into connected components and calculate propagation values of each sub-sample on each connected component
+    subgraphs = list(nx.connected_component_subgraphs(network))
+    # Initialize propagation results by propagating first subgraph
+    subgraph = subgraphs[0]
+    subgraph_nodes = list(subgraph.nodes)
+    prop_data_node_order = list(subgraph_nodes)
+    binary_matrix_filt = np.array(binary_matrix.T.ix[subgraph_nodes].fillna(0).T)
+    subgraph_norm = normalize_network(subgraph, symmetric_norm=symmetric_norm)
+    prop_data_empty = np.zeros((binary_matrix_filt.shape[0], 1))
+    prop_data = fast_random_walk(network_alpha, binary_matrix_filt, subgraph_norm, prop_data_empty)
+    # Get propagated results for remaining subgraphs
+    for subgraph in subgraphs[1:]:
+        subgraph_nodes = list(subgraph.nodes)
+        prop_data_node_order = prop_data_node_order + subgraph_nodes
+        binary_matrix_filt = np.array(binary_matrix.T.ix[subgraph_nodes].fillna(0).T)
+        subgraph_norm = normalize_network(subgraph, symmetric_norm=symmetric_norm)
+        prop_data = fast_random_walk(network_alpha, binary_matrix_filt, subgraph_norm, prop_data)
+    # Return propagated result as dataframe
+    prop_data_df = pd.DataFrame(data=prop_data[:, 1:], index=binary_matrix.index, columns=prop_data_node_order)
+    if save_path is None:
+        if verbose:
+            print 'Network Propagation Complete:', time.time() - starttime, 'seconds'
+        return prop_data_df
+    else:
+        prop_data_df.to_csv(save_path)
+        if verbose:
+            print 'Network Propagation Complete:', time.time() - starttime, 'seconds'
+        return prop_data_df
diff --git a/test_suite/__init__.py b/test_suite/__init__.py
diff --git a/test_suite/test_network_evaluation.py b/test_suite/test_network_evaluation.py
@@ -0,0 +1,52 @@
+import math
+import networkx as nx
+
+import pytest
+from network_evaluation_tools import data_import_tools as dit
+from network_evaluation_tools import network_evaluation_functions as nef
+from network_evaluation_tools import network_propagation as prop
+import pandas as pd
+import numpy as np
+import pickle
+
+network_test_file = '../Data/Networks/YoungvsOld_UP.csv'
+disease_test_file = '../Data/Evaluations/DisGeNET_genesets.txt'
+networkx_test_file = '../Data/NetworkCYJS/graph1_Young_Old_Fuzzy_95.pkl'
+
+AUPRC_values = {'Carcinoma, Lewis Lung': 0.5136054421768708, 'Fanconi Anemia': 0.5048184241212726,
+                'Endometrial adenocarcinoma': 0.5036461554318696, 'Follicular adenoma': -1.0,
+                'Intracranial Aneurysm': -1.0}
+network = dit.load_network_file('../Data/Networks/YoungvsOld_UP.csv', delimiter=',', verbose=True)
+genesets = dit.load_node_sets('../Data/Evaluations/DisGeNET_genesets.txt')
+genesets = {'Carcinoma, Lewis Lung': genesets['Carcinoma, Lewis Lung'],
+            'Fanconi Anemia': genesets['Fanconi Anemia'],
+            'Endometrial adenocarcinoma': genesets['Endometrial adenocarcinoma'],
+            'Follicular adenoma': genesets['Follicular adenoma'],
+            'Intracranial Aneurysm': genesets['Intracranial Aneurysm'],
+            'Muscle Weakness': genesets['Muscle Weakness']
+            }
+genesets_p = {'Carcinoma, Lewis Lung': 0.5921,
+              'Fanconi Anemia': 0.5589,
+              'Endometrial adenocarcinoma': 0.5921,
+              'Follicular adenoma': 0.649,
+              'Intracranial Aneurysm': float('inf'),
+              'Muscle Weakness': float('inf')}
+alpha = 0.684
+
+
+def test_construct_prop_kernel():
+    """
+    This test generates the kernel based on a specific network \
+    of 206 nodes.
+
+    :return:
+    """
+    _network = dit.load_network_file(network_test_file, delimiter=',', verbose=True)
+    _gene_sets = dit.load_node_sets(disease_test_file)
+    _gene_sets_p = nef.calculate_p(_network, _gene_sets)  # calculate the sub-sampling rate p for each node set
+    _alpha = prop.calculate_alpha(_network)  # Calculate the Network Alpha
+    kernel = nef.construct_prop_kernel(_network, alpha=_alpha, verbose=True)
+    assert isinstance(kernel, pd.DataFrame)
+    assert kernel.shape == (len(_network.nodes), len(_network.nodes))  # Propagate using the random walk model
+
+