-
Notifications
You must be signed in to change notification settings - Fork 0
/
compute_ultrametric.py
139 lines (114 loc) · 4.24 KB
/
compute_ultrametric.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# coding:utf-8
import numpy as np
""" FUNCTIONS """
def loop_ultrametric_inner_state():
""" Iterates the ultrametric_inner_state()
for all states except District of Columbia. """
for state in states:
if state != 'District of Columbia':
ultrametric_inner_state(state)
def max_prod(mat1, mat2):
""" max_prod function
input: mat1, mat2 square matrices of same order
output: square matrix with (i,j) entry equals to
min_max(mat1, mat2, i, j) """
n = len(mat1)
tmp = np.zeros((n, n), dtype=float)
for i in range(n):
print 'max_prod: ', i
for j in range(i, n):
tmp[i, j] = min_max(mat1, mat2, i, j)
tmp[j, i] = min_max(mat2, mat1, j, i)
return tmp
def min_max(mat1, mat2, i, j):
""" min_max function
input: mat1, mat2 square matrices of same order
i, j positive integers
output: the minimum of the maximum pointwise vector
from mat1(row i) and mat2(col j) """
return min(np.maximum(mat1[i, :], mat2[:, j]))
def power(mat):
""" power function
computes the iterated max_prod
input: mat n-squared matrix
output: mat**(n-1) """
tmp = mat
n = len(tmp)
k = 1
while 2 ** (k - 1) < n:
# print k,2**k, n
tmp = max_prod(tmp, tmp)
k += 1
return tmp
def u_nr(mat, output_file='uNR.csv'):
""" non-reciprocal ultra metric function
input: mat squared matrix
output: ultra metric matrix by means of
non-reciprocal clustering method """
print 'Computing non-reciprocal ultra metric. Wait... ',
tmp1 = power(mat)
tmp2 = tmp1.T
tmp = np.maximum(tmp1, tmp2)
np.savetxt('ultra_metric/'+output_file.replace(' ', '_'), tmp, fmt='%f',
delimiter=',')
print 'saved in %s' % output_file.replace(' ', '_')
return tmp
def u_r(mat, output_file='uR.csv'):
""" reciprocal ultra metric function
input: mat squared matrix
output: ultra metric matrix by means of
reciprocal clustering method """
print 'Computing reciprocal ultra metric. Wait... ',
a_bar = np.maximum(mat, mat.T)
tmp = power(a_bar)
np.savetxt('ultra_metric/'+output_file.replace(' ', '_'), tmp, fmt='%f',
delimiter=',')
print 'saved in %s' % output_file.replace(' ', '_')
return tmp
def ultrametric_inner_state(state):
""" This function computes the reciprocal and non reciprocal ultrametric
for the inner migration flow in state. """
print '\n** %s' % state
mat = np.loadtxt('migration_matrices/mat_%s_to_%s.csv' % (state, state),
dtype=float, delimiter=',')
w_mat1, w_mat2 = weight_fc(mat)
u_r(w_mat1, output_file='col_uR_%s.csv' % state)
u_nr(w_mat1, output_file='col_uNR_%s.csv' % state)
u_r(w_mat2, output_file='row_uR_%s.csv' % state)
u_nr(w_mat2, output_file='row_uNR_%s.csv' % state)
def ultrametric_national():
""" This function computes the both reciprocal and non-reciprocal
ultrametric for the national migration flow.
This function takes hours to finish if matrix dimension is too big. """
print 'This function needs time to finish.'
mat = np.loadtxt(migration_matrix_file, dtype=float, delimiter=',')
w_mat1, w_mat2 = weight_fc(mat)
u_r(w_mat1, output_file='col_uR.csv')
u_nr(w_mat1, output_file='col_uNR.csv')
u_r(w_mat2, output_file='row_uR.csv')
u_nr(w_mat2, output_file='row_uNR.csv')
def weight_fc(mat):
""" weight function
input: 1 square matrix
output: 2 square matrices """
# global i, j, n
n = len(mat)
tmp1 = np.zeros((n, n), dtype=float)
tmp2 = np.zeros((n, n), dtype=float)
for j in range(n):
sum_col = max(mat[:, j].sum(), 1)
sum_row = max(mat[j, :].sum(), 1)
for i in range(n):
tmp1[i, j] = 1 - float(mat[i, j]) / sum_col
tmp2[i, j] = 1 - float(mat[i, j]) / sum_row
tmp1[j, j] = 0
tmp2[j, j] = 0
# del i, j, n
return tmp1, tmp2
""" MAIN CODE """
migration_matrix_file = 'migration_matrix.csv'
states = np.loadtxt('prepare_data_states_num_counties.csv', dtype=str,
delimiter=',')[:, 0]
# loop_ultrametric_inner_state()
""" This function takes hours to finish. """
ultrametric_national()