-
Notifications
You must be signed in to change notification settings - Fork 0
/
matrix.py
85 lines (70 loc) · 2.46 KB
/
matrix.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import pprint
###############################################################################
# Parse data/categories2.txt into categoryExport, a dict mapping each category
# name to the list of its member entries.
# Layout implied by the parser: a category-name line, then one line per
# member, with a blank line (or end of file) closing the category.
categoryExport = {}
currentCategory = None
currentArray = []
# Context manager so the file handle is closed as soon as parsing finishes
# (the bare open() in the loop header was never closed).
with open("data/categories2.txt") as categoryFile:
    for line in categoryFile:
        line = line.strip()
        if not currentCategory:
            # Not inside a category: this line names the next one. A blank
            # line here leaves the name empty (falsy), so the following line
            # is tried again as the category name.
            currentCategory = line
            currentArray = []
            continue
        if line:
            # Keep only the text after the first space.
            # NOTE(review): presumably the first token is an index/number;
            # a member line without any space raises IndexError here.
            pieces = line.split(" ", 1)
            currentArray.append(pieces[1])
        else:
            # Blank line: the current category is complete.
            categoryExport[currentCategory] = currentArray
            currentCategory = None
# Flush the last category when the file does not end with a blank line.
if currentCategory:
    categoryExport[currentCategory] = currentArray
    currentCategory = None
###############################################################################
import re
# Placeholder tokens used by the earlier hand-rolled CSV splitter. They are
# kept as module-level names for backward compatibility; csvSplit itself no
# longer substitutes them into the data.
COMMA = "<COMMA>"
QUOTE = "<QUOTE>"


def csvSplit(line):
    """Split one line of CSV text into a list of cell strings.

    Surrounding whitespace (including the trailing newline) is stripped from
    the line first. Double-quoted cells may contain commas, and a doubled
    quote ("") inside a quoted cell stands for one literal quote character.

    Parsing is delegated to the standard csv module. The previous
    placeholder-based implementation turned an empty quoted cell (a,"",b)
    into a literal quote character and rewrote any "<COMMA>" / "<QUOTE>"
    text that happened to appear in the data.

    Returns a list of strings; an empty line yields [""], matching what
    "".split(",") used to return.
    """
    import csv  # function-scope so this block carries its own dependency

    line = line.strip()
    # csv.reader wants an iterable of lines; feed it exactly one.
    cells = next(csv.reader([line]), [])
    # The reader produces an empty row for an empty line; callers index
    # row[0], so keep the historical single-empty-cell result.
    return cells or [""]
# Load the matrix as a list of rows, each row a list of cell strings.
# The code that consumes it reads row 0 as the header and column 0 as the
# row label. Opened via a context manager so the handle is closed right
# after reading (the bare open() inside the comprehension was never closed).
with open("data/matrix2.csv") as matrixFile:
    matrix = [csvSplit(line) for line in matrixFile]
def tokenize(text):
    """Return text with every run of characters that are neither word
    characters nor dots collapsed into a single underscore."""
    separatorRuns = re.compile(r'[^\w\.]+')
    return separatorRuns.sub("_", text)
def canonicize(name):
    """Map known spelling variants of a name onto one canonical spelling.

    Each rule lists substrings that must all occur in name; the first rule
    that matches decides the result. Names matching no rule are returned
    unchanged.
    """
    aliasRules = (
        (("Women", "Empowerment"), "Women's Empowerment"),
        (("Chronic Diseases",), "Chronic Diseases & Conditions"),
        (("Ecosystems", "Biodiversity"), "Ecosystems & Biodiversity Loss"),
    )
    for fragments, canonical in aliasRules:
        if all(fragment in name for fragment in fragments):
            return canonical
    return name
# Build one node record per matrix column and splice it into categoryExport.
# Every data row is reduced to its tokenized label (column 0) plus its cells.
rows = [{"token": tokenize(row[0]), "data": row[1:]} for row in matrix[1:]]
# nodeList is never filled; node records live inside the categoryExport lists.
nodeList = []
for (i, name) in enumerate(matrix[0][1:]):
    # Collect the rows that have a strictly positive score in this column.
    outbound = []
    nodeData = {"name": name, "outbound": outbound, 'token': tokenize(name)}
    for row in rows:
        # float() raises ValueError on an empty or non-numeric cell.
        score = float(row["data"][i])
        if score > 0.0:
            outbound.append({'token': row["token"], 'score': score})
    # Replace the bare name in the first category list that contains it with
    # the full node record.
    for catKey in categoryExport:
        catList = categoryExport[catKey]
        try:
            catList[catList.index(name)] = nodeData
            break
        except ValueError:
            pass  # name is not in this category; try the next one
    else:
        # for-else: reached only when no category list contained the name.
        # Written as a single-string print() call so it is valid, and prints
        # the same text, on both Python 2 and Python 3.
        print("%s council not found in any category" % name)
###############################################################################
# Build commentsDict, mapping "<source token>-<destination token>" to the
# comment text recorded for that pair in data/comments2.csv.
# Opened via a context manager so the handle is closed right after reading
# (the bare open() inside the comprehension was never closed).
with open("data/comments2.csv") as commentsFile:
    comments = [csvSplit(line) for line in commentsFile]
commentsDict = {}
# Row 0 is skipped as the header.
for row in comments[1:]:
    srcName = tokenize(canonicize(row[0]))
    # Columns 1-2 are not read. From column 3 onward the cells are consumed
    # as (destination name, comment) pairs; the range bound guarantees that
    # row[i+1] exists.
    for i in range(3, len(row)-1, 2):
        dstName = tokenize(canonicize(row[i]))
        comment = row[i+1].strip()
        if comment:
            # A later row with the same source/destination pair overwrites
            # the earlier comment.
            commentsDict["%s-%s" % (srcName, dstName)] = comment
# Pass the category tree (whose name entries were replaced above by node
# records where a match was found) and the comment lookup to the
# project-local makeData module.
# NOTE(review): dumpToFlex is defined outside this file; what it writes and
# where cannot be determined from here.
import makeData
makeData.dumpToFlex({'categoryExport': categoryExport, 'comments':commentsDict})