-
Notifications
You must be signed in to change notification settings - Fork 3
/
aggregate_csv_comparator.py
74 lines (47 loc) · 1.84 KB
/
aggregate_csv_comparator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import sys
import argparse
import re
import pymongo
from numpy import array, random
import utils
import mongodb_config
def main():
    """Command-line entry point: merge the ``-f`` source files and optionally save them.

    ``-f`` may be given several times; each occurrence adds one source file.
    The sources are always loaded, but the merged result is written out only
    when an output path is supplied with ``-o``.
    """
    cli = argparse.ArgumentParser(
        description='Analyze HIT results submitted by Amazon Mechnical Turk workers.'
    )
    cli.add_argument('-f', action='append', help='The mtk data source file.')
    cli.add_argument('-o', help='The output file of used data.')
    options = cli.parse_args()

    header, data = load_multi_data(options)

    # Without an output path there is nowhere to write the merged table.
    if options.o is None:
        return
    output_comparator_file(header, data, options.o)
def output_comparator_file(header, data, filename):
    """Serialise the merged header and rows and hand them to ``utils.write_file``.

    Args:
        header: list of header fragments; they are joined with ``', '`` to
            form the first output line.
        data: dict mapping a row key to a list of string fragments; each entry
            becomes one output line of the form ``key, frag1, frag2, ...``.
        filename: destination passed through unchanged to ``utils.write_file``.

    The rows are emitted in the dict's own iteration order.
    """
    output = [', '.join(header)]
    # ``items()`` works on both Python 2 and Python 3, whereas the original
    # ``iteritems()`` only exists on Python 2 dicts.
    output.extend(
        map_key + ', ' + ', '.join(content)
        for map_key, content in data.items()
    )
    # NOTE(review): presumably write_file writes one list element per line -
    # confirm against utils.
    utils.write_file(output, filename)
def _split_fields(line):
    """Split *line* on commas, falling back to tabs when no comma is present."""
    fields = line.split(',')
    if len(fields) <= 1:
        fields = line.split("\t")
    return fields


def _merge_file_lines(file_lines, header, data):
    """Fold the lines of one source file into *header* and *data* in place."""
    is_header_line = True
    for line in file_lines:
        fields = _split_fields(line)
        if is_header_line:
            is_header_line = False
            # The first file contributes its whole header; later files drop
            # their first column, which is the row key already present.
            header.append(', '.join(fields[1:] if header else fields))
            continue
        # Rows sharing a key (within or across files) accumulate side by side.
        data.setdefault(fields[0], []).append(', '.join(fields[1:]))


def load_multi_data(args):
    """Load every source file named in ``args.f`` and merge them by row key.

    The first line of each file is treated as its header. Every other line is
    split into fields (comma-separated, or tab-separated when the line has no
    comma); the first field is the row key and the remaining fields are joined
    with ``', '`` and appended to that key's list.

    Args:
        args: object with an ``f`` attribute holding an iterable of file paths,
            or ``None`` when no source was given. A bare string is ignored, as
            in the original implementation, rather than iterated per character.

    Returns:
        A ``(header, data)`` tuple: ``header`` is a list of header fragments
        (one per file) and ``data`` maps each row key to a list of joined
        field strings (one per matching row).
    """
    header = []
    data = {}

    sources = args.f
    if sources is None:
        return (header, data)

    # ``basestring`` only exists on Python 2; fall back to ``str`` so the
    # check does not raise NameError on Python 3.
    try:
        string_types = basestring
    except NameError:
        string_types = str
    if isinstance(sources, string_types):
        return (header, data)

    for source in sources:
        # NOTE(review): presumably load_file returns an iterable of line
        # strings - confirm against utils.
        _merge_file_lines(utils.load_file(source), header, data)
    return (header, data)
# Run the command-line entry point only when this file is executed as a
# script, so importing the module has no side effects.
if __name__ == "__main__":
    main()