-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreducer.py
executable file
·48 lines (38 loc) · 1.22 KB
/
reducer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/usr/bin/env python
import sys
def _count_words(lines):
    """Sum the counts for each word in ``word<TAB>count`` lines.

    Args:
        lines: iterable of text lines, each expected to hold a word and an
            integer count separated by a tab.

    Returns:
        A dictionary mapping each word to the total of its counts.

    Lines without a tab separator, or whose count is not an integer, are
    skipped instead of aborting the whole run.
    """
    wordcount = {}
    for line in lines:
        # Remove whitespace (including the trailing newline) from both ends
        line = line.strip()
        # Parse the input from mapper.py. partition() never raises, so a
        # blank line or a line with no tab is skipped rather than crashing
        # on tuple unpacking.
        word, separator, count = line.partition('\t')
        if not separator:
            continue
        # Convert count (currently a string) to int
        try:
            count = int(count)
        except ValueError:
            continue
        wordcount[word] = wordcount.get(word, 0) + count
    return wordcount


def _write_report(wordcount, out):
    """Write the top-5 table and the full frequency table to ``out``.

    Each row is ``count<TAB>word``; rows are ordered by count, then by word,
    both descending.
    """
    # Sort once and reuse the result for both tables
    ranked = sorted(
        wordcount.items(),
        key=lambda item: (item[1], item[0]),
        reverse=True,
    )

    # Top 5 word count descending
    out.write("Top 5 Word Frequencies\n")
    for word, count in ranked[:5]:
        out.write("%s\t%s\n" % (count, word))

    # Two blank lines separate the tables
    out.write("\n\n")

    # Sort word count by highest frequency
    out.write("All Word Frequencies\n")
    for word, count in ranked:
        out.write("%s\t%s\n" % (count, word))


def main(stdin=None, stdout=None):
    """Read mapper output and write the word-frequency report.

    Args:
        stdin: iterable of input lines; defaults to ``sys.stdin``.
        stdout: writable text stream; defaults to ``sys.stdout``.
    """
    if stdin is None:
        stdin = sys.stdin
    if stdout is None:
        stdout = sys.stdout
    _write_report(_count_words(stdin), stdout)


if __name__ == "__main__":
    main()
# Sort word count by keys alphabetically
# keylist = wordcount.keys()
# keylist.sort()
# for key in keylist:
# print "%s\t%s" % (wordcount[key], key)
# Print unsorted word count
# for word in wordcount.keys():
# print '%s\t%s'% ( word, wordcount[word] )