forked from AllenDowney/ThinkPython
-
Notifications
You must be signed in to change notification settings - Fork 0
/
analyze_book2.py
44 lines (30 loc) · 1.01 KB
/
analyze_book2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
"""This module contains code from
Think Python by Allen B. Downey
http://thinkpython.com
Copyright 2012 Allen B. Downey
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
"""
import string
import random
from analyze_book import *
def subtract(d1, d2):
    """Returns a set of all keys that appear in d1 but not d2.

    d1, d2: dictionaries (only the keys are examined; values are ignored)

    returns: a new set; neither d1 nor d2 is modified
    """
    # set.difference accepts any iterable, so d2's keys are consumed
    # directly instead of first being copied into a second temporary set.
    return set(d1).difference(d2)
if __name__ == '__main__':
    # Demo driver: summarize the word histogram of a book, then compare it
    # against a reference word list.
    # process_file, total_words, different_words, most_common and random_word
    # are not defined in this file; presumably they arrive through the star
    # import from analyze_book -- verify against that module.
    #
    # Every print below is a single-argument print(...) with %-formatting so
    # the same line is valid (and prints the same text) on Python 2 and 3.
    hist = process_file('emma.txt', skip_header=True)
    print('Total number of words: %s' % (total_words(hist),))
    print('Number of different words: %s' % (different_words(hist),))

    # most_common's result is unpacked as (freq, word) pairs; only the first
    # twenty entries are shown.
    t = most_common(hist)
    print('The most common words are:')
    for freq, word in t[0:20]:
        print('%s \t %s' % (word, freq))

    # Keys of the book histogram that are missing from the word list.
    words = process_file('words.txt', skip_header=False)
    diff = subtract(hist, words)
    print("The words in the book that aren't in the word list are:")
    # One space-separated line replaces the Python 2 `print word,` loop.
    # NOTE(review): assumes the histogram keys are strings -- confirm
    # against process_file.
    print(' '.join(diff))

    print("\n\nHere are some random words from the book")
    # NOTE(review): assumes random_word returns a string -- confirm.
    print(' '.join(random_word(hist) for i in range(100)))