forked from AllenDowney/ThinkPython
-
Notifications
You must be signed in to change notification settings - Fork 0
/
anagram_sets.py
91 lines (65 loc) · 1.89 KB
/
anagram_sets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
"""This module contains code from
Think Python by Allen B. Downey
http://thinkpython.com
Copyright 2012 Allen B. Downey
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
"""
def signature(s):
    """Builds the anagram signature of a string.

    s: string

    Returns: a string made of the characters of s arranged in sorted
    order, so two strings that are anagrams share one signature.
    """
    return ''.join(sorted(s))
def all_anagrams(filename):
    """Groups the words in a word list by anagram signature.

    Each line of the file is treated as one word; surrounding whitespace
    is stripped and the word is lower-cased before it is grouped.

    filename: string filename of the word list

    Returns: a map from each signature (as computed by signature()) to
    the list of words that have that signature, in the order they were
    read from the file.
    """
    d = {}
    # The with-block closes the file even if reading raises part-way.
    with open(filename) as fin:
        for line in fin:
            word = line.strip().lower()
            # setdefault creates the list the first time a signature is seen.
            d.setdefault(signature(word), []).append(word)
    return d
def print_anagram_sets(d):
    """Prints the anagram sets in d.

    Only values holding more than one word are printed, one per line,
    as the size of the set followed by the list of words.

    d: map whose values are lists of words that are anagrams of
       each other
    """
    for v in d.values():
        if len(v) > 1:
            # A single pre-formatted string prints identically whether
            # print is a statement (Python 2) or a function (Python 3).
            print('%d %s' % (len(v), v))
def print_anagram_sets_in_order(d):
    """Prints the anagram sets in d in decreasing order of size.

    Only values holding more than one word are printed, one
    (size, words) tuple per line. Sets of the same size are ordered by
    comparing their word lists, also in decreasing order.

    d: map whose values are lists of words that are anagrams of
       each other
    """
    # make a list of (size, words) pairs so the tuples sort by size first
    t = [(len(v), v) for v in d.values() if len(v) > 1]

    # largest sets first, as the contract above promises
    t.sort(reverse=True)

    # print the sorted list
    for x in t:
        # print(x) with one argument behaves the same on Python 2 and 3
        print(x)
def filter_length(d, n):
    """Selects only the entries of d whose key has n letters.

    The length test is applied to the key, not to the words in the
    value; the value lists are reused in the result, not copied.

    d: map from a string key (the signature, when d comes from
       all_anagrams) to list of anagrams
    n: integer number of letters

    Returns: new map holding the entries of d whose key has length n
    """
    # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    return {word: anagrams for word, anagrams in d.items() if len(word) == n}
if __name__ == '__main__':
    # Group every word in the word list by signature and report the sets.
    anagram_map = all_anagrams('words.txt')
    print_anagram_sets_in_order(anagram_map)

    # Report again, restricted to the entries whose key has eight letters.
    print_anagram_sets_in_order(filter_length(anagram_map, 8))