-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMarkov.py
96 lines (74 loc) · 2.61 KB
/
Markov.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import sys
import string
import random
class Markov(object):
    """Encapsulates Markov analysis of a text and random text generation.

    suffix_map: dict mapping each prefix (a tuple of words) to the list of
        words that followed that prefix in the analyzed text.
    prefix: tuple of the most recent words seen while analyzing.
    """

    def __init__(self):
        self.suffix_map = {}
        self.prefix = ()

    def process_file(self, filename, order=2):
        """Reads a file and performs Markov analysis.

        Everything up to and including the Gutenberg header terminator is
        skipped; every remaining whitespace-separated word is passed to
        process_word.  Results accumulate in self.suffix_map; nothing is
        returned.

        filename: string
        order: integer number of words in the prefix
        """
        # The context manager guarantees the file is closed, even when the
        # analysis raises part-way through.
        with open(filename) as fp:
            self.skip_gutenberg_header(fp)
            for line in fp:
                for word in line.rstrip().split():
                    self.process_word(word, order)

    def skip_gutenberg_header(self, fp):
        """Reads from fp until it finds the line that ends the header.

        If no line starts with the terminator, fp is consumed to the end.

        fp: open file object
        """
        for line in fp:
            if line.startswith('*END*THE SMALL PRINT!'):
                break

    def process_word(self, word, order=2):
        """Records one word of the text being analyzed.

        Until the current prefix holds `order` words the word is only
        appended to it.  After that the word is stored as a suffix of the
        current prefix and the prefix window slides forward by one word.

        word: string
        order: integer number of words in the prefix
        """
        if len(self.prefix) < order:
            self.prefix += (word,)
            return

        # setdefault creates the suffix list the first time a prefix is seen.
        self.suffix_map.setdefault(self.prefix, []).append(word)
        self.prefix = shift(self.prefix, word)

    def random_text(self, n=100):
        """Generates random words from the analyzed text.

        Starts with a random prefix from the dictionary and writes the
        generated words to standard output, separated by spaces.  Nothing
        is written when no text has been analyzed.

        n: number of words to generate
        """
        # Materialize the keys: dict views cannot be indexed, so passing
        # them straight to random.choice fails on Python 3.
        prefixes = list(self.suffix_map)
        if not prefixes:
            return

        # choose a random prefix (not weighted by frequency)
        start = random.choice(prefixes)
        emitted = 0
        while emitted < n:
            suffixes = self.suffix_map.get(start)
            if suffixes is None:
                # The prefix is not in the map, so we reached the end of
                # the original text; restart from a fresh random prefix.
                # Looping (rather than recursing) keeps the stack flat no
                # matter how often this happens.
                start = random.choice(prefixes)
                continue

            # choose a random suffix
            word = random.choice(suffixes)
            sys.stdout.write(word + ' ')
            start = shift(start, word)
            emitted += 1
def shift(t, word):
    """Slides a prefix window forward by one word.

    t: tuple of strings
    word: string

    Returns: tuple of strings -- t without its first element, followed
    by word."""
    remainder = t[1:]
    appended = (word,)
    return remainder + appended
def main(name, filename, n=100, order=2, *args):
    """Analyzes a text file and prints random text generated from it.

    name: script name (unused)
    filename: string, path of the text to analyze
    n: number of words to generate; anything int() accepts
    order: number of words in the prefix; anything int() accepts
    args: any further arguments are ignored

    Prints a usage message and does nothing else when n or order cannot
    be converted to an integer.
    """
    try:
        word_count = int(n)
        prefix_length = int(order)
    except ValueError:
        print('Usage: Markov.py filename [# of words] [prefix length]')
        return

    generator = Markov()
    generator.process_file(filename, prefix_length)
    generator.random_text(word_count)
if __name__ == '__main__':
    # main() needs at least the script name and a filename.  Check the
    # argument count up front instead of catching TypeError around the
    # whole run: that would also swallow genuine TypeErrors raised during
    # analysis or generation and misreport them as a usage problem.
    if len(sys.argv) < 2:
        print('Usage: Markov.py filename [# of words] [prefix length]')
    else:
        main(*sys.argv)