-
Notifications
You must be signed in to change notification settings - Fork 17
/
markov.go
129 lines (114 loc) · 2.79 KB
/
markov.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
package main
// This Markov chain code is taken from the "Generating arbitrary text"
// codewalk: http://golang.org/doc/codewalk/markov/
//
// Minor modifications have been made to make it easier to integrate
// with a webserver and to save/load state
import (
"encoding/gob"
"fmt"
"math/rand"
"os"
"strings"
"sync"
)
// Prefix is a Markov chain prefix: a fixed-length window of the most recent
// words, normally one or more words long.
type Prefix []string

// String returns the words of the Prefix joined by single spaces
// (for use as a map key).
func (p Prefix) String() string {
	return strings.Join(p, " ")
}

// Shift removes the first word from the Prefix and appends the given word.
// The length stays the same and the receiver's backing array is modified in
// place, so the caller sees the change without reassigning.
//
// Shifting an empty Prefix is a no-op: there is no slot to hold the word,
// and slicing p[1:] on a zero-length slice would panic.
func (p Prefix) Shift(word string) {
	if len(p) == 0 {
		return
	}
	copy(p, p[1:])
	p[len(p)-1] = word
}
// Chain contains a map ("chain") of prefixes to a list of suffixes.
// A prefix is a string of prefixLen words joined with spaces.
// A suffix is a single word. A prefix can have multiple suffixes.
//
// The struct holds a sync.Mutex by value, so it must be used through a
// pointer (as NewChain returns) and never copied.
type Chain struct {
// Chain maps a space-joined prefix to every suffix recorded after it.
// Write appends without de-duplicating, so a suffix appears once per
// occurrence. This is the only exported field.
Chain map[string][]string
// prefixLen is the number of words in each prefix.
prefixLen int
// mu is held by Write, Generate, Save and Load while they access Chain.
mu sync.Mutex
}
// NewChain allocates an empty Chain whose prefixes are prefixLen words long.
// The suffix map is initialized, so the result can be written to right away.
func NewChain(prefixLen int) *Chain {
	c := new(Chain)
	c.Chain = map[string][]string{}
	c.prefixLen = prefixLen
	return c
}
// Write splits in into space-separated words and records each word in Chain
// as a suffix of the prefixLen words that precede it. The prefix starts out
// as prefixLen empty strings, so the first words of the input are stored
// under partly blank keys.
//
// The mutex is taken once per word rather than around the whole input.
// Write always returns len(in) and a nil error.
func (c *Chain) Write(in string) (int, error) {
	reader := strings.NewReader(in)
	window := make(Prefix, c.prefixLen)
	for {
		var word string
		_, scanErr := fmt.Fscan(reader, &word)
		if scanErr != nil {
			// Running out of input is the normal way for the loop to end.
			return len(in), nil
		}

		k := window.String()
		c.mu.Lock()
		c.Chain[k] = append(c.Chain[k], word)
		c.mu.Unlock()

		window.Shift(word)
	}
}
// Generate walks the chain from the all-blank starting prefix and returns
// up to n words joined by single spaces. At every step one suffix of the
// current prefix is picked with rand.Intn; the walk stops early when the
// current prefix has no recorded suffixes. The mutex is held for the whole
// call.
func (c *Chain) Generate(n int) string {
	c.mu.Lock()
	defer c.mu.Unlock()

	window := make(Prefix, c.prefixLen)
	var out []string
	for len(out) < n {
		candidates := c.Chain[window.String()]
		if len(candidates) == 0 {
			// Dead end: nothing was ever seen after this prefix.
			break
		}
		pick := candidates[rand.Intn(len(candidates))]
		out = append(out, pick)
		window.Shift(pick)
	}
	return strings.Join(out, " ")
}
// Save writes the chain to fileName in gob format, creating the file or
// truncating it if it already exists. Only the exported Chain map is
// written; gob skips unexported fields, so prefixLen is not stored.
//
// It returns the error from creating the file, from encoding, or, if both
// succeeded, from closing the file. The mutex is held while encoding.
func (c *Chain) Save(fileName string) (err error) {
	fo, err := os.Create(fileName)
	if err != nil {
		return err
	}
	// A failed Close on a written file can mean the data never reached the
	// disk, so report it through the named result instead of panicking.
	// An earlier encode error takes precedence.
	defer func() {
		if closeErr := fo.Close(); closeErr != nil && err == nil {
			err = closeErr
		}
	}()

	c.mu.Lock()
	defer c.mu.Unlock()
	return gob.NewEncoder(fo).Encode(c)
}
// Load reads gob data from fileName and decodes it into the chain. Only the
// exported Chain map is part of the gob encoding; gob skips unexported
// fields, so prefixLen keeps whatever value the receiver already has.
//
// It returns the error from opening the file, from decoding, or, if both
// succeeded, from closing the file. Errors are returned unwrapped. The mutex
// is held while decoding.
//
// NOTE(review): gob decodes into the existing map, so entries already in
// c.Chain are presumably kept rather than cleared. Confirm if Load is ever
// called on a non-empty chain.
func (c *Chain) Load(fileName string) (err error) {
	fi, err := os.Open(fileName)
	if err != nil {
		return err
	}
	// Report a Close failure through the named result instead of panicking.
	// An earlier decode error takes precedence.
	defer func() {
		if closeErr := fi.Close(); closeErr != nil && err == nil {
			err = closeErr
		}
	}()

	c.mu.Lock()
	defer c.mu.Unlock()
	return gob.NewDecoder(fi).Decode(c)
}