-
Notifications
You must be signed in to change notification settings - Fork 9
/
gokmp.go
141 lines (127 loc) · 2.6 KB
/
gokmp.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
// String-matching in Golang using the Knuth–Morris–Pratt algorithm (KMP)
package gokmp
import (
"errors"
"fmt"
)
type KMP struct {
pattern string
prefix []int
size int
}
// For debugging
func (kmp *KMP) String() string {
return fmt.Sprintf("pattern: %v\nprefix: %v", kmp.pattern, kmp.prefix)
}
// compile new prefix-array given argument
func NewKMP(pattern string) (*KMP, error) {
prefix, err := computePrefix(pattern)
if err != nil {
return nil, err
}
return &KMP{
pattern: pattern,
prefix: prefix,
size: len(pattern)},
nil
}
// returns an array containing indexes of matches
// - error if pattern argument is less than 1 char
func computePrefix(pattern string) ([]int, error) {
// sanity check
len_p := len(pattern)
if len_p < 2 {
if len_p == 0 {
return nil, errors.New("'pattern' must contain at least one character")
}
return []int{-1}, nil
}
t := make([]int, len_p)
t[0], t[1] = -1, 0
pos, count := 2, 0
for pos < len_p {
if pattern[pos-1] == pattern[count] {
count++
t[pos] = count
pos++
} else {
if count > 0 {
count = t[count]
} else {
t[pos] = 0
pos++
}
}
}
return t, nil
}
// return index of first occurence of kmp.pattern in argument 's'
// - if not found, returns -1
func (kmp *KMP) FindStringIndex(s string) int {
// sanity check
if len(s) < kmp.size {
return -1
}
m, i := 0, 0
for m+i < len(s) {
if kmp.pattern[i] == s[m+i] {
if i == kmp.size-1 {
return m
}
i++
} else {
m = m + i - kmp.prefix[i]
if kmp.prefix[i] > -1 {
i = kmp.prefix[i]
} else {
i = 0
}
}
}
return -1
}
// returns true if pattern i matched at least once
func (kmp *KMP) ContainedIn(s string) bool {
return kmp.FindStringIndex(s) >= 0
}
// returns the number of occurences of pattern in argument
func (kmp *KMP) Occurrences(s string) int {
return len(kmp.FindAllStringIndex(s))
}
// for effeciency, define default array-size
const startSize = 10
// find every occurence of the kmp.pattern in 's'
func (kmp *KMP) FindAllStringIndex(s string) []int {
// precompute
len_s := len(s)
if len_s < kmp.size {
return []int{}
}
match := make([]int, 0, startSize)
m, i := 0, 0
for m+i < len_s {
if kmp.pattern[i] == s[m+i] {
if i == kmp.size-1 {
// the word was matched
match = append(match, m)
// simulate miss, and keep running
m = m + i - kmp.prefix[i]
if kmp.prefix[i] > -1 {
i = kmp.prefix[i]
} else {
i = 0
}
} else {
i++
}
} else {
m = m + i - kmp.prefix[i]
if kmp.prefix[i] > -1 {
i = kmp.prefix[i]
} else {
i = 0
}
}
}
return match
}