-
Notifications
You must be signed in to change notification settings - Fork 68
/
Copy pathoptimized kmp.cpp
240 lines (198 loc) · 4.03 KB
/
optimized kmp.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
// C++ program to implement a
// real time optimized KMP
// algorithm for pattern searching
#include <iostream>
#include <set>
#include <string>
#include <unordered_map>
using std::string;
using std::unordered_map;
using std::set;
using std::cout;
// Function to print
// an array of length len
void printArr(int* F, int len,
char name)
{
cout << '(' << name << ')'
<< "contain: [";
// Loop to iterate through
// and print the array
for (int i = 0; i < len; i++) {
cout << F[i] << " ";
}
cout << "]\n";
}
// Function to print a table.
// len is the length of each array
// in the map.
void printTable(
unordered_map<char, int*>& FT,
int len)
{
cout << "Failure Table: {\n";
// Iterating through the table
// and printing it
for (auto& pair : FT) {
printArr(pair.second,
len, pair.first);
}
cout << "}\n";
}
// Function to construct
// the failure function
// corresponding to the pattern
void constructFailureFunction(
string& P, int* F)
{
// P is the pattern,
// F is the FailureFunction
// assume F has length m,
// where m is the size of P
int len = P.size();
// F[0] must have the value 0
F[0] = 0;
// The index, we are parsing P[1..j]
int j = 1;
int l = 0;
// Loop to iterate through the
// pattern
while (j < len) {
// Computing the failure function or
// lps[] similar to KMP Algorithm
if (P[j] == P[l]) {
l++;
F[j] = l;
j++;
}
else if (l > 0) {
l = F[l - 1];
}
else {
F[j] = 0;
j++;
}
}
}
// Function to construct the failure table.
// P is the pattern, F is the original
// failure function. The table is stored in
// FT[][]
void constructFailureTable(
string& P,
set<char>& pattern_alphabet,
int* F,
unordered_map<char, int*>& FT)
{
int len = P.size();
// T is the char where we mismatched
for (char t : pattern_alphabet) {
// Allocate an array
FT[t] = new int[len];
int l = 0;
while (l < len) {
if (P[F[l]] == t)
// Old failure function gives
// a good shifting
FT[t][l] = F[l] + 1;
else {
// Move to the next char if
// the entry in the failure
// function is 0
if (F[l] == 0)
FT[t][l] = 0;
// Fill the table if F[l] > 0
else
FT[t][l] = FT[t][F[l] - 1];
}
l++;
}
}
}
// Function to implement the realtime
// optimized KMP algorithm for
// pattern searching. T is the text
// we are searching on and
// P is the pattern we are searching for
void KMP(string& T, string& P,
set<char>& pattern_alphabet)
{
// Size of the pattern
int m = P.size();
// Size of the text
int n = T.size();
// Initialize the Failure Function
int F[m];
// Constructing the failure function
// using KMP algorithm
constructFailureFunction(P, F);
printArr(F, m, 'F');
unordered_map<char, int*> FT;
// Construct the failure table and
// store it in FT[][]
constructFailureTable(
P,
pattern_alphabet,
F, FT);
printTable(FT, m);
// The starting index will be when
// the first match occurs
int found_index = -1;
// Variable to iterate over the
// indices in Text T
int i = 0;
// Variable to iterate over the
// indices in Pattern P
int j = 0;
// Loop to iterate over the text
while (i < n) {
if (P[j] == T[i]) {
// Matched the last character in P
if (j == m - 1) {
found_index = i - m + 1;
break;
}
else {
i++;
j++;
}
}
else {
if (j > 0) {
// T[i] is not in P's alphabet
if (FT.find(T[i]) == FT.end())
// Begin a new
// matching process
j = 0;
else
j = FT[T[i]][j - 1];
// Update 'j' to be the length of
// the longest suffix of P[1..j]
// which is also a prefix of P
i++;
}
else
i++;
}
}
// Printing the index at which
// the pattern is found
if (found_index != -1)
cout << "Found at index "
<< found_index << '\n';
else
cout << "Not Found \n";
for (char t : pattern_alphabet)
// Deallocate the arrays in FT
delete[] FT[t];
return;
}
// Driver code
int main()
{
string T = "cabababcababaca";
string P = "ababaca";
set<char> pattern_alphabet
= { 'a', 'b', 'c' };
KMP(T, P, pattern_alphabet);
}