-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathsuffix_array.cpp
74 lines (66 loc) · 1.68 KB
/
suffix_array.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
// suffix_array.cpp
// Eric K. Zhang; Dec. 17, 2017
#include <bits/stdc++.h>
using namespace std;
// Builds the suffix array of S: the start indices of all suffixes of S,
// listed in ascending lexicographic order. Returns an empty vector for an
// empty string.
//
// Characters are ordered by their unsigned byte value, so the result agrees
// with std::string comparison even when plain `char` is signed and S holds
// bytes >= 0x80.
//
// Method: prefix doubling. Entering the round for a given `len`, `sa` is
// ordered by the first `len` characters of every suffix; the round refines
// that to the first `2 * len` characters. Each round does a constant number
// of linear passes, and `len` doubles until it reaches N.
std::vector<int> suffix_array(std::string S) {
    const int N = static_cast<int>(S.size());
    std::vector<int> sa(N), classes(N);
    for (int i = 0; i < N; i++) {
        // Start from descending positions so that the stable sort below puts
        // the shorter suffix first among suffixes sharing a first character.
        sa[i] = N - 1 - i;
        classes[i] = static_cast<unsigned char>(S[i]);
    }
    std::stable_sort(sa.begin(), sa.end(), [&S](int i, int j) {
        // Compare as unsigned bytes; a plain `char` comparison would order
        // bytes >= 0x80 before ASCII on platforms where char is signed.
        return static_cast<unsigned char>(S[i]) < static_cast<unsigned char>(S[j]);
    });
    for (int len = 1; len < N; len *= 2) {
        // c: class labels left by the previous round (raw byte values in the
        // first round, where len / 2 == 0 makes the second-half test a no-op).
        std::vector<int> c(classes);
        for (int i = 0; i < N; i++) {
            // Neighbours in sa share a class when the earlier suffix has more
            // than `len` characters and both halves carried equal labels.
            bool same = i && sa[i - 1] + len < N
                        && c[sa[i]] == c[sa[i - 1]]
                        && c[sa[i] + len / 2] == c[sa[i - 1] + len / 2];
            // A class is labelled with the index in sa of its first member.
            classes[sa[i]] = same ? classes[sa[i - 1]] : i;
        }
        // cnt[k] is the next free slot of the bucket whose label is k; since a
        // label is the bucket's first index, every bucket starts at cnt[k] == k.
        std::vector<int> cnt(N), s(sa);
        for (int i = 0; i < N; i++)
            cnt[i] = i;
        for (int i = 0; i < N; i++) {
            // s[i] is the second half of the suffix starting `len` earlier.
            // Visiting s in its current order and appending that earlier
            // suffix to its bucket orders each bucket by the next `len`
            // characters. Suffixes starting at N - len or later have no such
            // second half and are not re-placed by this pass.
            int s1 = s[i] - len;
            if (s1 >= 0)
                sa[cnt[classes[s1]]++] = s1;
        }
    }
    return sa;
}
// Computes the longest-common-prefix array for S given its suffix array.
//
// sa : start indices of the suffixes of S in sorted order; it is indexed
//      0..N-1 and its values are used as indices into S, so it must be a
//      permutation of 0..N-1 where N == S.size() (not checked here).
// S  : the string the suffix array was built from.
//
// Returns a vector of N - 1 entries where entry k is the length of the common
// prefix of the suffixes starting at sa[k] and sa[k + 1]. Returns an empty
// vector when S has fewer than two characters (including the empty string,
// for which sizing the result as N - 1 would otherwise request a huge
// allocation and throw).
std::vector<int> lcp_array(const std::vector<int>& sa, std::string S) {
    const int N = static_cast<int>(S.size());
    if (N == 0)
        return {};

    // rank[p] is the position of the suffix starting at p within sa.
    std::vector<int> rank(N), lcp(N - 1);
    for (int i = 0; i < N; i++)
        rank[sa[i]] = i;

    // Suffixes are visited in text order; `pre` carries the match length over
    // from the previous start position (minus one) so matching characters are
    // not rescanned from zero each time.
    int pre = 0;
    for (int i = 0; i < N; i++) {
        // The last suffix in sorted order has no successor to compare with.
        if (rank[i] == N - 1)
            continue;
        const int j = sa[rank[i] + 1];
        while (std::max(i, j) + pre < N && S[i + pre] == S[j + pre])
            ++pre;
        lcp[rank[i]] = pre;
        if (pre > 0)
            --pre;
    }
    return lcp;
}
// Demo driver: builds the suffix and LCP arrays for a fixed DNA-like string
// and prints one row per sorted suffix. Each row shows the text before the
// suffix (right-aligned), a '|' separator, the suffix itself (left-aligned),
// and, for every row but the last, the LCP with the next row's suffix.
int main() {
    const std::string S = "AGCGCCCTTGCGAGCAGTCGTATGCTTTCTCGAATTCCGAGCGGTTAAGCGTGAC";
    const std::vector<int> sa = suffix_array(S);
    const std::vector<int> lcp = lcp_array(sa, S);

    const std::size_t length = S.size();
    const int width = static_cast<int>(length);  // column width for both halves

    std::cout << S << std::endl;
    std::cout << std::string(length, '=') << std::endl;

    for (std::size_t row = 0; row < length; ++row) {
        const std::size_t start = static_cast<std::size_t>(sa[row]);
        std::cout << std::setw(width) << std::right << S.substr(0, start) << '|';
        std::cout << std::setw(width) << std::left << S.substr(start);
        // lcp has one entry fewer than sa, so the final row prints none.
        if (row + 1 < length)
            std::cout << ' ' << lcp[row];
        std::cout << std::endl;
    }
    return 0;
}