@ljt12138
2017-03-11T21:54:49.000000Z
字数 23045
阅读 963
算法
后缀树和后缀数组是处理字符串的有力武器,但一个更为强大的数据结构——后缀自动机往往更加灵活。字符串S的后缀自动机被定义为一个接受S的所有后缀的有限状态自动机。一个暴力的构造方法是对于S的每个子串建立一个节点,将后缀设为接受节点。然而这样的时空复杂度都达到了 $O(n^2)$,显然是不能接受的。
考虑一个字符串abaabac
,当我们用b
或者ab
去匹配它时,得到的匹配位置“终点”集合都为 $\{2, 5\}$,因此 b 和 ab 经过转移到达的终点集合都相等,可以将它们合并成一个节点。换言之,我们不再维护所有的“子串”,而去维护用子串在原字符串上匹配得到的终点集合作为自动机的状态。可以证明,这样的状态数和转移数都是 $O(n)$ 的,这样构造出的自动机称为最简状态后缀自动机。
不难发现,所有的有意义终点集合构成真包含关系,即两个集合要么不相交,要么真包含。我们可以利用包含关系建立一棵树结构,我们称其为Parent树。树上的节点所对应的终点集合真包含其儿子对应的终点集合,且
那么显然也有
我们发现同一终点集合内对应的字符串的长度构成一个区间,记 $\min(x)$ 为x对应终点集合内最短字符串的长度,反之记最长字符串的长度为 $\max(x)$,那么有:$\max(par(x)) = \min(x) - 1$
也就是说:在Parent树上祖先的max比后代的min小,祖先的集合真包含后代的集合
我们维护的自动机有两个逻辑结构,一个是trans(S, a)表示状态S加上字符a到达的新状态形成的DAG,一个是par(S)形成的Parent树。两个结构的性质有:
构建具体方法见陈立杰的冬令营讲稿。
// Appends character x to the suffix automaton (one online construction step).
// Uses last/top/root, maxl[], fa[], chl[][] and new_node() declared elsewhere.
// NOTE(review): new_node(q) presumably allocates a state that copies q's
// outgoing transitions -- confirm against its definition.
void push(int x)
{
int p = last, np = ++top; // create the new state
maxl[np] = maxl[p]+1;
while (p && !chl[p][x]) chl[p][x] = np, p = fa[p]; // no x-edge yet: add one pointing at np
if (!p) fa[np] = root; // walked past the root
else {
int q = chl[p][x]; // the conflicting state
if (maxl[q] == maxl[p]+1) fa[np] = q; // q is already "full": merge the states, i.e. np is merged
else { // into q inside the DAG while the Parent tree still keeps the structure
int nq = new_node(q); // create the common ancestor
maxl[nq] = maxl[p]+1;
fa[nq] = fa[q], fa[q] = nq, fa[np] = nq;
while (p && chl[p][x] == q) chl[p][x] = nq, p = fa[p]; // redirect the edges
}
}
last = np;
}
给定一个长度为n的字符串S,和一个正整数k。求在S中出现至少k次的最长子串。
分析与解:
a. SA解法:建立height数组。考虑出现k次在height数组中的形态,由于要最大化公共子串即区间RMQ,最优解一定是长度为k的连续区间。用单调队列维护一个滑动窗口即可。最优复杂度 $O(n)$,用DA(倍增算法)则是 $O(n \log n)$。
b. SAM解法:由于在自动机中匹配位置相同的子串被收在一个状态内,一个状态的最优解即为这个状态的max。如何确定出现k次?就是 $|Right(x)| \ge k$。利用公式(2)预处理节点的Right,然后枚举一遍每个节点即可。由于字符集太大需要用map维护,摊还分析可知map的总操作量是 $O(n)$ 的,因而总复杂度为 $O(n \log n)$。
SA+DA:
#include <bits/stdc++.h>
using namespace std;
const int maxn = 21000;

// Suffix array + height (LCP) array built by prefix doubling with radix sort.
// Everything is 1-based: the caller fills n and A[1..n], then calls init().
// Afterwards sa[r] is the start of the r-th smallest suffix, rank[] is its
// inverse, and height[r] is the LCP of the suffixes ranked r-1 and r
// (height[1] == 0).
struct suffix_array {
    int A[maxn], n, l, sa[maxn], rank[maxn], height[maxn], C[maxn];

    // One element to be sorted: suffix start `id` with a (first, second) key.
    struct radix_ele {
        int id;
        int k[2];
        radix_ele(){}
        radix_ele(int a, int b, int c) { id = a, k[0] = b, k[1] = c; }
    } RE[maxn], RT[maxn];

    // Stable two-pass counting sort of RE[1..n] by (k[0], k[1]), then assigns
    // dense ranks starting at 1. Every key must lie in [0, n].
    void radix_sort()
    {
        for (int y = 1; y >= 0; y--) {
            memset(C, 0, sizeof C);
            for (int i = 1; i <= n; i++) C[RE[i].k[y]]++;
            for (int i = 1; i < maxn; i++) C[i] += C[i-1];
            for (int i = n; i >= 1; i--) RT[C[RE[i].k[y]]--] = RE[i];
            for (int i = 1; i <= n; i++) RE[i] = RT[i];
        }
        // Use an explicit counter instead of comparing against RE[0]: the
        // first element always gets rank 1, whatever its key is.
        int r = 0;
        for (int i = 1; i <= n; i++) {
            if (i == 1 || RE[i].k[0] != RE[i-1].k[0] || RE[i].k[1] != RE[i-1].k[1])
                r++;
            rank[RE[i].id] = r;
        }
    }

    // Builds rank[] and sa[] from A[1..n].
    void calc_sa()
    {
        // Coordinate-compress the input to 1..m first. Raw values may be far
        // larger than maxn (they index C[] in radix_sort), and a raw 0 would
        // be indistinguishable from the "past the end" second key.
        std::vector<int> vals(A + 1, A + n + 1);
        std::sort(vals.begin(), vals.end());
        vals.erase(std::unique(vals.begin(), vals.end()), vals.end());
        for (int i = 1; i <= n; i++) {
            int key = int(std::lower_bound(vals.begin(), vals.end(), A[i]) - vals.begin()) + 1;
            RE[i] = radix_ele(i, key, 0);
        }
        radix_sort();
        for (int i = 1; i < n; i <<= 1) {
            for (int j = 1; j <= n; j++) RE[j] = radix_ele(j, rank[j], i+j<=n?rank[i+j]:0);
            radix_sort();
        }
        for (int i = 1; i <= n; i++) sa[rank[i]] = i;
    }

    // Fills height[] by walking the suffixes in text order and carrying the
    // previous LCP minus one forward.
    void calc_height()
    {
        int h = 0;
        for (int i = 1; i <= n; i++) {
            if (rank[i] == 1) h = 0;
            else {
                int k = sa[rank[i]-1];
                if (--h < 0) h = 0;
                // Bounds are checked explicitly so the scan cannot run past
                // the text when the input itself contains zeros.
                while (i+h <= n && k+h <= n && A[i+h] == A[k+h]) h++;
            }
            height[rank[i]] = h;
        }
    }

    // Computes sa/rank and then height.
    void init()
    {
        calc_sa();
        calc_height();
    }
} SA;
int k;

// Entry of the monotone deque: a height value and the index it came from.
struct p {
    int dat, id;
    p() {}
    p(int a, int b) : dat(a), id(b) {}
};

deque<p> deq;

// Reads n, k and the sequence, builds the suffix array, then slides a window
// over height[] keeping its minimum at the front of a monotone deque. Prints
// the largest window minimum seen.
int main()
{
    scanf("%d%d", &SA.n, &k);
    for (int pos = 1; pos <= SA.n; ++pos)
        scanf("%d", &SA.A[pos]);
    SA.init();

    int best = 0;
    for (int pos = 1; pos <= SA.n; ++pos) {
        const int cur = SA.height[pos];
        // Evict entries that fell out of the window on the left.
        while (!deq.empty() && pos - deq.front().id + 2 > k)
            deq.pop_front();
        // Keep the deque non-decreasing from front to back.
        while (!deq.empty() && cur < deq.back().dat)
            deq.pop_back();
        deq.push_back(p(cur, pos));
        // The deque holds at least the element just pushed.
        best = max(best, deq.front().dat);
    }
    cout << best << endl;
    return 0;
}
SAM+Map:
#include <bits/stdc++.h>
using namespace std;
const int MAXN = 20005;
int stk[MAXN*2], top = 0;
int rd[MAXN*2], right_siz[MAXN*2];

// Suffix automaton over an integer alphabet; transitions live in one
// std::map per state. State 0 is the null state, state 1 is the root.
struct SAM {
    std::map<int, int> chl[MAXN*2];
    int fa[MAXN*2], maxl[MAXN*2], root, top, last;

    // Resets the counters and clears fa/maxl. The transition maps are not
    // cleared here.
    void init()
    {
        root = top = last = 1;
        memset(fa, 0, sizeof fa);
        memset(maxl, 0, sizeof maxl);   // was `sizeof fa`: right size only by coincidence
    }

    // Appends symbol x. The new non-clone state gets right_siz == 1; a clone
    // created here keeps right_siz == 0.
    void push(int x)
    {
        int p = last;
        const int np = ++top;
        maxl[np] = maxl[p] + 1;
        right_siz[np] = 1;
        last = np;

        // Walk the suffix links, adding the missing x-transitions.
        for (; p; p = fa[p]) {
            int &slot = chl[p][x];
            if (slot != 0) break;
            slot = np;
        }
        if (!p) {
            fa[np] = root;
            return;
        }

        const int q = chl[p][x];
        if (maxl[q] == maxl[p] + 1) {
            fa[np] = q;
            return;
        }

        // q also holds longer strings: split off a clone with the short ones.
        const int nq = ++top;
        maxl[nq] = maxl[p] + 1;
        chl[nq] = chl[q];
        fa[nq] = fa[q];
        fa[q] = fa[np] = nq;
        for (; p && chl[p][x] == q; p = fa[p])
            chl[p][x] = nq;
    }
} sam;
// Accumulates right_siz bottom-up along the fa links: a state is processed
// once all states that point to it through fa have been processed, and its
// count is added to its fa. The root's fa is 0, so index 0 is touched too.
void top_sort()
{
    memset(rd, 0, sizeof rd);
    for (int v = 1; v <= sam.top; ++v)
        ++rd[sam.fa[v]];
    for (int v = 1; v <= sam.top; ++v) {
        if (rd[v] != 0) continue;
        stk[++top] = v;
    }
    while (top != 0) {
        const int v = stk[top];
        --top;
        const int parent = sam.fa[v];
        right_siz[parent] += right_siz[v];
        rd[parent] -= 1;
        if (rd[parent] == 0)
            stk[++top] = parent;
    }
}
// Reads n, k and n symbols, builds the automaton and returns the largest
// maxl among non-root states whose right_siz is at least k (0 if none).
int solve()
{
    int n, k;
    sam.init();
    scanf("%d%d", &n, &k);
    for (int read = 0; read < n; ++read) {
        int u;
        scanf("%d", &u);
        sam.push(u);
    }
    top_sort();

    int best = 0;
    for (int v = 2; v <= sam.top; ++v)
        if (right_siz[v] >= k && sam.maxl[v] > best)
            best = sam.maxl[v];
    return best;
}

int main()
{
    printf("%d", solve());
    return 0;
}
// The final tree already has its information merged, so right must be
// incremented at the moment a state is inserted.
SAM解法:前面两项只和长度有关很容易O(n)或O(1)做,那后面那个呢?展开看,就是求任意两个不相同后缀的最长公共前缀的和。这里需要一个重要的性质:将原串逆序插入后缀自动机得到一颗后缀树。而后缀树上两个元素的后缀是他们的LCA。显然我们树形dp设当前节点为LCA分别计算即可。元素两两相乘计算量很大怎么办啊??用一下乘法分配率不就好了吗。
对于节点x,flag表示当前节点有没有后缀。我们要计算:
这也就是
/**************************************************************
Problem: 3238
User: ljt12138
Language: C++
Result: Accepted
Time:3580 ms
Memory:131176 kb
****************************************************************/
#include <bits/stdc++.h>
using namespace std;
const int MAXN = 500005*2, S = 26;
long long right_siz[MAXN];

// Array-based suffix automaton over an alphabet of S symbols.
// State 0 is the null state, state 1 is the root.
struct SAM {
    int chl[MAXN][S], fa[MAXN], maxl[MAXN];
    int root, top, last;

    // Zeroes every table and leaves only the root state.
    void init()
    {
        root = top = last = 1;
        memset(maxl, 0, sizeof maxl);
        memset(fa, 0, sizeof fa);
        memset(chl, 0, sizeof chl);
    }

    // Appends symbol x (0..S-1). The new non-clone state has its right_siz
    // incremented; clones do not.
    void push(int x)
    {
        int walk = last;
        const int cur = ++top;
        maxl[cur] = maxl[walk] + 1;
        right_siz[cur]++;
        last = cur;

        for (; walk != 0 && chl[walk][x] == 0; walk = fa[walk])
            chl[walk][x] = cur;
        if (walk == 0) {
            fa[cur] = root;
            return;
        }

        const int target = chl[walk][x];
        if (maxl[target] == maxl[walk] + 1) {
            fa[cur] = target;
            return;
        }

        // Split: the clone takes over the strings of length <= maxl[walk]+1.
        const int clone = ++top;
        memcpy(chl[clone], chl[target], sizeof chl[target]);
        maxl[clone] = maxl[walk] + 1;
        fa[clone] = fa[target];
        fa[target] = clone;
        fa[cur] = clone;
        for (; walk != 0 && chl[walk][x] == target; walk = fa[walk])
            chl[walk][x] = clone;
    }
} sam;
char str[MAXN];

// Singly linked adjacency list: edge[e].next chains the edges of one vertex.
struct node {
    int to, next;
} edge[MAXN];

int head[MAXN], top = 0;

// Prepends a directed edge i -> j to i's list.
void push(int i, int j)
{
    top += 1;
    node &slot = edge[top];
    slot.to = j;
    slot.next = head[i];
    head[i] = top;
}
long long ans = 0;
void dfs(int nd)
{
long long sum = right_siz[nd];
for (int i = head[nd]; i; i = edge[i].next)
dfs(edge[i].to), right_siz[nd] += right_siz[edge[i].to], sum += right_siz[edge[i].to]*right_siz[edge[i].to];
ans += (right_siz[nd]*right_siz[nd]-sum)*sam.maxl[nd];
}
// Reads the string, inserts it back to front into the automaton, builds the
// fa tree, runs dfs from the root and prints the length sum minus ans.
void work()
{
    memset(head, 0, sizeof head);
    sam.init();
    scanf("%s", str);

    const int n = strlen(str);
    for (int i = n - 1; i >= 0; --i)
        sam.push(str[i] - 'a');

    for (int v = 1; v <= sam.top; ++v) {
        if (sam.fa[v] == 0) continue;   // the root has no parent edge
        push(sam.fa[v], v);
    }
    dfs(sam.root);

    long long ret = 0;
    for (long long i = 1; i <= n - 1; ++i)
        ret += i * (i + 1) + (1 + i) * i / 2;
    printf("%lld\n", ret - ans);
}

int main()
{
    work();
    return 0;
}
给定一个叶节点不超过20的无根树,每个节点有一个字母。问树上路径形成的本质不同的字符串的个数。
广义后缀自动机裸题。从每个叶节点做bfs,记录父亲的状态从而插入建立后缀自动机。我们知道一个后缀自动机本质不同的子串个数为 $\sum_{x} \left(\max(x) - \max(par(x))\right)$,或者DAG上dp即可。
#include <bits/stdc++.h>
using namespace std;
const int MAXN = 100001*20, S = 10;

// Array-based suffix automaton that can be extended from an arbitrary
// existing state, so several strings sharing prefixes can be inserted.
// State 0 is the null state, state 1 is the root.
struct SAM {
    int chl[MAXN][S], fa[MAXN], maxl[MAXN];
    int top, root, last;

    // Zeroes every table and leaves only the root state.
    void init()
    {
        top = root = last = 1;
        memset(maxl, 0, sizeof maxl);
        memset(fa, 0, sizeof fa);
        memset(chl, 0, sizeof chl);
    }

    // Appends symbol x after state `stat`. The newly created state is left
    // in `last` for the caller to read.
    void push(int stat, int x)
    {
        int walk = stat;
        const int cur = ++top;
        maxl[cur] = maxl[walk] + 1;
        last = cur;

        for (; walk != 0 && chl[walk][x] == 0; walk = fa[walk])
            chl[walk][x] = cur;
        if (walk == 0) {
            fa[cur] = root;
            return;
        }

        const int target = chl[walk][x];
        if (maxl[target] == maxl[walk] + 1) {
            fa[cur] = target;
            return;
        }

        const int clone = ++top;
        maxl[clone] = maxl[walk] + 1;
        memcpy(chl[clone], chl[target], sizeof chl[target]);
        fa[clone] = fa[target];
        fa[target] = clone;
        fa[cur] = clone;
        for (; walk != 0 && chl[walk][x] == target; walk = fa[walk])
            chl[walk][x] = clone;
    }
}sam;
queue<int> que;
int stat[102400], rd[102400], col[102400];

// Singly linked adjacency list of the input tree.
struct node {
    int to, next;
} edge[202400];

int head[102400], top = 0;

// Adds a directed edge i -> j and counts it in rd[i].
void push(int i, int j)
{
    rd[i] += 1;
    top += 1;
    edge[top].to = j;
    edge[top].next = head[i];
    head[i] = top;
}
// Breadth-first walk of the tree from nd. stat[v] holds the automaton state
// from which v's colour is inserted (the root state for nd itself, otherwise
// the state created for the vertex v was discovered from) and doubles as
// the visited marker.
void bfs(int nd)
{
    memset(stat, 0, sizeof stat);
    stat[nd] = 1;
    que.push(nd);
    while (!que.empty()) {
        const int cur = que.front();
        que.pop();
        sam.push(stat[cur], col[cur]);
        const int reached = sam.last;
        for (int e = head[cur]; e != 0; e = edge[e].next) {
            const int nxt = edge[e].to;
            if (stat[nxt] != 0) continue;
            stat[nxt] = reached;
            que.push(nxt);
        }
    }
}
int n, c;

// Reads the tree (n vertices, colours, n-1 edges), inserts the tree as seen
// from every leaf into the automaton and prints the sum of
// maxl[i] - maxl[fa[i]] over all non-root states.
void solve()
{
    sam.init();
    scanf("%d%d", &n, &c);
    for (int i = 1; i <= n; i++)
        scanf("%d", &col[i]);
    for (int i = 1; i < n; i++) {
        int u, v; scanf("%d%d", &u, &v);
        push(u, v); push(v, u);
    }
    // A leaf has degree 1. A tree consisting of a single vertex has degree 0
    // and must still be walked once, otherwise nothing is inserted and the
    // answer would be 0 instead of 1.
    for (int i = 1; i <= n; i++)
        if (rd[i] <= 1)
            bfs(i);
    long long ans = 0;
    for (int i = 2; i <= sam.top; i++)
        ans += sam.maxl[i] - sam.maxl[sam.fa[i]];
    printf("%lld", ans);
}

int main()
{
    solve();
    return 0;
}
给你个长度不超过2000的字符串,求最长公共子串。
SAM解法:只要在匹配的时候记录每个节点对于第i个串匹配的最长距离,然后xjb取max和min就好了。
#include <bits/stdc++.h>
using namespace std;
const int MAXN = 2005*2, S = 26;

// Array-based suffix automaton over S symbols. State 0 is the null state,
// state 1 is the root. The tables are never cleared explicitly.
struct SAM {
    int chl[MAXN][S], fa[MAXN], maxl[MAXN];
    int top, root, last;

    // Resets the state counters only.
    void clear()
    {
        top = 1;
        root = 1;
        last = 1;
    }

    SAM()
    {
        clear();
    }

    // Appends symbol x (0..S-1).
    void push(int x)
    {
        int walk = last;
        const int cur = ++top;
        maxl[cur] = maxl[walk] + 1;
        last = cur;

        for (; walk != 0 && chl[walk][x] == 0; walk = fa[walk])
            chl[walk][x] = cur;
        if (walk == 0) {
            fa[cur] = root;
            return;
        }

        const int target = chl[walk][x];
        if (maxl[target] == maxl[walk] + 1) {
            fa[cur] = target;
            return;
        }

        const int clone = ++top;
        maxl[clone] = maxl[walk] + 1;
        memcpy(chl[clone], chl[target], sizeof chl[target]);
        fa[clone] = fa[target];
        fa[target] = clone;
        fa[cur] = clone;
        for (; walk != 0 && chl[walk][x] == target; walk = fa[walk])
            chl[walk][x] = clone;
    }
} sam;
int n;
int match_max[MAXN*2][10]; // match_max[i][j]: longest match of word j that ends in automaton state i
// One row per word (rows and characters are used 1-based). The dimensions
// used to be swapped (str[MAXN][10]), which only worked because every access
// ran past its 10-byte row into the rest of the array.
char str[10][MAXN];

// Reads n words, builds the automaton of the first one, runs every other
// word through it and prints the length of the longest substring common to
// all words.
void work()
{
    scanf("%d", &n);
    scanf("%s", str[1]+1);
    for (char *p = str[1]+1; *p != '\0'; p++) sam.push(*p-'a');
    for (int i = 2; i <= n; i++) {
        scanf("%s", str[i]+1);
        int nd = sam.root, len = strlen(str[i]+1), mcd = 0;
        for (int j = 1; j <= len; j++) {
            const int ch = str[i][j]-'a';
            if (sam.chl[nd][ch]) {
                nd = sam.chl[nd][ch], mcd++;
            } else {
                // Fall back along fa until the character can be consumed.
                while (nd && !sam.chl[nd][ch]) nd = sam.fa[nd];
                if (nd) mcd = sam.maxl[nd] + 1, nd = sam.chl[nd][ch];
                else nd = sam.root, mcd = 0;
            }
            // A match ending in nd also matches every state on its fa chain,
            // capped by that state's own maxl.
            for (int k = nd; k; k = sam.fa[k])
                match_max[k][i] = max(match_max[k][i], min(sam.maxl[k], mcd));
        }
    }
    int ans = 0;
    if (n == 1) {
        // With a single word there is nothing to intersect with (the loop
        // below would yield INT_MAX): the word itself is the answer.
        ans = strlen(str[1]+1);
    } else {
        for (int i = 2; i <= sam.top; i++) {
            int cnt = INT_MAX;
            for (int j = 2; j <= n; j++) cnt = min(cnt, match_max[i][j]);
            ans = max(ans, cnt);
        }
    }
    cout << ans << endl;
}

int main()
{
    work();
    return 0;
}
/*Description
给出几个由小写字母构成的单词,求它们最长的公共子串的长度。
任务:
l 读入单词
l 计算最长公共子串的长度
l 输出结果
Input
文件的第一行是整数 n,1<=n<=5,表示单词的数量。接下来n行每行一个单词,只由小写字母组成,单词的长度至少为1,最大为2000。
Output
仅一行,一个整数,最长公共子串的长度。
*/
给定一个字符串,求一个回文子串 $s$,最大化 $|s| \cdot cnt(s)$,其中 $cnt(s)$ 为 $s$ 在原串中的出现次数。
首先我们用manacher算法求出本质不同的回文串。由于manacher的复杂度为 $O(n)$,本质不同的回文串个数为 $O(n)$。我们一个个询问其出现次数即可。现在问题转化为了对于一个子串 $S[l..r]$,要在 $O(\log n)$ 的时间内求出其出现次数。
首先我们预处理出每个前缀 $S[1..r]$ 在后缀自动机中的位置 pos[r],那么子串 $S[l..r]$ 对应的状态就是从 pos[r] 沿parent树向上走时,最后一个maxl不小于 $r-l+1$ 的节点。用倍增处理即可。
#include <bits/stdc++.h>
using namespace std;
const int MAXN = 300005*2, S = 26;
int pos[MAXN]; // state reached by the prefix S[1..r]
int fa[MAXN][21];
char str[MAXN];
int right_siz[MAXN];
int stk[MAXN], top = 0, rd[MAXN];

// Array-based suffix automaton over S symbols. State 0 is the null state,
// state 1 is the root.
struct SAM {
    int chl[MAXN][S], fa[MAXN], maxl[MAXN];
    int top, root, last;

    // Zeroes every table and leaves only the root state.
    void clear()
    {
        top = root = last = 1;
        memset(chl, 0, sizeof chl);
        memset(fa, 0, sizeof fa);
        memset(maxl, 0, sizeof maxl);
    }

    SAM() { clear(); }

    // Appends symbol x (0..S-1). The new non-clone state has its right_siz
    // incremented; clones do not.
    void push(int x)
    {
        int walk = last;
        const int cur = ++top;
        maxl[cur] = maxl[walk] + 1;
        right_siz[cur]++;
        last = cur;

        for (; walk != 0 && chl[walk][x] == 0; walk = fa[walk])
            chl[walk][x] = cur;
        if (walk == 0) {
            fa[cur] = root;
            return;
        }

        const int target = chl[walk][x];
        if (maxl[target] == maxl[walk] + 1) {
            fa[cur] = target;
            return;
        }

        const int clone = ++top;
        maxl[clone] = maxl[walk] + 1;
        memcpy(chl[clone], chl[target], sizeof chl[target]);
        fa[clone] = fa[target];
        fa[target] = clone;
        fa[cur] = clone;
        for (; walk != 0 && chl[walk][x] == target; walk = fa[walk])
            chl[walk][x] = clone;
    }
} sam;
// Accumulates right_siz bottom-up along the fa links: each state's count is
// added to its fa once every state pointing to it has been handled.
// rd[] and the global stack are expected to be zero on entry.
void top_sort()
{
    for (int v = 1; v <= sam.top; ++v)
        ++rd[sam.fa[v]];
    for (int v = 1; v <= sam.top; ++v) {
        if (rd[v] != 0) continue;
        stk[++top] = v;
    }
    while (top != 0) {
        const int v = stk[top];
        --top;
        const int parent = sam.fa[v];
        rd[parent] -= 1;
        right_siz[parent] += right_siz[v];
        if (rd[parent] == 0)
            stk[++top] = parent;
    }
}
// Reads the string (stored 1-based), builds the automaton, records the state
// of every prefix in pos[], fills the binary-lifting table over fa and
// finally accumulates right_siz.
void init()
{
    scanf("%s", str+1);
    for (int i = 1; str[i] != '\0'; ++i)
        sam.push(str[i] - 'a');

    const int len = strlen(str+1);
    int state = sam.root;
    for (int i = 1; i <= len; ++i) {
        state = sam.chl[state][str[i] - 'a'];
        pos[i] = state;
    }

    for (int v = 1; v <= sam.top; ++v)
        fa[v][0] = sam.fa[v];
    for (int level = 1; level <= 20; ++level) {
        for (int v = 1; v <= sam.top; ++v) {
            const int half = fa[v][level - 1];
            fa[v][level] = fa[half][level - 1];
        }
    }
    top_sort(); // fills right_siz
}
long long ans = 0;

// Looks up the substring S[i..j]: starts at the state of the prefix S[1..j],
// jumps up the fa tree while the ancestor's maxl is still at least j-i+1,
// then raises ans to length * right_siz of the state reached.
void query(int i, int j)
{
    const int length = j - i + 1;
    int nd = pos[j];
    for (int k = 20; k >= 0; --k) {
        const int up = fa[nd][k];
        if (sam.maxl[up] >= length)
            nd = up;
    }
    const long long value = (long long)length * right_siz[nd];
    if (value > ans)
        ans = value;
}
int p[MAXN];

// Manacher over the 1-based string, run once for odd-length and once for
// even-length palindromes. query() is called for every palindrome found
// while a radius is being extended, and the best value is printed.
void work()
{
    const int len = strlen(str+1);
    str[0] = '$';   // left sentinel; the terminating '\0' is the right one

    // Odd lengths: p[i] is the radius around i (1 means the single letter).
    int id = 0, mx = 0;
    for (int i = 1; i <= len; ++i) {
        if (mx > i) {
            p[i] = min(p[id - (i - id)], mx - i);
        } else {
            p[i] = 1;
            query(i, i);
        }
        while (str[i - p[i]] == str[i + p[i]]) {
            query(i - p[i], i + p[i]);
            ++p[i];
        }
        if (i + p[i] > mx) {
            id = i;
            mx = i + p[i];
        }
    }

    // Even lengths: p[i] is the half-length around the gap between i and i+1.
    id = mx = 0;
    for (int i = 1; i <= len; ++i) {
        if (mx > i)
            p[i] = min(p[id - (i - id)], mx - i);
        else
            p[i] = 0;
        while (str[i - p[i]] == str[i + p[i] + 1]) {
            query(i - p[i], i + p[i] + 1);
            ++p[i];
        }
        if (i + p[i] > mx) {
            id = i;
            mx = i + p[i];
        }
    }
    cout << ans << endl;
}

int main()
{
    init();
    work();
    return 0;
}
给定两个串S1,S2,统计他们的公共子串总数。两个子串不同,当且仅当长度不同或出现位置不同。
SA解法:将S1和S2用一个'#'隔开,求出height数组,由于公共子串是后缀的前缀,因此答案就是所有前一半的后缀和后一半的后缀的lcp的和。用单调栈扫两遍记录答案即可。最优复杂度。
SAM解法:这个做法比较鬼畜。先把第一个串建立后缀自动机,再把第二个串在上面跑。到达一个状态x时匹配长度为len对答案的贡献分为两部分:
第一部分为 $(len - \max(par(x))) \cdot |Right(x)|$,即状态x自身中长度不超过len的子串的贡献;第二部分是x在Parent树上所有祖先的贡献之和,可以拓扑排序后 $O(n)$ 预处理。总复杂度为 $O(n)$。
SA(DA)+单调栈,配合一些常数优化的技巧:
#include <bits/stdc++.h>
using namespace std;
const int maxn = 400005;

// Suffix array + height (LCP) array built by prefix doubling with radix sort.
// Everything is 1-based: the caller fills n and A[1..n] with keys in [1, n],
// then calls init(). Afterwards sa[r] is the start of the r-th smallest
// suffix, rank[] is its inverse, and height[r] is the LCP of the suffixes
// ranked r-1 and r (height[1] == 0).
//
// The obsolete `register` specifiers were dropped: the keyword is no longer
// valid from C++17 on and never affected the result.
struct SA {
    int A[maxn], sa[maxn], rank[maxn], C[maxn], height[maxn], n;

    // One element to be sorted: suffix start `id` with a (first, second) key.
    struct radix_ele {
        int id;
        int k[2];
        radix_ele() {}
        radix_ele(int a, int b, int c):id(a){ k[0] = b, k[1] = c; }
    } RE[maxn], RT[maxn];

    // Stable two-pass counting sort of RE[1..n] by (k[0], k[1]), then assigns
    // dense ranks starting at 1.
    void radix_sort()
    {
        for (int y = 1; y >= 0; y--) {
            memset(C, 0, sizeof C);
            for (int i = 1; i <= n; i++) C[RE[i].k[y]]++;
            for (int i = 1; i < maxn; i++) C[i] += C[i-1];
            for (int i = n; i >= 1; i--) RT[C[RE[i].k[y]]--] = RE[i];
            for (int i = 1; i <= n; i++) RE[i] = RT[i];
        }
        // Explicit counter instead of comparing the first element with the
        // never-written RE[0].
        int r = 0;
        for (int i = 1; i <= n; i++) {
            if (i == 1 || RE[i].k[0] != RE[i-1].k[0] || RE[i].k[1] != RE[i-1].k[1])
                r++;
            rank[RE[i].id] = r;
        }
    }

    // Builds rank[] and sa[] from A[1..n].
    void calc_rank()
    {
        for (int i = 1; i <= n; i++) RE[i] = radix_ele(i, A[i], 0);
        radix_sort();
        for (int k = 1; k < n; k <<= 1) {
            // Second key 0 marks "runs past the end" and sorts first.
            for (int i = 1; i <= n; i++) RE[i] = radix_ele(i, rank[i], i+k<=n?rank[i+k]:0);
            radix_sort();
        }
        for (int i = 1; i <= n; i++) sa[rank[i]] = i;
    }

    // Fills height[] by walking the suffixes in text order and carrying the
    // previous LCP minus one forward.
    void calc_height()
    {
        int h = 0;
        for (int i = 1; i <= n; i++) {
            if (rank[i] == 1) h = 0;
            else {
                int k = sa[rank[i]-1];
                if (--h < 0) h = 0;
                // Explicit bounds instead of relying on A[n+1] being 0.
                while (i+h <= n && k+h <= n && A[i+h] == A[k+h]) h++;
            }
            height[rank[i]] = h;
        }
    }

    // Computes sa/rank and then height.
    inline void init()
    {
        calc_rank();
        calc_height();
    }
} SA;
int n1, n2;
char str1[200005], str2[200005];

// Monotone stack of (value, multiplicity) pairs with strictly increasing
// values from bottom to top. `ans` always equals the sum of
// value * multiplicity over the entries currently on the stack.
struct mono_stack {
    struct ele {
        int dat, cnt;
        ele() {}
        ele(int a, int b) : dat(a), cnt(b) {}
    } stk[400005];
    int top;
    long long ans;

    mono_stack() : ans(0) { top = 0; }

    // Empties the stack and resets the running sum.
    void init()
    {
        top = 0;
        ans = top;
    }

    // Caps every stored value at i; entries with value >= i are merged into
    // one entry of value i. `put` (0 or 1) says whether a new item of value
    // i joins the stack as well.
    inline void push(int i, int put)
    {
        int merged = put;
        while (top != 0 && stk[top].dat >= i) {
            const ele &e = stk[top];
            merged += e.cnt;
            ans -= e.cnt * (e.dat - i);
            --top;
        }
        ans += put * i;
        if (merged != 0) {
            ++top;
            stk[top] = ele(i, merged);
        }
    }
}stk;
// Reads the two words (one per line), concatenates them with a separator
// that is larger than every letter, builds the suffix array and sums, over
// two sweeps of the sorted suffixes, the LCP of every (first word, second
// word) suffix pair.
int main()
{
    // gets() no longer exists in the language and cannot bound its input;
    // fgets() respects the buffer size. Strip the line ending ourselves
    // (including a stray '\r', which would otherwise become a negative key).
    if (!fgets(str1, sizeof str1, stdin)) str1[0] = '\0';
    if (!fgets(str2, sizeof str2, stdin)) str2[0] = '\0';
    str1[strcspn(str1, "\r\n")] = '\0';
    str2[strcspn(str2, "\r\n")] = '\0';

    n1 = strlen(str1), n2 = strlen(str2);
    SA.n = n1 + n2 + 1;
    for (int i = 1; i <= n1; i++) SA.A[i] = str1[i-1] - 'a' + 1;
    SA.A[n1+1] = 27;   // separator
    for (int i = 1; i <= n2; i++) SA.A[n1+1+i] = str2[i-1] - 'a' + 1;
    SA.init();

    long long ans = 0;
    // Sweep 1: suffixes of the second word are stored, suffixes of the
    // first word query the stack.
    stk.init();
    for (int i = 1; i < SA.n; i++) {
        if (SA.sa[i] <= n1) ans += stk.ans, stk.push(SA.height[i+1], 0);
        else stk.push(SA.height[i+1], 1);
    }
    // Sweep 2: roles swapped.
    stk.init();
    for (int i = 1; i < SA.n; i++) {
        if (SA.sa[i] > n1+1) ans += stk.ans, stk.push(SA.height[i+1], 0);
        else stk.push(SA.height[i+1], 1);
    }
    cout << ans << endl;
    return 0;
}
SAM:
#include <bits/stdc++.h>
using namespace std;
const int MAXN = 200005*2, S = 26;
int right_siz[MAXN];

// Array-based suffix automaton over S symbols. State 0 is the null state,
// state 1 is the root. The tables are never cleared explicitly.
struct SAM {
    int chl[MAXN][S], fa[MAXN], maxl[MAXN];
    int top, root, last;

    // Resets the state counters only.
    void clear()
    {
        top = 1;
        root = 1;
        last = 1;
    }

    SAM()
    {
        clear();
    }

    // Appends symbol x (0..S-1). The new non-clone state has its right_siz
    // incremented; clones do not.
    void push(int x)
    {
        int walk = last;
        const int cur = ++top;
        maxl[cur] = maxl[walk] + 1;
        right_siz[cur]++;
        last = cur;

        for (; walk != 0 && chl[walk][x] == 0; walk = fa[walk])
            chl[walk][x] = cur;
        if (walk == 0) {
            fa[cur] = root;
            return;
        }

        const int target = chl[walk][x];
        if (maxl[target] == maxl[walk] + 1) {
            fa[cur] = target;
            return;
        }

        const int clone = ++top;
        maxl[clone] = maxl[walk] + 1;
        memcpy(chl[clone], chl[target], sizeof chl[target]);
        fa[clone] = fa[target];
        fa[target] = clone;
        fa[cur] = clone;
        for (; walk != 0 && chl[walk][x] == target; walk = fa[walk])
            chl[walk][x] = clone;
    }
} sam;
char s1[MAXN], s2[MAXN];
int stk[MAXN], top = 0;
int rd[MAXN];
int topo[MAXN], tp_top = 0;
// canc[x]: total contribution of all proper fa-ancestors of x (root
// excluded). It is a sum of (length range) * (occurrence count) terms and
// reaches about n^2/2 (e.g. for a string of one repeated letter), so it
// must be 64-bit.
long long canc[MAXN];
int vis[MAXN];

// Debug helper: prints every state reachable from nd with its transitions.
void dfs(int nd, string str)
{
    printf("Id = %d, pre = %d, dis = %d, right = %d, cacc = %lld\n", nd, sam.fa[nd], sam.maxl[nd], right_siz[nd], canc[nd]);
    vis[nd] = 1;
    for (int i = 0; i < S; i++)
        if (sam.chl[nd][i])
            printf("-+%c-> %d\n", i+'a', sam.chl[nd][i]);
    for (int i = 0; i < S; i++)
        if (sam.chl[nd][i] && !vis[sam.chl[nd][i]])
            dfs(sam.chl[nd][i], str+char(i+'a'));
}

// Orders the states leaves-first along the fa links, accumulates right_siz
// bottom-up and then fills canc[] top-down.
void top_sort()
{
    for (int i = 1; i <= sam.top; i++) rd[sam.fa[i]]++;
    for (int i = 1; i <= sam.top; i++) if (rd[i] == 0) stk[++top] = i;
    while (top) {
        int t = stk[top--]; topo[++tp_top] = t, rd[sam.fa[t]]--;
        if (rd[sam.fa[t]] == 0) stk[++top] = sam.fa[t];
    }
    for (int i = 1; i <= tp_top; i++) right_siz[sam.fa[topo[i]]] += right_siz[topo[i]];
    for (int i = tp_top; i >= 1; i--) {
        const int v = topo[i];
        const int par = sam.fa[v];
        if (v != sam.root && par != sam.root)
            canc[v] = canc[par] + (long long)(sam.maxl[par] - sam.maxl[sam.fa[par]]) * right_siz[par];
    }
}

// Builds the automaton of the first word, runs the second word through it
// and prints the number of common substrings counted by position.
void work()
{
    scanf("%s%s", s1, s2);
    for (char *p = s1; *p != '\0'; p++) sam.push(*p-'a');
    top_sort();
    int nd = sam.root, len = 0;
    long long ans = 0;
    for (char *p = s2; *p != '\0'; p++) {
        if (sam.chl[nd][*p-'a']) nd = sam.chl[nd][*p-'a'], len++;
        else {
            while (nd && !sam.chl[nd][*p-'a']) nd = sam.fa[nd];
            if (!nd) nd = sam.root, len = 0;
            else len = sam.maxl[nd]+1, nd = sam.chl[nd][*p-'a'];
        }
        // Ancestors contribute in full; the current state only contributes
        // its lengths up to the matched length. Computed in 64 bits.
        ans += canc[nd] + (long long)(len - sam.maxl[sam.fa[nd]]) * right_siz[nd];
    }
    cout << ans << endl;
}

int main()
{
    work();
    return 0;
}
给你一个字符串init,要求你支持两个操作
(1):在当前字符串的后面插入一个字符串
(2):询问字符串s在当前字符串中出现了几次?(作为连续子串)
你必须在线支持这些操作。
SAM+LCT解法:由于SAM是一个在线结构,只要动态维护right集合大小,再暴力匹配后查询right集合大小即可。
用lct维护时的技巧:由于涉及有根树换根,自底向上传送比较困难,考虑自顶向下维护right。操作分别是:
: 先连接,access(y),splay(y),将y和y的左子树siz都加上siz[x],可以用lazy_tag实现。
:先切割,access(y),splay(y),将y和y的左子树都减去siz[x],仍然lazy_tag。
写的时候一定要小心再小心,不然一上午就荒废了。别问我是怎么知道的。
/**************************************************************
Problem: 2555
User: ljt12138
Language: C++
Result: Accepted
Time:26356 ms
Memory:404428 kb
****************************************************************/
#include <bits/stdc++.h>
using namespace std;
const int MAXN = 1600002, S = 26;
// Link-cut tree (splay based) that keeps a per-node counter siz[] with a
// lazy "add" tag next to the usual "rev" (reverse) tag. Node 0 is the null
// node; lct_link/lct_cut re-root the tree at node 1 after every change.
struct LCT {
// chl: splay children; fa: splay parent / path-parent; siz: the counter;
// rev/add: pending lazy tags. flag is only printed by dfs().
int chl[MAXN][2], fa[MAXN], siz[MAXN], flag[MAXN], rev[MAXN], add[MAXN];
int stk[MAXN];
int root, top;
void clear()
{ root = top = 0; }
LCT()
{ clear(); }
// True when nd is the root of its splay, i.e. its fa does not list it as a
// child.
bool isrt(int nd)
{ return chl[fa[nd]][0] != nd && chl[fa[nd]][1] != nd; }
// Pushes nd's tags down: both tags are forwarded to the children first, then
// a pending reversal swaps nd's children and a pending add is applied to
// siz[nd] itself.
void pdw(int nd)
{
int &lc = chl[nd][0], &rc = chl[nd][1];
if (lc) rev[lc] ^= rev[nd], add[lc] += add[nd];
if (rc) rev[rc] ^= rev[nd], add[rc] += add[nd];
if (rev[nd]) rev[nd] = 0, swap(lc, rc);
if (add[nd]) siz[nd] += add[nd], add[nd] = 0;
}
// Single rotation lifting nd above its splay parent.
void zig(int nd)
{
int p = fa[nd], g = fa[p];
int tp = chl[p][0] != nd, tg = chl[g][0] != p, son = chl[nd][tp^1];
if (!isrt(p)) chl[g][tg] = nd;
chl[nd][tp^1] = p, chl[p][tp] = son;
fa[nd] = g, fa[p] = nd, fa[son] = p;
}
// Splays nd to the root of its splay. All tags on the path from the splay
// root down to nd are pushed first (collected on stk, applied top-down).
void splay(int nd)
{
int top = 0; stk[++top] = nd; // local counter, shadows the member `top`
for (int x = nd; !isrt(x); x = fa[x])
stk[++top] = fa[x];
while (top) pdw(stk[top--]);
while (!isrt(nd)) {
int p = fa[nd], g = fa[p];
int tp = chl[p][0] != nd, tg = chl[g][0] != p;
if (isrt(p)) { zig(nd); break; }
else if (tp == tg) zig(p), zig(nd);
else zig(nd), zig(nd);
}
}
// Debug helper: prints the splay subtree of nd, indented by `tab` spaces.
void dfs(int nd, int tab)
{
if (!nd) return;
for (int i = 1; i <= tab; i++) putchar(' ');
printf("nd = %d, flag = %d, siz = %d, lc = %d, rc = %d, fa = %d, rev = %d\n", nd, flag[nd], siz[nd], chl[nd][0], chl[nd][1], fa[nd], rev[nd]);
dfs(chl[nd][0], tab+2);
dfs(chl[nd][1], tab+2);
}
// Walks from x up through fa, splaying each node met and attaching the
// previously visited node as its right child.
void access(int x)
{
for (int y = 0; x; x = fa[y = x])
splay(x), chl[x][1] = y;
}
// Makes x the root of its tree: access + splay, then toggle the reverse tag.
void mkt(int x)
{ access(x), splay(x), rev[x] ^= 1; }
// Attaches the tree rooted at x below y.
void link(int x, int y)
{ mkt(x); splay(x); fa[x] = y; }
// Removes the edge between x and y.
void cut(int x, int y)
{ mkt(x), access(y), splay(y), fa[x] = chl[y][0] = 0;}
// Links x below y, re-roots at node 1, then adds siz[x] to y and, through
// the add tag on y's left splay child, to the rest of that splay subtree.
void lct_link(int x, int y) // x->y
{
link(x, y), mkt(1);
access(y), splay(y), siz[y] += siz[x];
if (chl[y][0]) add[chl[y][0]] += siz[x];
}
// Inverse of lct_link: cuts x from y, re-roots at node 1, then subtracts
// siz[x] from y and, lazily, from y's left splay subtree.
void lct_cut(int x, int y) // cut x->y
{
cut(x, y), mkt(1);
access(y), splay(y), siz[y] -= siz[x];
if (chl[y][0]) add[chl[y][0]] -= siz[x];
}
// Sets x's counter to 1.
void set_flag(int x)
{ mkt(x), splay(x), siz[x] = 1; }
// Returns the root of the splay that contains x after access(x).
int find_fa(int x)
{
access(x);
while (!isrt(x)) x = fa[x];
return x;
}
// Returns nd's counter after all pending tags above it have been applied.
int query(int nd)
{
mkt(nd), splay(nd);
return siz[nd];
}
} lct;
// Suffix automaton whose fa tree is mirrored into the global link-cut tree
// `lct`, so that the counter of a state can be queried while the automaton
// keeps growing. State 0 is the null state, state 1 is the root.
// NOTE(review): the arrays here hold MAXN*2 states while the LCT arrays hold
// MAXN nodes; presumably the number of states never exceeds MAXN -- verify
// against the input limits.
struct SAM {
int chl[MAXN*2][S], fa[MAXN*2], maxl[MAXN*2];
int top, last, root;
void clear()
{ top = last = root = 1; }
SAM()
{ clear(); }
// Appends symbol x (0..S-1). Every change of fa[] is paired with the
// matching lct_link / lct_cut call; the order of these calls relative to the
// fa[] assignments matters because fa[q] is read as an argument before it is
// overwritten.
void push(int x)
{
// The new non-clone state starts with counter 1.
int p = last, np = ++top; maxl[np] = maxl[p] + 1; lct.set_flag(np);
while (p && !chl[p][x]) chl[p][x] = np, p = fa[p];
if (!p) fa[np] = root, lct.lct_link(np, root);
else {
int q = chl[p][x];
if (maxl[q] == maxl[p] + 1) fa[np] = q, lct.lct_link(np, q);
else {
// Split q: the clone nq is inserted between q and q's old parent, and np
// hangs below the clone as well.
int nq = ++top; maxl[nq] = maxl[p] + 1;
memcpy(chl[nq], chl[q], sizeof chl[q]);
lct.lct_link(nq, fa[q]), fa[nq] = fa[q];
lct.lct_cut(q, fa[q]), lct.lct_link(q, nq), fa[q] = nq;
lct.lct_link(np, nq), fa[np] = nq;
while (p && chl[p][x] == q) chl[p][x] = nq, p = fa[p];
}
}
last = np;
}
} sam;
char str[MAXN*2];
int q, mask = 0;

// Unscrambles `buf` in place with the position-swapping scheme driven by
// `mask`. `buf` defaults to the global buffer so existing one-argument calls
// behave as before.
void decode(int mask, char *buf = str)
{
    int len = strlen(buf);
    for (int j = 0; j < len; j++) {
        mask = (mask*131+j)%len;
        swap(buf[j], buf[mask]);
    }
}

// Reads one token into `str` and decodes it with the current global mask.
// The buffer that was read is now the one that gets decoded; previously
// decode() always worked on the global buffer regardless of this argument.
void get_str(char str[])
{
    scanf("%s", str);
    decode(mask, str);
}
char opt[10];

// Reads the number of operations and the initial string, then serves the
// operations: an operation whose name starts with 'A' appends the decoded
// string to the automaton, any other one prints how often the decoded string
// occurs and folds the answer into mask.
int main()
{
    scanf("%d", &q);
    scanf("%s", str);
    for (int i = 0; str[i] != '\0'; ++i)
        sam.push(str[i] - 'A');

    for (int round = 1; round <= q; ++round) {
        scanf("%s", opt);
        get_str(str);

        if (opt[0] == 'A') {
            for (int i = 0; str[i] != '\0'; ++i)
                sam.push(str[i] - 'A');
            continue;
        }

        // Walk the pattern through the automaton.
        int nd = sam.root;
        bool missing = false;
        for (int i = 0; str[i] != '\0'; ++i) {
            const int nxt = sam.chl[nd][str[i] - 'A'];
            if (nxt == 0) {
                missing = true;
                break;
            }
            nd = nxt;
        }
        if (missing) {
            puts("0");
            continue;
        }
        const int ans = lct.query(nd);
        printf("%d\n", ans);
        mask ^= ans;
    }
    return 0;
}
给你一个长度为的字符串,每个后缀有一个权值。,回答:
SA解法:SA配合并查集。
SAM解法:将串逆序插入SAM得到后缀树,然后后缀树上dp即可。
我们知道一个串逆序插入后缀树如同aabb
插入为bbaa
,每一个前缀都是原串的后缀,因此加入时打过flag的节点都是后缀标记的位置。节点x对应的是原串的字符串末尾长度构成的后缀。
嘴巴AC倒是挺容易,然后一下午就没了
几个细节:
#include <bits/stdc++.h>
using namespace std;
const int MAXN = 600005, S = 26;

// Array-based suffix automaton over S symbols. flag[v] is 1 for states
// created directly by push() and 0 for clones. State 0 is the null state,
// state 1 is the root. The tables are never cleared explicitly.
struct SAM {
    int chl[MAXN][S], fa[MAXN], maxl[MAXN], flag[MAXN],vis[MAXN];
    int top, last, root;

    // Resets the state counters only.
    void clear()
    {
        top = 1;
        last = 1;
        root = 1;
    }

    SAM()
    {
        clear();
    }

    // Debug helper: prints every state reachable from nd once.
    void dfs(int nd)
    {
        if (nd == 0) return;
        if (vis[nd]) return;
        vis[nd] = 1;
        printf("nd = %d, fa = %d, maxl = %d, flag = %d\n", nd, fa[nd], maxl[nd], flag[nd]);
        for (int c = 0; c < S; ++c)
            dfs(chl[nd][c]);
    }

    // Appends symbol x (0..S-1).
    void push(int x)
    {
        int walk = last;
        const int cur = ++top;
        maxl[cur] = maxl[walk] + 1;
        flag[cur] = 1;
        last = cur;

        for (; walk != 0 && chl[walk][x] == 0; walk = fa[walk])
            chl[walk][x] = cur;
        if (walk == 0) {
            fa[cur] = root;
            return;
        }

        const int target = chl[walk][x];
        if (maxl[target] == maxl[walk] + 1) {
            fa[cur] = target;
            return;
        }

        const int clone = ++top;
        maxl[clone] = maxl[walk] + 1;
        memcpy(chl[clone], chl[target], sizeof chl[target]);
        fa[clone] = fa[target];
        fa[target] = clone;
        fa[cur] = clone;
        for (; walk != 0 && chl[walk][x] == target; walk = fa[walk])
            chl[walk][x] = clone;
    }
} sam;
char str[MAXN];
long long a[MAXN];
int n;

// Singly linked adjacency list of the fa tree.
struct node {
    int to, next;
} edge[MAXN];

int head[MAXN], top = 0;

// Prepends a directed edge i -> j to i's list.
void push(int i, int j)
{
    top += 1;
    edge[top].to = j;
    edge[top].next = head[i];
    head[i] = top;
}

long long siz[MAXN];
long long mx[MAXN], mn[MAXN];
long long rk[MAXN];
// Reads n, the string (1-based) and the n weights (stored back to front),
// inserts the string back to front into the automaton, builds the fa tree
// and marks every state's max/min as "unset".
void init()
{
    scanf("%d%s", &n, str+1);

    int idx = n;
    while (idx >= 1) {
        scanf("%lld", &a[idx]);
        --idx;
    }
    for (idx = n; idx >= 1; --idx)
        sam.push(str[idx] - 'a');

    for (int v = 1; v <= sam.top; ++v) {
        if (sam.fa[v] == 0) continue;   // the root has no parent edge
        push(sam.fa[v], v);
    }
    for (int v = 0; v <= sam.top; ++v) {
        mx[v] = LONG_LONG_MIN;
        mn[v] = LONG_LONG_MAX;
    }
}
long long rcnt[MAXN];
long long maxcnt[MAXN];

// Post-order walk of the fa tree. For every state it collects the number of
// flagged states below it (siz) and the largest / smallest weight among them
// (mx / mn). Whenever a child is merged into a state that already holds a
// value, the best product across the two sides is recorded in
// maxcnt[maxl[nd]].
void dfs(int nd)
{
    const int own = sam.flag[nd];
    siz[nd] = own;
    rk[nd] = a[sam.maxl[nd]] * own;
    if (own) {
        mn[nd] = rk[nd];
        mx[nd] = mn[nd];
    }

    for (int e = head[nd]; e != 0; e = edge[e].next) {
        const int to = edge[e].to;
        dfs(to);
        siz[nd] += siz[to];

        const bool here_set = mx[nd] != LONG_LONG_MIN && mn[nd] != LONG_LONG_MAX;
        const bool child_set = mx[to] != LONG_LONG_MIN && mn[to] != LONG_LONG_MAX;
        if (here_set && child_set) {
            const long long best = max(mx[to] * mx[nd], mn[to] * mn[nd]);
            long long &slot = maxcnt[sam.maxl[nd]];
            slot = max(slot, best);
        }
        mx[nd] = max(mx[nd], mx[to]);
        mn[nd] = min(mn[nd], mn[to]);
    }
}
// Runs the whole computation and prints, for every length i in 0..n-1, the
// number of suffix pairs whose common prefix is at least i long and the
// largest weight product among them (0 when there is no such pair).
void work()
{
    // maxcnt has MAXN elements; the previous bound `i <= MAXN` wrote one
    // element past the end.
    for (int i = 0; i < MAXN; i++) maxcnt[i] = LLONG_MIN;
    memset(rcnt, 0, sizeof rcnt);
    init();
    dfs(1);
    for (int i = 2; i <= sam.top; i++) {
        // Ordered pairs of flagged states whose meeting point in the fa tree
        // is exactly i: all pairs below i minus those inside one child.
        long long cnt = siz[i]*siz[i]-sam.flag[i];
        for (int k = head[i]; k; k = edge[k].next)
            cnt -= siz[edge[k].to]*siz[edge[k].to];
        rcnt[sam.maxl[i]] += cnt/2;
    }
    // Turn "exactly" into "at least" with suffix sums / suffix maxima.
    for (int i = n-2; i >= 0; i--) rcnt[i] += rcnt[i+1], maxcnt[i] = max(maxcnt[i], maxcnt[i+1]);
    rcnt[0] = (long long)n*(n-1)/2;
    for (int i = 0; i < n; i++) printf("%lld %lld\n", rcnt[i], maxcnt[i]!=LLONG_MIN?maxcnt[i]:0);
}

int main()
{
    work();
    return 0;
}
Description
对于一个给定长度为N的字符串,求它的第K小子串是什么。
Input
好不容易理解了子串计数...总是混。
后缀自动机上从根出发的路径与本质不同的子串一一对应,而到达状态x的子串在原串中的出现次数为 $|Right(x)|$。因此需要预处理出从每个节点出发能走出的路径个数(按是否计重复分别以1或 $|Right(x)|$ 为权),然后贪心地逐位确定即可。
#include <bits/stdc++.h>
using namespace std;
const int MAXN = 1000005, S = 26;
int t, k;
// Array-based suffix automaton over S symbols with a per-state counter val[].
// State 0 is the null state, state 1 is the root.
struct SAM {
int chl[MAXN][S], fa[MAXN], maxl[MAXN], val[MAXN];
int top, root, last;
SAM() { top = root = last = 1; }
// Appends symbol x (0..S-1).
// NOTE: maxl[np] is never assigned for the freshly created state np, so only
// clones (nq) carry a non-zero maxl. As a consequence the test
// `maxl[q] == maxl[p] + 1` can only succeed when q is a clone; a non-clone q
// is always split.
// NOTE(review): code outside this struct evaluates a DP over chl[] in the
// order stored in topo[], which is derived from fa[] only. Confirm that this
// order stays valid before changing how maxl/fa are assigned here.
void push(int x)
{
int p = last, np = ++top; val[np]++;
while (p && !chl[p][x]) chl[p][x] = np, p = fa[p];
if (!p) fa[np] = root;
else {
int q = chl[p][x];
if (maxl[q] == maxl[p] + 1) fa[np] = q;
else {
int nq = ++top; maxl[nq] = maxl[p] + 1;
memcpy(chl[nq], chl[q], sizeof chl[q]);
fa[nq] = fa[q], fa[q] = fa[np] = nq;
while (p && chl[p][x] == q) chl[p][x] = nq, p = fa[p];
}
}
last = np;
}
int stk[MAXN], top_tp, rd[MAXN], topo[MAXN];
// Processes the states leaves-first along fa: each state's val is added to
// its fa and the state is appended to topo[]. The root's fa is 0, so index 0
// is processed as well and ends up after the root in topo[].
// When the global t is 0, every val[1..top] is overwritten with 1 afterwards.
void top_sort()
{
for (int i = 1; i <= top; i++) rd[fa[i]]++;
for (int i = 1; i <= top; i++) if (rd[i] == 0) stk[++top_tp] = i;
int topo_ord = 0;
while (top_tp) {
int nd = stk[top_tp--]; rd[fa[nd]]--, val[fa[nd]] += val[nd], topo[++topo_ord] = nd;
if (rd[fa[nd]] == 0) stk[++top_tp] = fa[nd];
}
if (t == 0)
for (int i = 1; i <= top; i++) val[i] = 1;
}
}sam;
char str[MAXN];
// siz[v]: number of strings (weighted by val) that can be read starting from
// state v, including stopping at v itself. The total is of the order n^2/2,
// far beyond the int range for n in the hundreds of thousands, so 64-bit
// storage is required.
long long siz[MAXN];

// Prints the k-th string (1-based) among those readable from state nd,
// choosing one character at a time. Written as a loop: the recursive form
// nested as deep as the answer is long.
void find(int nd, int k)
{
    while (k > sam.val[nd]) {
        k -= sam.val[nd];
        int next = 0;
        for (int i = 0; i < S && !next; i++) {
            const int to = sam.chl[nd][i];
            if (!to) continue;
            if (k <= siz[to]) {
                putchar('a'+i);
                next = to;
            } else {
                k -= (int)siz[to];   // siz[to] < k here, so it fits in an int
            }
        }
        if (!next) return;   // k exceeds what is available below nd
        nd = next;
    }
}

// Reads the string and the pair (t, k), then prints the k-th smallest
// substring, or -1 when there are fewer than k.
void work()
{
    scanf("%s", str);
    for (char *p = str; *p != '\0'; p++) sam.push(*p-'a');
    scanf("%d%d", &t, &k);
    sam.top_sort();
    // The root stands for the empty string, which is not a substring. Its
    // weight has to be dropped before siz[] is accumulated; otherwise siz[1]
    // is too large by val[1] and a k just past the real total prints nothing
    // instead of -1.
    sam.val[1] = 0;
    for (int i = 1; i <= sam.top; i++) {
        int nd = sam.topo[i];
        siz[nd] = sam.val[nd];
        for (int j = 0; j < S; j++)
            siz[nd] += siz[sam.chl[nd][j]];
    }
    if (siz[1] < k) puts("-1");
    else find(1, k);
}

int main()
{
    work();
    return 0;
}