@Dmaxiya 2020-08-24T16:40:52.000000Z 字数 8966 阅读 1194

字符串练习题解

Hello_World

kmp

先来简单说一下 $kmp$ 吧，感觉例题说得有点复杂了，这里写一下自己对 $kmp$ 的一些理解吧。
还是用例题中给出的那两个串：

原串： $\quad bababababababababb$
模式串： $~bababb$

当我们比较到模式串的第 $5$ 个字符 $b$ 的时候，发现后一个字符不能再匹配，所以我们需要一个幅度比较大的移动（直接说 $kmp$ ，不从一位一位如何移动过渡了）。

这个幅度比较大的移动要保证：之前匹配过的那些字符可以不用再匹配的长度最大。
那么，可以想到，如果对于当前模式串已经匹配过的前缀（ $s=babab$ ），如果模式串的某个前缀 $x$ 满足：它是当前前缀 $s$ 的最长的后缀（且不等于 $s$ ）。就可以将： $s$ 的后缀用 $x$ 替换，从而将模式串往后移动一个比较大的幅度。

现在可以发现， $s$ 的最长的能用前缀表示的后缀是 $bab$ （ $s_1s_2s_3=s_3s_4s_5$ ），那么在发现不匹配后，就可以将 $s_1s_2s_3$ 移动到 $s_3s_4s_5$ 的位置上（因为它们相等，且 $s_3s_4s_5$ 已经和原串匹配了这三个字符），于是匹配的对应位置就变成了：

原串： $\quad bababababababababb$
模式串： $\quad~bababb$

现在要解决的问题就是：如何在模式串与原串不匹配的时候，使模式串快速改变匹配的位置。
其实从上面的描述可以看出，这个改变的位置，就是模式串当前失配（不匹配，以后都叫失配）位置前一个字符对应的前缀字符串的最大后缀（这个后缀能够等于模式串的某一个前缀），只要处理出这样一个数组就行了，这个数组就是“ $next$ 数组（失配数组）”，这个 $next$ 数组的第 $i$ 位，存的就是当前前缀 $s$ 的最长的符合条件的前缀 $x$ 的最后一个字符的下标。
例如上面的模式串的 $next$ 数组就是（下标从 $1$ 开始）：

$b~a~b~a~b~b\\ 0~0~1~2~3~1$

$kmp$ 的核心就是 $next$ 数组的定义，而不是如何在 $O(n+m)$ 的时间复杂度内求出模式串与原串是否匹配，能灵活运用 $next$ 数组，才算是理解了 $kmp$ ，否则学会如何匹配字符串，只能算是学会了套 $kmp$ 的板子，知道了 $next$ 数组的定义，应该手写都能把 $kmp$ 写出来了吧。
下面给出 $kmp$ 的板子（来自板子-字符串-kmp）：

const int maxn = 1000 + 100;
char str1[maxn], str2[maxn];
int Next[maxn];
// str1 为长串，str2 为短串，Next 为失配数组，maxn 为字符串长度
// 字符串下标从 1 开始
void get_next(char *s) {
    Next[1] = 0;
    int j = 0;
    for(int i = 2; s[i]; ++i) {
        while(j > 0 && s[i] != s[j + 1]) {
            j = Next[j];
        }
        if(s[i] == s[j + 1]) {
            ++j;
        }
        Next[i] = j;
    }
}
int KMP(char *s1, char *s2) {
    int j = 0, ret = 0;
    for(int i = 1; s1[i]; ++i) {
        while(j > 0 && s1[i] != s2[j + 1]) {
            j = Next[j];
        }
        if(s1[i] == s2[j + 1]) {
            ++j;
        }
        if(s2[j + 1] == '\0') {
            ++ret;
            j = Next[j];
        }
    }
    return ret;
}

2 月 24 日 kmp & 最长公共子串练习题解

A. KMP算法

题解

$kmp$ 裸题。

过题代码

#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <cmath>
#include <climits>
#include <cstring>
#include <string>
#include <vector>
#include <list>
#include <queue>
#include <stack>
#include <map>
#include <set>
#include <bitset>
#include <algorithm>
#include <functional>
#include <iomanip>
using namespace std;
#define LL long long
const int maxn = 1000000 + 100;
int T;
int Next[maxn];
char str1[maxn], str2[maxn];
void get_next(char *str) {
    int j = 0;
    for(int i = 2; str[i]; ++i) {
        while(j != 0 && str[i] != str[j + 1]) {
            j = Next[j];
        }
        if(str[i] == str[j + 1]) {
            ++j;
        }
        Next[i] = j;
    }
}
int kmp(char *str1, char *str2) {
    int j = 0;
    int ret = 0;
    for(int i = 1; str1[i]; ++i) {
        while(j != 0 && str1[i] != str2[j + 1]) {
            j = Next[j];
        }
        if(str1[i] == str2[j + 1]) {
            ++j;
        }
        if(str2[j + 1] == '\0') {
            j = Next[j];
            ++ret;
        }
    }
    return ret;
}
int main() {
    #ifdef LOCAL
    freopen("test.txt", "r", stdin);
//    freopen("out.txt", "w", stdout);
    #endif // LOCAL
    ios::sync_with_stdio(false);
    scanf("%d", &T);
    while(T--) {
        scanf("%s%s", str1 + 1, str2 + 1);
        get_next(str1);
        printf("%d\n", kmp(str2, str1));
    }
    return 0;
}

B. Count the string

题意

给定一个长度为 $n$ 的字符串，询问这个字符串中，每个前缀出现的次数。

数据范围

$1\leq n\leq2\times10^5$

题解

这题就是考 $next$ 数组的定义与应用，虽然这种应用已经算是一种套路了，在 $AC$ 自动机上也这样考过，就是计算每个前缀对答案的贡献。
最开始每个前缀对答案的贡献都是 $1$ ，如果某个前缀的后缀（就是 $next$ 数组指向的下标）是原字符串的前缀，那么这个前缀就对答案又有了一次贡献，最后将贡献加起来即可。

过题代码

#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <cmath>
#include <climits>
#include <cstring>
#include <string>
#include <vector>
#include <list>
#include <queue>
#include <stack>
#include <map>
#include <set>
#include <bitset>
#include <algorithm>
#include <functional>
#include <iomanip>
using namespace std;
#define LL long long
const int MOD = 10007;
const int maxn = 200000 + 100;
int T, n;
int Next[maxn];
char str[maxn];
int sum[maxn];
void get_next(char *str) {
    int j = 0;
    for(int i = 2; str[i]; ++i) {
        while(j != 0 && str[i] != str[j + 1]) {
            j = Next[j];
        }
        if(str[i] == str[j + 1]) {
            ++j;
        }
        Next[i] = j;
    }
}
int main() {
    #ifdef LOCAL
    freopen("test.txt", "r", stdin);
//    freopen("out.txt", "w", stdout);
    #endif // LOCAL
    ios::sync_with_stdio(false);
    scanf("%d", &T);
    while(T--) {
        scanf("%d%s", &n, str + 1);
        get_next(str);
        memset(sum, 0, sizeof(int) * (n + 1));
        for(int i = n; i > 0; --i) {
            ++sum[i];
            sum[Next[i]] += sum[i];
            sum[Next[i]] %= MOD;
        }
        int ans = 0;
        for(int i = 1; i <= n; ++i) {
            ans = (ans + sum[i]) % MOD;
        }
        printf("%d\n", ans);
    }
    return 0;
}

C. Common Subsequence

题意

求两个字符串的最长公共子序列。

数据范围

$1\leq\left|s\right|\leq1000$

题解

这题原来的求最长公共子序列，不是求最长公共子串，而且还没给数据范围，出题失误，出题失误。但是两种的 $dp$ 思路是类似的，就是定义 $dp(i,j)$ 为第一个字符串到第 $i$ 个字符所代表的前缀，与第二个字符串到第 $j$ 位所代表的前缀，的最长公共子序列的长度，这样就能得到下面的状态转移方程：

$dp[i][j]=\begin{cases} \max(dp[i-1][j],dp[i][j-1],dp[i-1][j-1]+1)&s1[i]=s2[j]\\ \max(dp[i-1][j],dp[i][j-1])&s1[i]!=s2[j] \end{cases}$

过题代码

#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <cmath>
#include <climits>
#include <cstring>
#include <string>
#include <vector>
#include <list>
#include <queue>
#include <stack>
#include <map>
#include <set>
#include <bitset>
#include <algorithm>
#include <functional>
#include <iomanip>
using namespace std;
#define LL long long
const int maxn = 1000 + 100;
char str1[maxn], str2[maxn];
int dp[maxn][maxn];
int main() {
    #ifdef LOCAL
    freopen("test.txt", "r", stdin);
//    freopen("out.txt", "w", stdout);
    #endif // LOCAL
    ios::sync_with_stdio(false);
    while(scanf("%s%s", str1 + 1, str2 + 1) != EOF) {
        int len1 = strlen(str1 + 1);
        int len2 = strlen(str2 + 1);
        for(int i = 1; i <= len1; ++i) {
            for(int j = 1; j <= len2; ++j) {
                if(str1[i] == str2[j]) {
                    dp[i][j] = max(max(dp[i - 1][j], dp[i][j - 1]), dp[i - 1][j - 1] + 1);
                } else {
                    dp[i][j] = max(dp[i - 1][j], dp[i][j - 1]);
                }
            }
        }
        printf("%d\n", dp[len1][len2]);
    }
    return 0;
}

Trie

字典树，数据结构课上应该都讲过，可能有人也已经写过了，要学的应该也就板子了吧，看别人的写法哪里比较简洁明了之类的。
这里直接贴出板子了（来自板子-字符串-Trie/字典树）：

const int maxcnt = 1000000 + 100;
const int Size = 30;
// maxcnt 为树的节点总数，cnt 为当前节点总数
// 节点下标范围为[1,cnt]，root为1
struct Trie {
    int root, cnt;
    int tree[maxcnt][Size];
    int val[maxcnt];
    int create() {
        ++cnt;
        memset(tree[cnt], 0, sizeof(tree[cnt]));
        val[cnt] = 0;
        return cnt;
    }
    void Init() {
        cnt = 0;
        root = create();
    }
    inline int id(const char &ch) {
        return ch - 'a';
    }
    void add(char *s, int v) {
        int pos = root;
        for(int i = 1; s[i]; ++i) {
            int w = id(s[i]);
            if(tree[pos][w] == 0) {
                tree[pos][w] = create();
            }
            pos = tree[pos][w];
        }
        val[pos] = v;
    }
    int query(char *s) {
        int pos = root;
        for(int i = 1; s[i]; ++i) {
            int w = id(s[i]);
            if(tree[pos][w] == 0) {
                break;
            }
            pos = tree[pos][w];
        }
        return val[pos];
    }
};

比较常用的也就是“字母树”和 $01$ 树了吧， $01$ 数用得可能更经常些，字母树没什么好考的，要考也考 $AC$ 自动机（就是在 $Trie$ 上跑 $kmp$ ），如果碰到 $01$ 树的话，大多数都涉及到数字的位运算，这个比较考思维一些，但其实板子写得好，理解了板子，思路清晰的话，应该也写得比较快。

2 月 25 日回文串 & 字典树练习题解

A. Palindrome

题意

给定一个长度为 $N$ 的字符串，问最少添加多少个字符，能够使得该字符串成为一个回文串。

数据范围

$3\leq N\leq5000$

题解

这题比较考思维，想到了就很好写了，就是把原串反过来，然后求两个串的最长公共子序列长度，用总长度减去公共子序列长度就是答案。
还有注意一下这题卡空间，不能开 $5000\times5000$ 的 $dp$ 数组，用一下滚动数组就行了。

过题代码

#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <cmath>
#include <climits>
#include <cstring>
#include <string>
#include <vector>
#include <list>
#include <queue>
#include <stack>
#include <map>
#include <set>
#include <bitset>
#include <algorithm>
#include <functional>
#include <iomanip>
using namespace std;
#define LL long long
const int maxn = 5000 + 100;
int n;
char str1[maxn], str2[maxn];
int dp[2][maxn];
int main() {
    #ifdef LOCAL
    freopen("test.txt", "r", stdin);
//    freopen("out.txt", "w", stdout);
    #endif // LOCAL
    ios::sync_with_stdio(false);
    while(scanf("%d", &n) != EOF) {
        scanf("%s", str1 + 1);
        int len = strlen(str1 + 1);
        for(int i = 1; i <= len; ++i) {
            str2[len - i + 1] = str1[i];
        }
        for(int i = 1; i <= len; ++i) {
            int ii = i % 2;
            int iii = (i + 1) % 2;
            for(int j = 1; j <= len; ++j) {
                if(str1[i] == str2[j]) {
                    dp[ii][j] = max(max(dp[iii][j], dp[ii][j - 1]), dp[iii][j - 1] + 1);
                } else {
                    dp[ii][j] = max(dp[iii][j], dp[ii][j - 1]);
                }
            }
        }
        printf("%d\n", len - dp[len % 2][len]);
    }
    return 0;
}

B. Trie树

题解

$Trie$ 裸题，就是这题说有 $10^5$ 个字符串，但是每个字符串有 $10$ 个字符，所以 $maxcnt$ 的值应该为 $10^6$ （最大节点数，每一个节点为一个字母）。

过题代码

#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <cmath>
#include <climits>
#include <cstring>
#include <string>
#include <vector>
#include <list>
#include <queue>
#include <stack>
#include <map>
#include <set>
#include <bitset>
#include <algorithm>
#include <functional>
#include <iomanip>
using namespace std;
#define LL long long
const int maxcnt = 1000000 + 100;
const int Size = 30;
struct Trie {
    int root, cnt;
    int tree[maxcnt][Size];
    int val[maxcnt];
    int create() {
        ++cnt;
        memset(tree[cnt], 0, sizeof(tree[cnt]));
        val[cnt] = 0;
        return cnt;
    }
    void Init() {
        cnt = 0;
        root = create();
    }
    inline int id(const char &ch) {
        return ch - 'a';
    }
    void add(char *s) {
        int pos = root;
        for(int i = 1; s[i]; ++i) {
            int w = id(s[i]);
            if(tree[pos][w] == 0) {
                tree[pos][w] = create();
            }
            pos = tree[pos][w];
            ++val[pos];
        }
    }
    int query(char *s) {
        int pos = root;
        for(int i = 1; s[i]; ++i) {
            int w = id(s[i]);
            if(tree[pos][w] == 0) {
                return 0;
            }
            pos = tree[pos][w];
        }
        return val[pos];
    }
};
int n, m;
char str[15];
Trie t;
int main() {
    #ifdef LOCAL
    freopen("test.txt", "r", stdin);
//    freopen("out.txt", "w", stdout);
    #endif // LOCAL
    ios::sync_with_stdio(false);
    while(scanf("%d", &n) != EOF) {
        t.Init();
        for(int i = 0; i < n; ++i) {
            scanf("%s", str + 1);
            t.add(str);
        }
        scanf("%d", &m);
        for(int i = 0; i < m; ++i) {
            scanf("%s", str + 1);
            printf("%d\n", t.query(str));
        }
    }
    return 0;
}

C. Xor Sum

题解

$01$ 树，也算是裸题吧，就是把 $n$ 个数字都放到 $01$ 数中，然后对于每一次询问，从高位往低位贪心，如果存在某一位与 $S$ 的相同位上的数字相反（ $0$ 与 $1$ 相反），就取那一位（这样那一位的异或值才为 $1$ ），如果不存在，则只能让那一位的异或值为 $0$ ，然后从高位到低位贪心。

过题代码

#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <cmath>
#include <climits>
#include <cstring>
#include <string>
#include <vector>
#include <list>
#include <queue>
#include <stack>
#include <map>
#include <set>
#include <bitset>
#include <algorithm>
#include <functional>
#include <iomanip>
using namespace std;
#define LL long long
const int maxcnt = 3200000 + 100;
const int Size = 2;
struct Trie {
    int root, cnt;
    int tree[maxcnt][Size];
    int val[maxcnt];
    int create() {
        ++cnt;
        memset(tree[cnt], 0, sizeof(tree[cnt]));
        return cnt;
    }
    void Init() {
        cnt = 0;
        root = create();
    }
    int id(int num, int dig) {
        return ((num >> (32 - dig)) & 1);
    }
    void add(int num) {
        int pos = root;
        for(int i = 1; i <= 32; ++i) {
            int w = id(num, i);
            if(tree[pos][w] == 0) {
                tree[pos][w] = create();
            }
            pos = tree[pos][w];
        }
    }
    int query(int num) {
        int ret = 0;
        int pos = root;
        for(int i = 1; i <= 32; ++i) {
            int w = !id(num, i);
            if(tree[pos][w] == 0) {
                w = !w;
            }
            ret |= (w << (32 - i));
            pos = tree[pos][w];
        }
        return ret;
    }
};
int T, n, m;
int num;
Trie t;
int main() {
    #ifdef LOCAL
    freopen("test.txt", "r", stdin);
//    freopen("out.txt", "w", stdout);
    #endif // LOCAL
    ios::sync_with_stdio(false);
    scanf("%d", &T);
    for(int cas = 1; cas <= T; ++cas) {
        printf("Case #%d:\n", cas);
        t.Init();
        scanf("%d%d", &n, &m);
        for(int i = 0; i < n; ++i) {
            scanf("%d", &num);
            t.add(num);
        }
        for(int i = 0; i < m; ++i) {
            scanf("%d", &num);
            printf("%d\n", t.query(num));
        }
    }
    return 0;
}

字符串练习题解

kmp

2 月 24 日 kmp & 最长公共子串练习题解

A. KMP算法

题解

过题代码

B. Count the string

题意

数据范围

题解

过题代码

C. Common Subsequence

题意

数据范围

题解

过题代码

Trie

2 月 25 日 回文串 & 字典树练习题解

A. Palindrome

题意

数据范围

题解

过题代码

B. Trie树

题解

过题代码

C. Xor Sum

题解

过题代码

内容目录

2 月 25 日回文串 & 字典树练习题解