A string s is called a (k,l)-repeat if s is obtained by concatenating k>=1 copies of some seed string t of length l>=1. For example, the string
s = abaabaabaaba
is a (4,3)-repeat with t = aba as its seed string. That is, the seed string t is 3 characters long, and the whole string s is obtained by repeating t 4 times.
Write a program for the following task: Your program is given a long string u consisting of characters ‘a’ and/or ‘b’ as input. Your program must find some (k,l)-repeat that occurs as substring within u with k as large as possible. For example, the input string
u = babbabaabaabaabab
contains the underlined (4,3)-repeat s starting at position 5. Since u contains no other contiguous substring with more than 4 repeats, your program must output the maximum k.
Input
The first line of the input contains H, the number of test cases (H <= 20). H test cases follow. The first line of each test case is n, the length of the input string (n <= 50000). The next n lines contain the input string, one character (either ‘a’ or ‘b’) per line, in order.
Output
For each test case, you should write exactly one integer k on a line - the maximum repeat count.
Example
Input: 1 17 b a b b a b a a b a a b a a b a b Output: 4
since a (4, 3)-repeat is found starting at the 5th character of the input string.
题意:
求重复次数最多的连续重复子串出现的次数
思路:
罗穗骞论文和其他博主已经说的比较清楚了,不再赘述。
在此解释一下向左匹配是什么意思。
将i,j同时向左移动,如果s[i]仍然等于s[j],则匹配成功。
// Maximum-repetition substring.
//
// For each test case, reads a string of length n (one character per token)
// and prints the largest k such that some substring of the input is a
// k-fold repetition of a non-empty seed string.
//
// Approach: build a suffix array (prefix doubling), the height/LCP array,
// and a sparse table for range-minimum queries, so the longest common prefix
// of any two suffixes is available in O(1). Then, for every candidate period
// length, probe positions that are multiples of the period and extend each
// match to the right (via LCP) and to the left (via one extra LCP query).
#include<iostream>
#include<algorithm>
#include<vector>
#include<stack>
#include<queue>
#include<map>
#include<set>
#include<cstdio>
#include<cstring>
#include<cmath>
#include<ctime>

#define trace(x) cerr<<#x<<" = "<<x<<endl;
#define debug(a, x) cerr<<#a<<"["<<x<<"] = "<<a[x]<<endl;
#define ls (t<<1)
#define rs ((t<<1)|1)
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
const int maxn = 100086;
const int maxm = 100086;
const int inf = 0x3f3f3f3f;
const ll Inf = 999999999999999999;
const int mod = 1000000007;
const double eps = 1e-6;
const double pi = acos(-1);

char s[maxn];                 // input string, s[0..len-1], NUL-terminated
int len;                      // length of the current input string
int Rank[maxn];               // Rank[i]: rank of the suffix starting at i
int sa[maxn];                 // sa[r]: start of the suffix with rank r
int tlen;                     // current doubling length used by compare_sa
int tmp[maxn];                // scratch buffer for the next round of ranks

// Orders suffixes i and j by their first 2*tlen characters, using the ranks
// of their first tlen characters (Rank[.]) as the primary key and the ranks
// of the following tlen characters as the secondary key.
bool compare_sa(int i, int j) {
    if (Rank[i] != Rank[j]) {
        return Rank[i] < Rank[j];
    }
    // If the second half would start past the end of the string, use -inf
    // as its key: with equal first halves, the shorter suffix sorts first.
    int ri = i + tlen <= len ? Rank[i + tlen] : -inf;
    int rj = j + tlen <= len ? Rank[j + tlen] : -inf;
    return ri < rj;
}

// Builds sa[0..len] and Rank[0..len] for s[0..len-1] by prefix doubling.
// The empty suffix (index len) is included and always receives rank 0.
// Each doubling round performs one std::sort, so the cost is O(n log^2 n).
void construct_sa() {
    // Initial rank is the character code; the empty suffix gets -inf.
    for (int i = 0; i <= len; i++) {
        sa[i] = i;
        Rank[i] = i < len ? s[i] : -inf;
    }
    for (tlen = 1; tlen <= len; tlen *= 2) {
        sort(sa, sa + len + 1, compare_sa);
        // tmp holds the new ranks. The smallest suffix gets rank 0; since sa
        // is sorted, each following suffix gets the previous rank plus one
        // if it compares strictly greater, and the same rank otherwise.
        tmp[sa[0]] = 0;
        for (int i = 1; i <= len; i++) {
            tmp[sa[i]] = tmp[sa[i - 1]] + (compare_sa(sa[i - 1], sa[i]) ? 1 : 0);
        }
        for (int i = 0; i <= len; i++) {
            Rank[i] = tmp[i];
        }
    }
}

int height[maxn];             // height[r]: LCP of suffixes sa[r-1] and sa[r]

// Fills height[1..len] by walking suffixes in text order and reusing the
// previous match length minus one as the starting point for the next one.
// Relies on Rank[] being the inverse permutation of sa[] (as left by
// construct_sa), so Rank[i] >= 1 for every i < len.
void construct_lcp() {
    int h = 0;
    height[0] = 0;
    for (int i = 0; i < len; i++) {
        int j = sa[Rank[i] - 1];  // suffix immediately before i in sorted order
        if (h > 0) h--;
        for (; j + h < len && i + h < len; h++) {
            if (s[j + h] != s[i + h]) break;
        }
        height[Rank[i]] = h;
    }
}

int st[maxn][20];             // st[j][k]: min of height[j .. j + 2^k - 1]

// Builds the sparse table over height[1..len]. Only windows that lie fully
// inside [1, len] are filled, so no cell is derived from stale data.
void rmq_init() {
    for (int i = 1; i <= len; i++) {
        st[i][0] = height[i];
    }
    for (int k = 1; (1 << k) <= len; k++) {
        const int half = 1 << (k - 1);
        for (int j = 1; j + (1 << k) - 1 <= len; j++) {
            st[j][k] = min(st[j][k - 1], st[j + half][k - 1]);
        }
    }
}

// Returns min(height[i..j]) for 1 <= i <= j <= len.
// An empty range (i > j) yields inf, the identity element of min.
int ask_min(int i, int j) {
    if (i > j) return inf;
    const int span = j - i + 1;
    // Integer floor(log2(span)); avoids floating-point log().
    int k = 0;
    while ((1 << (k + 1)) <= span) ++k;
    return min(st[i][k], st[j - (1 << k) + 1][k]);
}

// Returns the length of the longest common prefix of the suffixes starting
// at text positions a and b (both in [0, len)).
int lcp(int a, int b) {
    if (a == b) return len - a;  // a suffix shares its whole length with itself
    a = Rank[a], b = Rank[b];
    if (a > b) swap(a, b);
    return ask_min(a + 1, b);
}

// Returns the maximum repetition count over all substrings of s[0..len-1].
// Requires construct_sa(), construct_lcp() and rmq_init() to have been run
// for the current string.
static int max_repeat_count() {
    int best = 1;
    for (int period = 1; period <= len; period++) {
        for (int j = 0; j + period < len; j += period) {
            const int match = lcp(j, j + period);
            const int full = match / period;  // whole extra periods to the right
            int repeats = full + 1;
            // Shift the start left so that the partial tail (match % period)
            // is completed to a whole period; if that shifted start still
            // matches for a full period, one more repetition exists.
            const int k = j - (period - match % period);
            if (k >= 0 && lcp(k, k + period) >= period) {
                repeats++;
            }
            best = max(best, repeats);
            // Probes inside the run just measured cannot beat it; skip them.
            j += full * period;
        }
    }
    return best;
}

int main() {
    int T;
    if (scanf("%d", &T) != 1) return 0;
    while (T-- > 0) {
        // Reject lengths that would not fit the fixed-size buffers.
        if (scanf("%d", &len) != 1 || len < 0 || len >= maxn) return 0;
        for (int i = 0; i < len; i++) {
            // " %c" skips whitespace and stores exactly one character, so an
            // over-long token can never overrun s[].
            if (scanf(" %c", &s[i]) != 1) return 0;
        }
        s[len] = '\0';
        construct_sa();
        construct_lcp();
        rmq_init();
        printf("%d\n", max_repeat_count());
    }
    return 0;
}
原文地址:https://www.cnblogs.com/ZGQblogs/p/11176264.html