hdu4691 Front compression(后缀数组)

Front compression

Time Limit: 5000/5000 MS (Java/Others) Memory Limit: 102400/102400 K (Java/Others)

Total Submission(s): 1339 Accepted Submission(s): 496

Problem Description

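Front compression is a type of delta encoding compression algorithm whereby common prefixes and their lengths are recorded so that they need not be duplicated. For example, the six input lines "myxophyta", "myxopod", "nab", "nabbed", "nabbing", "nabit" are compressed to "0 myxophyta", "5 od", "0 nab", "3 bed", "4 ing", "3 it": each output line holds the length of the prefix shared with the previous line, a space, and the remaining characters.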

The size of the input is 43 bytes, while the size of the compressed output is 40 bytes. Here, every space and newline is also counted as 1 byte.

Given the input, each line of which is a substring of a long string, what are sizes of it and corresponding compressed output?

Input

There are multiple test cases. Process to the End of File.

The first line of each test case is a long string S made up of lowercase letters, whose length doesn't exceed 100,000. The second line contains an integer 1 ≤ N ≤ 100,000, which is the number of lines in the input. Each of the following N lines contains two integers 0 ≤ A < B ≤ length(S), indicating that that line of the input is the substring [A, B) of S.

Output

For each test case, output the sizes of the input and corresponding compressed output.

Sample Input

frcode
2
0 6
0 6
unitedstatesofamerica
3
0 6
0 12
0 21
myxophytamyxopodnabnabbednabbingnabit
6
0 9
9 16
16 19
19 25
25 32
32 37

Sample Output

14 12
42 31
43 40

Author

Zejun Wu (watashi)

Source

2013 Multi-University Training Contest 9

Recommend

zhuyuanchen520 | We have carefully selected several similar problems for you:5061
5060 5059 5058 5057

解题思路:后缀数组水题,试了两种模版,还是基数排序的快啊。。

板子1:

#include <iostream>
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <cmath>
// Shorthand for the 64-bit integer type used for the byte totals.
#define ll long long
// Capacity of every per-position array (the input string has at most 100,000 characters).
#define maxn 100010
using namespace std;
// The long string S of the current test case.
char s[maxn];
// n: strlen(s); k: current doubling step, read by cmp(); q: number of query lines.
int n,k,q;
// rank[i]: rank of the suffix starting at i; sa[r]: start of the suffix with rank r;
// tmp: scratch used while recomputing ranks; lcp[r]: common-prefix length of
// the suffixes sa[r] and sa[r+1].
int rank[maxn],sa[maxn],tmp[maxn],lcp[maxn];// lcp is filled for indices 0..n-1
// Orders suffixes x and y by the pair (rank[i], rank[i+k]); a second half
// that would start past position n counts as -1.
bool cmp(int x,int y){
    const int firstX=rank[x];
    const int firstY=rank[y];
    if(firstX==firstY){
        int secondX=-1,secondY=-1;
        if(x+k<=n) secondX=rank[x+k];
        if(y+k<=n) secondY=rank[y+k];
        return secondX<secondY;
    }
    return firstX<firstY;
}
// Builds sa[0..n] and rank[0..n] for s by prefix doubling: each round sorts
// the suffixes with cmp() for the current step k, then renumbers the ranks.
void build_sa(){
    n=strlen(s);
    // Real suffixes start out ranked by their first character...
    for(int pos=0;pos<n;pos++){
        sa[pos]=pos;
        rank[pos]=s[pos];
    }
    // ...and the empty suffix at position n gets the smallest initial rank.
    sa[n]=n;
    rank[n]=-1;
    k=1;
    while(k<=n){
        sort(sa,sa+n+1,cmp);
        // Walk the sorted order; open a new rank class whenever cmp() says
        // the previous suffix is strictly smaller.
        int cls=0;
        tmp[sa[0]]=cls;
        for(int idx=1;idx<=n;idx++){
            if(cmp(sa[idx-1],sa[idx])) cls++;
            tmp[sa[idx]]=cls;
        }
        for(int pos=0;pos<=n;pos++) rank[pos]=tmp[pos];
        k<<=1;
    }
}
// Fills lcp[r] with the common-prefix length of the suffixes sa[r] and sa[r+1].
// Uses rank[] exactly as build_sa() left it (the rebuild loop was disabled in
// the original as well).
void build_lcp(){
    n=strlen(s);
    lcp[0]=0;
    int matched=0;
    for(int cur=0;cur<n;cur++){
        const int slot=rank[cur]-1;
        const int prev=sa[slot];
        // Moving to the next start position loses at most one matched character.
        if(matched>0) --matched;
        while(prev+matched<n&&cur+matched<n&&s[prev+matched]==s[cur+matched]){
            ++matched;
        }
        lcp[slot]=matched;
    }
}
int dp[20][maxn],mm[maxn];
void init_RMQ(int n){
    mm[0]=-1;
    for(int i=1;i<=n;i++){//长度1-n
        mm[i]=(i&(i-1)) ? mm[i-1]:mm[i-1]+1;
    }
    for(int i=0;i<n;i++) dp[0][i]=lcp[i];
    for(int i=1;i<=mm[n];i++){
        for(int j=0;j+(1<<i)-1<n;j++){
            dp[i][j]=min(dp[i-1][j],dp[i-1][j+(1<<i>>1)]);
        }
    }
}
// Longest common prefix of the suffixes starting at positions x and y.
int RMQ(int x,int y){
    if(x==y) return n-x;  // a suffix compared with itself: its whole length
    int lo=rank[x];
    int hi=rank[y];
    if(lo>hi) swap(lo,hi);
    // The answer is the minimum of lcp[lo .. hi-1], taken from two
    // overlapping power-of-two windows.
    const int len=hi-lo;
    const int lv=mm[len];
    return min(dp[lv][lo],dp[lv][hi-(1<<lv)]);
}
// Number of characters needed to print the non-negative integer v in decimal.
static int decimal_digits(int v){
    int digits=1;
    for(;v>=10;v/=10) digits++;
    return digits;
}
// Reads the query count and the [l, r) substring ranges of one test case, then
// prints the size of the plain text and the size of its front-compressed form.
// A plain line costs its characters plus a newline; a compressed line costs
// the decimal prefix length, a space, the unshared characters and a newline.
void read(){
    if(scanf("%d",&q)!=1) return;  // no query count: nothing to report
    ll sum1=0,sum2=0;
    int pl=-1,pr=-1,l,r;
    int lines=0;  // lines actually read; equals q on well-formed input
    for(int i=0;i<q;i++){
        if(scanf("%d%d",&l,&r)!=2) break;  // truncated input: stop at the last complete line
        lines++;
        sum1+=(r-l+1);
        int ans=0;  // the first line shares nothing with a predecessor
        if(pl!=-1){
            // Shared prefix with the previous line, capped by both line lengths.
            int LCP=RMQ(pl,l);
            ans=std::min(LCP,std::min(r-l,pr-pl));
        }
        sum2+=(r-l-ans);
        // Integer digit count instead of floating-point log10, which can
        // round the wrong way at exact powers of ten.
        sum2+=decimal_digits(ans);
        pl=l,pr=r;
    }
    // "%I64d" is understood only by Microsoft C runtimes; stream output of a
    // long long is portable. The extra 2 per line is the space and the newline.
    std::cout<<sum1<<' '<<sum2+2*lines<<'\n';
}
// Processes test cases until the input ends: each starts with the string S
// and is followed by the query block consumed by read().
int main(){
    // Width 100009 = maxn-1 keeps the read inside s[]; comparing against 1
    // (rather than the `~scanf` idiom) does not depend on the value of EOF.
    while(scanf("%100009s",s)==1){
        build_sa();
        build_lcp();
        init_RMQ(n);
        read();
    }
    return 0;
}

板子2:

#include <iostream>
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <cmath>
// Shorthand for the 64-bit integer type used for the byte totals.
#define ll long long
// Capacity of every per-position array.
#define maxn 100010
using namespace std;
// The long string S of the current test case.
char s[maxn];
int c[maxn],wa[maxn],wb[maxn],r[maxn];// scratch arrays used while building SA; they need no initialization
// Template contract (translated from the original note): the sequence to be
// sorted occupies positions 0..n-1, every value is smaller than m, every value
// except the last is greater than 0, and the last value is 0. build_sa()
// arranges this itself by copying s into r[] and appending a 0.
// When build_sa() returns, the result is stored in sa[].
int n,sa[maxn],lcp[maxn],rank[maxn];
// True when positions a and b agree on both sort keys, i.e. r[a]==r[b] and
// r[a+l]==r[b+l]. The second key is only read when the first keys match.
bool cmp(int *r,int a,int b,int l){
    if(r[a]!=r[b]) return false;
    return r[a+l]==r[b+l];
}
// Builds the suffix array of s by prefix doubling with counting sorts.
// n: length of s; m: exclusive upper bound on the initial key values
// (main() passes 128). A 0 sentinel is appended to the keys, so n+1 suffixes
// are sorted and the result is written to sa[0..n].
void build_sa(int n,int m){// array length, upper bound of the key values
    for(int i=0;i<=n;i++) r[i]=i<n ? s[i]:0;
    n++;
    int i,j,p,*x=wa,*y=wb;
    // First round: counting sort by single key. If the largest key in s were
    // very big, a comparison sort could be used here instead.
    for(i=0;i<m;i++) c[i]=0;
    for(i=0;i<n;i++) c[x[i]=r[i]]++;
    for(i=1;i<m;i++) c[i]+=c[i-1];
    for(i=n-1;i>=0;i--) sa[--c[x[i]]]=i;
    for(j=1;j<=n;j<<=1){
        p=0;
        // Order by the second key directly from the current sa.
        for(i=n-j;i<n;i++) y[p++]=i;// the last j positions have an empty second key, which is the smallest
        for(i=0;i<n;i++) if(sa[i]>=j) y[p++]=sa[i]-j;
        // y now holds the positions ordered by their second key.
        // Counting sort by the first key; walking y backwards keeps the
        // second-key order among equal first keys.
        for(i=0;i<m;i++) c[i]=0;
        for(i=0;i<n;i++) c[x[y[i]]]++;
        for(i=1;i<m;i++) c[i]+=c[i-1];
        for(i=n-1;i>=0;i--) sa[--c[x[y[i]]]]=y[i];
        // Compute the new x (ranks) from sa and the old ranks, now in y.
        swap(x,y);
        p=1,x[sa[0]]=0;
        for(i=1;i<n;i++)
        x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
        // p counts the distinct ranks: once all n are distinct the order is
        // final; otherwise p becomes the key bound for the next round.
        if(p>=n) break;
        m=p;
    }
}
// Rebuilds rank[] as the inverse of sa[0..n], then fills lcp[r] with the
// common-prefix length of the suffixes sa[r] and sa[r+1].
void build_lcp(int n){
    for(int order=0;order<=n;order++) rank[sa[order]]=order;
    lcp[0]=0;
    int matched=0;
    for(int cur=0;cur<n;cur++){
        const int slot=rank[cur]-1;
        const int prev=sa[slot];
        // Moving to the next start position loses at most one matched character.
        if(matched>0) --matched;
        // No bounds test, as in the original: the scan stops at the first mismatch.
        for(;s[cur+matched]==s[prev+matched];++matched){}
        lcp[slot]=matched;
    }
}
int dp[20][maxn],mm[maxn];
void init_RMQ(int n){
    mm[0]=-1;
    for(int i=1;i<=n;i++){
        mm[i]=(i&(i-1)) ? mm[i-1]:mm[i-1]+1;
    }
    for(int i=0;i<n;i++) dp[0][i]=lcp[i];
    for(int i=1;i<=mm[n];i++){
        for(int j=0;j+(1<<i)-1<n;j++){
            dp[i][j]=min(dp[i-1][j],dp[i-1][j+(1<<i>>1)]);
        }
    }
}
// Longest common prefix of the suffixes starting at positions x and y.
int RMQ(int x,int y){
    if(x==y) return n-x;  // a suffix compared with itself: its whole length
    int lo=rank[x];
    int hi=rank[y];
    if(lo>hi) swap(lo,hi);
    // The answer is the minimum of lcp[lo .. hi-1], taken from two
    // overlapping power-of-two windows.
    const int len=hi-lo;
    const int lv=mm[len];
    return min(dp[lv][lo],dp[lv][hi-(1<<lv)]);
}
// Number of query lines in the current test case.
int q;
// Number of characters needed to print the non-negative integer v in decimal.
static int decimal_digits(int v){
    int digits=1;
    for(;v>=10;v/=10) digits++;
    return digits;
}
// Reads the query count and the [l, r) substring ranges of one test case, then
// prints the size of the plain text and the size of its front-compressed form.
// A plain line costs its characters plus a newline; a compressed line costs
// the decimal prefix length, a space, the unshared characters and a newline.
void read(){
    if(scanf("%d",&q)!=1) return;  // no query count: nothing to report
    ll sum1=0,sum2=0;
    int pl=-1,pr=-1,l,r;
    int lines=0;  // lines actually read; equals q on well-formed input
    for(int i=0;i<q;i++){
        if(scanf("%d%d",&l,&r)!=2) break;  // truncated input: stop at the last complete line
        lines++;
        sum1+=(r-l+1);
        int ans=0;  // the first line shares nothing with a predecessor
        if(pl!=-1){
            // Shared prefix with the previous line, capped by both line lengths.
            int LCP=RMQ(pl,l);
            ans=std::min(LCP,std::min(r-l,pr-pl));
        }
        sum2+=(r-l-ans);
        // Integer digit count instead of floating-point log10, which can
        // round the wrong way at exact powers of ten.
        sum2+=decimal_digits(ans);
        pl=l,pr=r;
    }
    // "%I64d" is understood only by Microsoft C runtimes; stream output of a
    // long long is portable. The extra 2 per line is the space and the newline.
    std::cout<<sum1<<' '<<sum2+2*lines<<'\n';
}
// Processes test cases until the input ends: each starts with the string S
// and is followed by the query block consumed by read().
int main(){
    // Width 100009 = maxn-1 keeps the read inside s[]; comparing against 1
    // (rather than the `~scanf` idiom) does not depend on the value of EOF.
    while(scanf("%100009s",s)==1){
        n=strlen(s);
        build_sa(n,128);  // 128 bounds the character codes of the lowercase input
        build_lcp(n);
        init_RMQ(n);
        read();
    }
    return 0;
}

时间: 2024-10-12 20:50:13

hdu4691 Front compression(后缀数组)的相关文章

HDOJ 4691 Front compression 后缀数组

后缀数组求两子串间的最大公共前缀. Front compression Time Limit: 5000/5000 MS (Java/Others)    Memory Limit: 102400/102400 K (Java/Others) Total Submission(s): 1382    Accepted Submission(s): 517 Problem Description Front compression is a type of delta encoding compr

hdu4691---Front compression(后缀数组+RMQ)

Front compression Time Limit: 5000/5000 MS (Java/Others) Memory Limit: 102400/102400 K (Java/Others) Total Submission(s): 1490 Accepted Submission(s): 553 Problem Description Front compression is a type of delta encoding compression algorithm whereby

hdu 4691 Front compression (后缀数组)

题目大意: 介绍了一种压缩文本的方式,问压缩前后的文本长度. 思路分析: 后缀数组跑模板然后考虑两次l r之间的lcp. 然后减掉重复的长度. 注意ans2的累加. #include <cstdio> #include <iostream> #include <cstring> #include <algorithm> #include <cmath> #define maxn 200005 using namespace std; typede

HDOJ 题目4691 Front compression(后缀数组+RMQ最长前缀)

Front compression Time Limit: 5000/5000 MS (Java/Others)    Memory Limit: 102400/102400 K (Java/Others) Total Submission(s): 1652    Accepted Submission(s): 604 Problem Description Front compression is a type of delta encoding compression algorithm w

hdu4691(后缀数组)

算是后缀数组的入门题吧. 思路无比简单,要是直接套模板的话应该很容易秒掉. 关于后缀数组看高中神犇的论文就可以学会了 算法合集之<后缀数组——处理字符串的有力工具> 话说这题暴力是可以过了,但是我们在做多校的时候就是用暴力过的,当时还不知道什么是后缀数组... 靠着概念纯手敲了几个小时,把建SA,求height,和RMQ的ST算法都复习了一遍,这个东西要是每次都手敲的话真的会死人,尤其是倍增算法基数排序怎么排怎么别扭.自己写的倍增算法又太长,大牛的倍增算法总感觉敲的不顺. 贴个代码做留念...

HDU5853 Jong Hyok and String(二分 + 后缀数组)

题目 Source http://acm.hdu.edu.cn/showproblem.php?pid=5853 Description Jong Hyok loves strings. One day he gives a problem to his friend you. He writes down n strings Pi in front of you, and asks m questions. For i-th question, there is a string Qi. We

hdu 4691 最长的共同前缀 后缀数组 +lcp+rmq

http://acm.hdu.edu.cn/showproblem.php?pid=4691 去年暑假多校赛的题,当时还不会后缀数组 现在会了,其实自己组合后缀数组跟rmq还是对的,但是题意理解有问题,于是折腾了很久,,,, 此处简单解释下题目样例吧,希望对读者有帮助  以最后一组数据为例 myxophytamyxopodnabnabbednabbingnabit 6 0 9 9 16 16 19 19 25 25 32 32 37 前两行不解释,题目叙述很清楚 从第三行

hdu 4691 最长公共前缀 后缀数组 +lcp+rmq

http://acm.hdu.edu.cn/showproblem.php?pid=4691 去年暑假多校赛的题,当时还不会后缀数组 现在会了,其实自己组合后缀数组跟rmq还是对的,但是题意理解有问题,于是折腾了很久,,,, 此处简单解释下题目样例吧,希望对读者有帮助  以最后一组数据为例 myxophytamyxopodnabnabbednabbingnabit 6 0 9 9 16 16 19 19 25 25 32 32 37 前两行不解释,题目叙述很清楚 从第三行,0 9 指的是第一个字

BZOJ 题目3172: [Tjoi2013]单词(AC自动机||AC自动机+fail树||后缀数组暴力||后缀数组+RMQ+二分等五种姿势水过)

3172: [Tjoi2013]单词 Time Limit: 10 Sec  Memory Limit: 512 MB Submit: 1890  Solved: 877 [Submit][Status][Discuss] Description 某人读论文,一篇论文是由许多单词组成.但他发现一个单词会在论文中出现很多次,现在想知道每个单词分别在论文中出现多少次. Input 第一个一个整数N,表示有多少个单词,接下来N行每行一个单词.每个单词由小写字母组成,N<=200,单词长度不超过10^6