hdu4691 Front compression(后缀数组)

Front compression

Time Limit: 5000/5000 MS (Java/Others) Memory Limit: 102400/102400 K (Java/Others)

Total Submission(s): 1339 Accepted Submission(s): 496

Problem Description

Front compression is a type of delta encoding compression algorithm whereby common prefixes and their lengths are recorded so that they need not be duplicated. For example:

The size of the input is 43 bytes, while the size of the compressed output is
40. Here, every space and newline is also counted as 1 byte.

Given the input, each line of which is a substring of a long string, what are sizes of it and corresponding compressed output?

Input

There are multiple test cases. Process to the End of File.

The first line of each test case is a long string S made up of lowercase letters, whose length doesn‘t exceed 100,000. The second line contains a integer 1 ≤ N ≤ 100,000, which is the number of lines in the input. Each of the following N lines contains two
integers 0 ≤ A < B ≤ length(S), indicating that that line of the input is substring [A, B) of S.

Output

For each test case, output the sizes of the input and corresponding compressed output.

Sample Input

frcode
2
0 6
0 6
unitedstatesofamerica
3
0 6
0 12
0 21
myxophytamyxopodnabnabbednabbingnabit
6
0 9
9 16
16 19
19 25
25 32
32 37

Sample Output

14 12
42 31
43 40

Author

Zejun Wu (watashi)

Source

2013 Multi-University Training Contest 9

Recommend

zhuyuanchen520 | We have carefully selected several similar problems for you:5061
5060 5059 5058 5057

解题思路：后缀数组水题，试了两种模版，还是基数排序的快啊。。

板子1：

#include <iostream>
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <cmath>
#define ll long long
#define maxn 100010
using namespace std;
char s[maxn];
int n,k,q;
int rank[maxn],sa[maxn],tmp[maxn],lcp[maxn];//lcp:0-n-1
bool cmp(int x,int y){
    if(rank[x]!=rank[y]) return rank[x]<rank[y];
    int sx=x+k<=n ? rank[x+k]:-1;
    int sy=y+k<=n ? rank[y+k]:-1;
    return sx<sy;
}
void build_sa(){
    n=strlen(s);
    for(int i=0;i<=n;i++){
        sa[i]=i;
        rank[i]=i<n ? s[i]:-1;
    }
    for(k=1;k<=n;k<<=1){
        sort(sa,sa+n+1,cmp);
        tmp[sa[0]]=0;
        for(int i=1;i<=n;i++){
            tmp[sa[i]]=tmp[sa[i-1]]+(cmp(sa[i-1],sa[i]) ? 1:0);
        }
        for(int i=0;i<=n;i++) rank[i]=tmp[i];
    }
}
void build_lcp(){
    n=strlen(s);
    //for(int i=0;i<=n;i++) rank[sa[i]]=i;
    int h=0;
    lcp[0]=0;
    for(int i=0;i<n;i++){
        int j=sa[rank[i]-1];
        if(h>0) h--;
        for(;j+h<n&&i+h<n;h++){
            if(s[j+h]!=s[i+h]) break;
        }
        lcp[rank[i]-1]=h;
    }
}
int dp[20][maxn],mm[maxn];
void init_RMQ(int n){
    mm[0]=-1;
    for(int i=1;i<=n;i++){//长度1-n
        mm[i]=(i&(i-1)) ? mm[i-1]:mm[i-1]+1;
    }
    for(int i=0;i<n;i++) dp[0][i]=lcp[i];
    for(int i=1;i<=mm[n];i++){
        for(int j=0;j+(1<<i)-1<n;j++){
            dp[i][j]=min(dp[i-1][j],dp[i-1][j+(1<<i>>1)]);
        }
    }
}
int RMQ(int x,int y){//[x,y-1]
    if(x==y) return n-x;
    x=rank[x],y=rank[y];
    if(x>y) swap(x,y);
    y--;
    int l=mm[y-x+1];
    return min(dp[l][x],dp[l][y-(1<<l)+1]);
}
void read(){
    scanf("%d",&q);
    ll sum1=0,sum2=0;
    int pl=-1,pr=-1,l,r;
    for(int i=0;i<q;i++){
        scanf("%d%d",&l,&r);
        sum1+=(r-l+1);
        if(pl==-1){
            sum2+=r-l+1;
        }else{
            int LCP=RMQ(pl,l);
            int ans=min(LCP,min(r-l,pr-pl));
            sum2+=(r-l-ans);
            if(ans==0) sum2+=1;
            else sum2+=(int)log10(ans*1.0)+1;
        }
        pl=l,pr=r;
    }
    printf("%I64d %I64d\n",sum1,sum2+2*q);
}
int main(){
    while(~scanf("%s",s)){
        build_sa();
        build_lcp();
        init_RMQ(n);
        read();
    }
    return 0;
}

板子2：

#include <iostream>
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <cmath>
#define ll long long
#define maxn 100010
using namespace std;
char s[maxn];
int c[maxn],wa[maxn],wb[maxn],r[maxn];//求SA数组需要的中间变量，不需要赋值
 //待排序的字符串放在s数组中，从s[0]到s[n-1],长度为n,且最大值小于m,
 //除s[n-1]外的所有s[i]都大于0，r[n-1]=0
//函数结束以后结果放在sa数组中
int n,sa[maxn],lcp[maxn],rank[maxn];
bool cmp(int *r,int a,int b,int l){
    return r[a]==r[b]&&r[a+l]==r[b+l];
}
void build_sa(int n,int m){//数组长度，最大数字
    for(int i=0;i<=n;i++) r[i]=i<n ? s[i]:0;
    n++;
    int i,j,p,*x=wa,*y=wb;
    //第一轮基数排序，如果s的最大值很大，可改为快速排序
    for(i=0;i<m;i++) c[i]=0;
    for(i=0;i<n;i++) c[x[i]=r[i]]++;
    for(i=1;i<m;i++) c[i]+=c[i-1];
    for(i=n-1;i>=0;i--) sa[--c[x[i]]]=i;
    for(j=1;j<=n;j<<=1){
        p=0;
        //直接利用sa数组排序第二关键字
        for(i=n-j;i<n;i++) y[p++]=i;//后面的j个数第二关键字为空的最小
        for(i=0;i<n;i++) if(sa[i]>=j) y[p++]=sa[i]-j;
        //这样数组y保存的就是按照第二关键字排序的结果
        //基数排序第一关键字
        for(i=0;i<m;i++) c[i]=0;
        for(i=0;i<n;i++) c[x[y[i]]]++;
        for(i=1;i<m;i++) c[i]+=c[i-1];
        for(i=n-1;i>=0;i--) sa[--c[x[y[i]]]]=y[i];
        //根据sa和x数组计算新的x数组
        swap(x,y);
        p=1,x[sa[0]]=0;
        for(i=1;i<n;i++)
        x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
        if(p>=n) break;
        m=p;
    }
}
void build_lcp(int n){
    int i,j,k=0;
    for(i=0;i<=n;i++) rank[sa[i]]=i;
    lcp[0]=0;
    for(i=0;i<n;i++){
        j=sa[rank[i]-1];
        if(k) k--;
        while(s[i+k]==s[j+k]) k++;
        lcp[rank[i]-1]=k;
    }
}
int dp[20][maxn],mm[maxn];
void init_RMQ(int n){
    mm[0]=-1;
    for(int i=1;i<=n;i++){
        mm[i]=(i&(i-1)) ? mm[i-1]:mm[i-1]+1;
    }
    for(int i=0;i<n;i++) dp[0][i]=lcp[i];
    for(int i=1;i<=mm[n];i++){
        for(int j=0;j+(1<<i)-1<n;j++){
            dp[i][j]=min(dp[i-1][j],dp[i-1][j+(1<<i>>1)]);
        }
    }
}
int RMQ(int x,int y){
    if(x==y) return n-x;
    x=rank[x],y=rank[y];
    if(x>y) swap(x,y);
    y--;
    int l=mm[y-x+1];
    return min(dp[l][x],dp[l][y-(1<<l)+1]);
}
int q;
void read(){
    scanf("%d",&q);
    ll sum1=0,sum2=0;
    int pl=-1,pr=-1,l,r;
    for(int i=0;i<q;i++){
        scanf("%d%d",&l,&r);
        sum1+=(r-l+1);
        if(pl==-1){
            sum2+=r-l+1;
        }else{
            int LCP=RMQ(pl,l);
            //cout<<i<<":"<<LCP<<endl;
            int ans=min(LCP,min(r-l,pr-pl));
            //cout<<i<<":"<<ans<<endl;
            sum2+=(r-l-ans);
            if(ans==0) sum2+=1;
            else sum2+=(int)log10(ans*1.0)+1;
        }
        pl=l,pr=r;
    }
    printf("%I64d %I64d\n",sum1,sum2+2*q);
}
int main(){
    while(~scanf("%s",s)){
        n=strlen(s);
        build_sa(n,128);
        build_lcp(n);
        /*for(int i=0;i<n;i++){
            cout<<i<<" "<<sa[i]<<" "<<lcp[i]<<endl;
        }*/
        init_RMQ(n);
        read();
    }
    return 0;
}

时间： 2024-10-12 20:50:13

hdu4691 Front compression(后缀数组)

Front compression

hdu4691 Front compression(后缀数组)的相关文章

HDOJ 4691 Front compression 后缀数组

hdu4691---Front compression(后缀数组+RMQ)

hdu 4691 Front compression （后缀数组）

HDOJ 题目4691 Front compression（后缀数组+RMQ最长前缀）

hdu4691(后缀数组)

HDU5853 Jong Hyok and String（二分 + 后缀数组）

hdu 4691 最长的共同前缀后缀数组 +lcp+rmq

hdu 4691 最长公共前缀后缀数组 +lcp+rmq

BZOJ 题目3172: [Tjoi2013]单词（AC自动机||AC自动机+fail树||后缀数组暴力||后缀数组+RMQ+二分等五种姿势水过）