HDOJ 4691 Front compression 后缀数组

后缀数组求两子串间的最大公共前缀.

Front compression

Time Limit: 5000/5000 MS (Java/Others)    Memory Limit: 102400/102400 K (Java/Others)

Total Submission(s): 1382    Accepted Submission(s): 517

Problem Description

Front compression is a type of delta encoding compression algorithm whereby common prefixes and their lengths are recorded so that they need not be duplicated. For example:

Input        Output
myxophyta    0 myxophyta
myxopod      5 od
nab          0 nab
nabbed       3 bed
nabbing      4 ing
nabit        3 it

The size of the input is 43 bytes, while the size of the compressed output is 40. Here, every space and newline is also counted as 1 byte.

Given the input, each line of which is a substring of a long string, what are the sizes of the input and of the corresponding compressed output?

Input

There are multiple test cases. Process to the End of File.

The first line of each test case is a long string S made up of lowercase letters, whose length doesn't exceed 100,000. The second line contains an integer 1 ≤ N ≤ 100,000, which is the number of lines in the input. Each of the following N lines contains two integers 0 ≤ A < B ≤ length(S), indicating that that line of the input is substring [A, B) of S.

Output

For each test case, output the sizes of the input and corresponding compressed output.

Sample Input

frcode
2
0 6
0 6
unitedstatesofamerica
3
0 6
0 12
0 21
myxophytamyxopodnabnabbednabbingnabit
6
0 9
9 16
16 19
19 25
25 32
32 37

Sample Output

14 12
42 31
43 40

Author

Zejun Wu (watashi)

Source

2013 Multi-University Training Contest 9

#include <iostream>
#include <cstring>
#include <cstdio>
#include <algorithm>
#include <cmath>

using namespace std;

typedef long long int LL;

// Capacity of every per-position array below; the problem statement bounds
// the length of S by 100,000.
const int maxn=102100;

// sa    : suffix array written by radix_sort/get_sa (sa[i] = start index of
//         the suffix placed i-th in sorted order).
// rank  : inverse of sa once the doubling loop in get_sa has run.
// rank2 : second key buffer; get_sa swaps it with rank through x/y.
// h     : height array written by get_h (h[rank[i]] = length of the common
//         prefix of suffix i and the suffix just before it in sa).
// c     : bucket counters used by radix_sort.
// x, y  : working pointers that get_sa aims at rank/rank2.
// ans   : not referenced by any code visible in this file.
// NOTE(review): with `using namespace std;` the name `rank` collides with
// std::rank from <type_traits> in C++11 and later; unqualified uses of it
// are ambiguous there, while `::rank` is not.
int sa[maxn],rank[maxn],rank2[maxn],h[maxn],c[maxn],*x,*y,ans[maxn];
// Buffer that main reads the long string S into.
char str[maxn];

// Equality test for one prefix-doubling round.
// r holds the keys of the previous round; positions a and b are "equal" when
// their first keys match and the keys l places further on match as well.
// If either second key would lie at index n or beyond, the result is false.
bool cmp(int* r,int a,int b,int l,int n)
{
    if(r[a]!=r[b]) return false;

    const int a2=a+l;
    const int b2=b+l;
    if(a2>=n||b2>=n) return false;   // checked before r[a2]/r[b2] are read

    return r[a2]==r[b2];
}

// One stable counting-sort pass over the n indices listed in y.
// Each index v is keyed by x[v] (keys lie in [0, sz)); the sorted order is
// written to sa. y is walked from the back while each bucket's end counter is
// pre-decremented, so indices with equal keys keep the order they had in y.
void radix_sort(int n,int sz)
{
    int b=0;
    while(b<sz) c[b++]=0;

    for(int k=0;k<n;++k) ++c[x[y[k]]];

    // Turn per-key counts into exclusive end positions.
    for(b=1;b<sz;++b) c[b]+=c[b-1];

    int k=n;
    while(k>0)
    {
        --k;
        const int suf=y[k];
        const int slot=--c[x[suf]];
        sa[slot]=suf;
    }
}

void get_sa(char c[],int n,int sz=128)
{
    x=rank,y=rank2;
    for(int i=0;i<n;i++) x[i]=c[i],y[i]=i;
    radix_sort(n,sz);
    for(int len=1;len<n;len<<=1)
    {
        int yid=0;
        for(int i=n-len;i<n;i++) y[yid++]=i;
        for(int i=0;i<n;i++)
            if(sa[i]>=len) y[yid++]=sa[i]-len;

        radix_sort(n,sz);

        swap(x,y);
        x[sa[0]]=yid=0;

        for(int i=1;i<n;i++)
        {
            x[sa[i]]=cmp(y,sa[i],sa[i-1],len,n)?yid:++yid;
        }

        sz=yid+1;
        if(sz>=n) break;
    }
    for(int i=0;i<n;i++)
        rank[i]=x[i];
}

void get_h(char str[],int n)
{
    int k=0; h[0]=0x3f3f3f3f;
    for(int i=0;i<n;i++)
    {
        if(rank[i]==0) continue;
        k=max(k-1,0);
        int j=sa[rank[i]-1];
        while(i+k<n&&j+k<n&&str[i+k]==str[j+k]) k++;
        h[rank[i]]=k;
    }
}

// dp  : sparse table over h; after RMQ_init, dp[j][i] is the minimum of
//       h[j .. j+2^i-1].
// Log : filled by init_Log; Log[0] = -1 and Log[v] = floor(log2(v)) for v >= 1.
int dp[maxn][20],Log[maxn];

// Builds the range-minimum sparse table dp over h[0..n).
// Level 0 copies h; level lvl combines two adjacent level lvl-1 windows, so
// dp[pos][lvl] covers h[pos .. pos+2^lvl-1]. Levels run up to Log[n].
void RMQ_init(int n)
{
    for(int pos=0;pos<n;++pos) dp[pos][0]=h[pos];

    const int top=Log[n];
    for(int lvl=1;lvl<=top;++lvl)
    {
        const int half=1<<(lvl-1);
        const int span=1<<lvl;
        for(int pos=0;pos+span-1<n;++pos)
        {
            const int left=dp[pos][lvl-1];
            const int right=dp[pos+half][lvl-1];
            dp[pos][lvl]=std::min(left,right);
        }
    }
}

int lcp(int l,int r)
{
    l=rank[l];r=rank[r];
    if(l>r) swap(l,r);
    ///!!!!!if(l==r) return n-sa[l];
    int a=l+1,b=r;
    int k=Log[b-a+1];
    return min(dp[a][k],dp[b-(1<<k)+1][k]);
}

// Precomputes Log[v] for every v in [0, maxn).
// Log[0] is -1; the value grows by one exactly when v is a power of two
// (v & (v-1) == 0), which makes Log[v] = floor(log2(v)) for v >= 1.
void init_Log()
{
    Log[0]=-1;
    for(int v=1;v<maxn;++v)
    {
        const bool powerOfTwo=(v&(v-1))==0;
        Log[v]=Log[v-1]+(powerOfTwo?1:0);
    }
}

void debug(int n)
{
    for(int i=0;i<n;i++)
    {
        cout<<sa[i]<<",";
    }
    cout<<endl;
    for(int j=0;j<n;j++)
    {
        cout<<h[j]<<",";
    }
    cout<<endl;
    for(int j=0;j<n;j++)
    {
        cout<<rank[j]<<",";
    }
    cout<<endl;
    int a,b;
    while(cin>>a>>b)
    cout<<"lcp: "<<lcp(a,b)<<endl;
}

// Number of decimal digits needed to print x ("wei" = digit count).
// Zero counts as one digit; a sign is not counted.
int getwei(int x)
{
    int digits=0;
    // do/while so that x == 0 still yields one digit.
    do
    {
        x/=10;
        ++digits;
    } while(x!=0);
    return digits;
}

// Reads test cases until end of input. Each case is a string S, a count m,
// and m half-open ranges [l, r) of S, one per "line" of the text being
// compressed. Prints two totals per case:
//   ans1 - size of the plain text: every line plus its newline;
//   ans2 - size of the front-compressed text: for every line the decimal
//          length of the prefix shared with the previous line, one space,
//          the remaining characters and a newline.
int main()
{
    init_Log();
    // "%s" expects a char*, so pass the array itself rather than &str.
    while(scanf("%s",str)==1)
    {
        const int n=static_cast<int>(strlen(str));
        get_sa(str,n);
        get_h(str,n);
        RMQ_init(n);

        //debug(n);

        int m=0;
        if(scanf("%d",&m)!=1) break;

        LL ans1=0,ans2=0;
        bool first=true;
        int lastL=0,lastR=0;
        while(m-->0)
        {
            int l,r;
            if(scanf("%d%d",&l,&r)!=2) break;

            const int len=r-l;
            ans1+=len+1LL;

            // Shared prefix with the previous line; the first line shares
            // nothing. It can never exceed either line's length.
            int com=0;
            if(!first)
            {
                com=std::min(lastR-lastL,len);
                // Same start position: the shorter line is the whole shared
                // prefix, so lcp() is only consulted for different starts.
                if(lastL!=l) com=std::min(com,lcp(lastL,l));
            }

            // digits of com + space + differing characters + newline
            ans2+=getwei(com)+1LL+(len-com)+1LL;

            first=false;
            lastL=l; lastR=r;
        }
        cout<<ans1<<" "<<ans2<<'\n';
    }
    return 0;
}
时间: 2024-10-05 19:31:28

HDOJ 4691 Front compression 后缀数组的相关文章

hdu4691 Front compression(后缀数组)

Front compression Time Limit: 5000/5000 MS (Java/Others) Memory Limit: 102400/102400 K (Java/Others) Total Submission(s): 1339 Accepted Submission(s): 496 Problem Description Front compression is a type of delta encoding compression algorithm whereby

hdu 4691 Front compression (后缀数组)

题目大意: 介绍了一种压缩文本的方式,问压缩前后的文本长度. 思路分析: 后缀数组跑模板然后考虑两次l r之间的lcp. 然后减掉重复的长度. 注意ans2的累加. #include <cstdio> #include <iostream> #include <cstring> #include <algorithm> #include <cmath> #define maxn 200005 using namespace std; typede

HDOJ 题目4691 Front compression(后缀数组+RMQ最长前缀)

Front compression Time Limit: 5000/5000 MS (Java/Others)    Memory Limit: 102400/102400 K (Java/Others) Total Submission(s): 1652    Accepted Submission(s): 604 Problem Description Front compression is a type of delta encoding compression algorithm w

【HDOJ】4691 Front compression

后缀数组基础题目,dc3解. 1 /* 4691 */ 2 #include <iostream> 3 #include <sstream> 4 #include <string> 5 #include <map> 6 #include <queue> 7 #include <set> 8 #include <stack> 9 #include <vector> 10 #include <deque>

hdu4691---Front compression(后缀数组+RMQ)

Front compression Time Limit: 5000/5000 MS (Java/Others) Memory Limit: 102400/102400 K (Java/Others) Total Submission(s): 1490 Accepted Submission(s): 553 Problem Description Front compression is a type of delta encoding compression algorithm whereby

hdu 4691 最长的共同前缀 后缀数组 +lcp+rmq

http://acm.hdu.edu.cn/showproblem.php?pid=4691 去年暑假多校赛的题,当时还不会后缀数组 现在会了,其实自己组合后缀数组跟rmq还是对的,但是题意理解有问题,于是折腾了很久,,,, 此处简单解释下题目样例吧,希望对读者有帮助  以最后一组数据为例 myxophytamyxopodnabnabbednabbingnabit 6 0 9 9 16 16 19 19 25 25 32 32 37 前两行不解释,题目叙述很清楚 从第三行

hdu 4691 最长公共前缀 后缀数组 +lcp+rmq

http://acm.hdu.edu.cn/showproblem.php?pid=4691 去年暑假多校赛的题,当时还不会后缀数组 现在会了,其实自己组合后缀数组跟rmq还是对的,但是题意理解有问题,于是折腾了很久,,,, 此处简单解释下题目样例吧,希望对读者有帮助  以最后一组数据为例 myxophytamyxopodnabnabbednabbingnabit 6 0 9 9 16 16 19 19 25 25 32 32 37 前两行不解释,题目叙述很清楚 从第三行,0 9 指的是第一个字

HDOJ 3948 The Number of Palindromes 后缀数组

后缀数组求有多少个不同的回文串 The Number of Palindromes Time Limit: 6000/3000 MS (Java/Others)    Memory Limit: 262144/262144 K (Java/Others) Total Submission(s): 1976    Accepted Submission(s): 690 Problem Description Now, you are given a string S. We want to kno

HDOJ 题目4416 Good Article Good sentence(后缀数组求a串子串在b串中不出现的种类数)

-每周六晚的BestCoder(有米!) Good Article Good sentence Time Limit: 6000/3000 MS (Java/Others)    Memory Limit: 32768/32768 K (Java/Others) Total Submission(s): 2784    Accepted Submission(s): 785 Problem Description In middle school, teachers used to encour