POJ 3294 Life Forms(后缀数组求k个串的最长子串)

题目大意:给出n个字符串,求在超过一半(即多于n/2个)的字符串中都出现过的最长子串;如果有多个,按照字典序依次输出;如果不存在这样的子串,输出"?"。

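解题思路:将n个字符串连起来,相邻两个串之间用互不相同且未在原串中出现过的分隔符隔开,对连接后的串求后缀数组和height数组。然后二分枚举答案子串的长度,每次按height值把后缀分组,统计每组中出现了多少个不同的原串,超过一半则该长度可行。求出最长的长度后,如果有多个答案,按后缀数组的顺序(即字典序)保存起来,最后输出答案就可以了。时间复杂度约为O(L*log(L)),其中L为所有字符串的总长度。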

Life Forms

Time Limit: 5000MS   Memory Limit: 65536K
Total Submissions: 10275   Accepted: 2822

Description

You may have wondered why most extraterrestrial life forms resemble humans, differing by superficial traits such as height, colour, wrinkles, ears, eyebrows and the like. A few bear no human resemblance; these typically have geometric or amorphous shapes
like cubes, oil slicks or clouds of dust.

The answer is given in the 146th episode of Star Trek - The Next Generation, titled
The Chase. It turns out that the vast majority of the quadrant's life forms ended up with a large fragment of common DNA.

Given the DNA sequences of several life forms represented as strings of letters, you are to find the longest substring that is shared by more than half of them.

Input

Standard input contains several test cases. Each test case begins with 1 ≤ n ≤ 100, the number of life forms. n lines follow; each contains a string of lower case letters representing the DNA sequence of a life form. Each DNA sequence contains at least one and not more than 1000 letters. A line containing 0 follows the last test case.

Output

For each test case, output the longest string or strings shared by more than half of the life forms. If there are many, output all of them in alphabetical order. If there is no solution with at least one letter, output "?". Leave an empty line between test
cases.

Sample Input

3
abcdefg
bcdefgh
cdefghi
3
xxx
yyy
zzz
0

Sample Output

bcdefg
cdefgh

?
#include <algorithm>
#include <iostream>
#include <stdlib.h>
#include <string.h>
#include <iomanip>
#include <stdio.h>
#include <string>
#include <queue>
#include <cmath>
#include <stack>
#include <ctime>
#include <map>
#include <set>
// Contest-template macros. None of them is referenced by the solution code
// visible in this file.

// Tolerance used by zero() below.
#define eps 1e-9
///#define M 1000100
///#define LL __int64
// Shorthand for the long long type.
#define LL long long
///#define INF 0x7ffffff
// Large constant; judging by its name it is presumably meant as an
// "infinity" sentinel.
#define INF 0x3f3f3f3f
// Approximation of pi.
#define PI 3.1415926535898
// Evaluates to 0 when |x| < eps, otherwise to x.
// NOTE(review): x is not parenthesised in the expansion, so pass only simple
// expressions.
#define zero(x) ((fabs(x)<eps)?0:x)
#define mod 1000000007
// Redirect stdin / stdout to fixed file names.
#define Read() freopen("autocomplete.in","r",stdin)
#define Write() freopen("autocomplete.out","w",stdout)
// Turns off iostream/stdio synchronisation.
#define Cin() ios::sync_with_stdio(false)

// NOTE(review): with this directive in effect, the globals named `rank` and
// `hash` declared further down share their names with std::rank and
// std::hash, which makes unqualified uses ambiguous on C++11-and-later
// toolchains.
using namespace std;

// Reads the next decimal integer from stdin.
//
// Skips every character up to the first digit or '-', then consumes digits
// until the first non-digit (that terminating character is consumed too).
// A '-' makes the result negative.
//
// Returns the parsed value, or 0 if end of input is reached before any digit
// or '-' is found. The previous version looped forever in that situation
// because it kept calling getchar() at EOF and stored the result in a char,
// which cannot represent EOF reliably.
inline int read()
{
    // getchar() returns int so that EOF is distinguishable from every char.
    int ch = getchar();
    while(ch != EOF && ch != '-' && (ch < '0' || ch > '9'))
    {
        ch = getchar();
    }
    if(ch == EOF)
    {
        return 0;
    }

    const bool negative = (ch == '-');
    int value = negative ? 0 : ch - '0';
    while(((ch = getchar()) >= '0') && (ch <= '9'))
    {
        value = value * 10 + (ch - '0');
    }
    return negative ? -value : value;
}
// Writes the decimal representation of a to stdout using putchar, with a
// leading '-' for negative values and no trailing separator.
//
// The value is widened to long long before negation so that the most
// negative int is printed correctly; negating it as an int overflows.
void write(int a)
{
    long long value = a;
    if(value < 0)
    {
        putchar('-');
        value = -value;
    }

    // Collect digits least-significant first, then emit them in reverse.
    char digits[12];
    int count = 0;
    do
    {
        digits[count++] = (char)('0' + value % 10);
        value /= 10;
    }
    while(value > 0);

    while(count > 0)
    {
        putchar(digits[--count]);
    }
}

// Capacity of every global work array in this file.
const int maxn = 200010;

// Scratch buffers for da(): wa and wb are the two key buffers it swaps
// between rounds (its x and y), wv holds the keys being counting-sorted, and
// ws1 holds the counting-sort bucket counters.
int wa[maxn], wb[maxn], wv[maxn], ws1[maxn];
// Suffix array of the concatenated text; main passes it to da() to be filled.
int sa[maxn];

// Returns 1 when positions a and b carry the same key pair in r, i.e.
// r[a] == r[b] and r[a+l] == r[b+l]; returns 0 otherwise. The second
// component is only inspected when the first one matches.
int cmp(int *r, int a, int b, int l)
{
    if(r[a] != r[b])
    {
        return 0;
    }
    return (r[a+l] == r[b+l]) ? 1 : 0;
}

// Builds the suffix array of r[0..n-1] by prefix doubling, using counting
// sorts for every pass.
//
// r  : input symbols. Each r[i] is used as a bucket index into ws1, of which
//      only [0, m) is cleared, so values are expected to lie in that range.
// sa : output; on return sa[k] is the start index of the k-th suffix in
//      sorted order.
// n  : number of symbols to sort (main passes the text length plus one for
//      the 0 terminator it appends).
// m  : number of buckets for the first pass; later passes use the number of
//      distinct keys found so far.
//
// Uses the global scratch arrays wa, wb, wv and ws1.
// NOTE(review): cmp() reads y[sa[i]+j]; the 0 terminator that main appends is
// presumably what keeps that index inside [0, n) - confirm before reusing
// this routine on input without a unique smallest last symbol.
void da(int *r, int *sa, int n, int m)
{
    // x: current sort key of each suffix; y: scratch (order by second key,
    // and after the swap below the previous round's keys).
    int i, j, p, *x = wa, *y = wb;
    // First pass: counting sort of all suffixes by their first symbol.
    for(i = 0; i < m; i++) ws1[i] = 0;
    for(i = 0; i < n; i++) ws1[x[i] = r[i]]++;
    for(i = 1; i < m; i++) ws1[i] += ws1[i-1];
    for(i = n-1; i >= 0; i--) sa[--ws1[x[i]]] = i;
    // Each round doubles the compared prefix length j. p is the number of
    // distinct keys produced by the round; the loop ends once all n differ.
    for(j = 1, p = 1; p < n; j <<= 1, m = p)
    {
        // Order by second key: the last j suffixes have no second half and
        // go first, ...
        for(p = 0, i = n-j; i < n; i++) y[p++] = i;
        // ... followed by the rest, in the order of the suffix j positions
        // further right as given by the current sa.
        for(i = 0; i < n; i++)
            if(sa[i] >= j) y[p++] = sa[i]-j;
        // Counting sort by first key; filling from the back keeps the
        // second-key order within equal first keys.
        for(i = 0; i < n; i++) wv[i] = x[y[i]];
        for(i = 0; i < m; i++) ws1[i] = 0;
        for(i = 0; i < n; i++) ws1[wv[i]]++;
        for(i = 1; i < m; i++) ws1[i] += ws1[i-1];
        for(i = n-1; i >= 0; i--) sa[--ws1[wv[i]]] = y[i];
        // Swap buffers so y holds the old keys, then assign new keys in x:
        // neighbours in sa share a key only if both old key halves match.
        for(swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++)
            x[sa[i]] = cmp(y, sa[i-1], sa[i], j)?p-1:p++;
    }
    return ;
}

// Filled by calheight(): rank[p] is the index in sa of the suffix starting at
// p, and height[k] is the length of the common prefix of the suffixes sa[k-1]
// and sa[k].
// NOTE(review): the name `rank` is shared with std::rank, so unqualified uses
// are ambiguous under `using namespace std` on C++11-and-later toolchains.
int rank[maxn], height[maxn];

void calheight(int *r, int *sa, int n)
{
    int i, j, k = 0;
    for(i = 1; i <= n; i++) rank[sa[i]] = i;
    for(i = 0; i < n; height[rank[i++]] = k)
        for(k?k--:0, j = sa[rank[i]-1]; r[i+k] == r[j+k]; k++);
    return;
}

// Not referenced by the code visible in this file.
char str1[maxn], str2[maxn];
// Concatenated text built by main: the characters of every input string,
// a separator value 200+i after string i, and a final 0.
int seq[maxn];

// hash[p] is the index of the input string that owns position p of seq.
// main resets it to -1, so separator positions keep -1.
// NOTE(review): the name `hash` is shared with std::hash, so unqualified uses
// are ambiguous under `using namespace std` on C++11-and-later toolchains.
int hash[maxn];

// The input strings as read by main.
char str[110][1010];

// One reported substring.
struct node
{
    int pos;   // start offset inside the owning string
    int s;     // index of the owning string in str
    int len;   // substring length
}f[maxn], xf[maxn];   // f: best answers kept so far; xf: scratch list filled by judge()

// Number of valid entries in f.
int xans;

// Per-group marks used by judge(): vis[s] != 0 once string s was seen.
int vis[110];

int Find(int x)
{
    int s = hash[x];
    for(int i = 0; i < s; i++)
    {
        int len = strlen(str[i]);
        x -= len;
    }
    x -= s;
    return x;
}

bool judge(int mid, int n, int m)
{
    int ans = 0;
    int sx;
    for(int i = 2; i <= n; i++)
    {
        memset(vis, 0, sizeof(vis));
        sx = 1;
        vis[hash[sa[i-1]]] = 1;
        while(height[i] >= mid)
        {
            if(!vis[hash[sa[i]]])
            {
                vis[hash[sa[i]]] = 1;
                ++sx;
            }
            i++;
        }
        if(sx*2 > m)
        {
            xf[ans].len = mid;
            xf[ans].pos = Find(sa[i-1]);
            xf[ans++].s = hash[sa[i-1]];
        }

    }
    if(ans)
    {
        xans = 0;
        for(int i = 0; i < ans; i++)
        f[xans++] = xf[i];
    }
    return ans;
}

void Del(int n, int len, int m)
{
    int l = 1;
    int r = len;
    xans = 0;
    while(l <= r)
    {
        int mid = (l+r)>>1;
        if(judge(mid, n, m)) l = mid+1;
        else r = mid-1;
    }

    if(!xans)
    {
        cout<<"?"<<endl;
        return;
    }
    for(int i = 0; i < xans; i++)
    {
        for(int j = f[i].pos, k = 0; k < f[i].len; k++, j++)
        cout<<str[f[i].s][j];
        puts("");
    }
}

int main()
{
    int n;
    int flag = 0;
    while(~scanf("%d", &n) && n)
    {
        memset(hash, -1, sizeof(hash));
        int ans = 0;
        int Min = maxn;
        for(int i = 0; i < n; i++)
        {
            scanf("%s",str[i]);
            int len = strlen(str[i]);
            Min = min(Min, len);
            for(int j = 0; j < len; j++)
            {
                seq[ans] = str[i][j];
                hash[ans++] = i;
            }
            seq[ans++] = 200+i;
        }
        seq[ans] = 0;
        da(seq, sa, ans+1, 310);
        calheight(seq, sa, ans);
        if(!flag) flag = 1;
        else puts("");
        Del(ans, Min, n);
    }
    return 0;
}

时间: 2024-10-07 19:55:06

POJ 3294 Life Forms(后缀数组求k个串的最长子串)的相关文章

POJ 3294 Life Forms (后缀数组)

题目大意: 求出在m个串中出现过大于m/2次的子串. 思路分析: 如果你只是直接跑一次后缀数组,然后二分答案扫描的话. 那么就试一下下面这个数据. 2 abcdabcdefgh efgh 这个数据应该输出 efgh 问题就在于对于每一个串,都只能参与一次计数,所以在check的时候加一个标记数组是正解. #include <cstdio> #include <iostream> #include <algorithm> #include <cstring>

Poj 3294 Life Forms (后缀数组 + 二分 + Hash)

题目链接: Poj 3294 Life Forms 题目描述: 有n个文本串,问在一半以上的文本串出现过的最长连续子串? 解题思路: 可以把文本串用没有出现过的不同字符连起来,然后求新文本串的height.然后二分答案串的长度K,根据K把新文本串的后缀串分块,统计每块中的原文本串出现的次数,大于原文本串数目的一半就作为答案记录下来,对于输出字典序,height就是排好序的后缀数组,只要按照顺序输出即可. 1 #include <cstdio> 2 #include <cstring>

POJ 3415 Common Substrings(后缀数组求重复字串)

题目大意:给你两个字符串,让你求出来两个字符串之间的重复子串长度大于k的有多少个. 解题思路: 先说论文上给的解释:基本思路是计算A的所有后缀和B的所有后缀之间的最长公共前缀的长度,把最长公共前缀长度不小于k的部分全部加起来.先将两个字符串连起来,中间用一个没有出现过的字符隔开.按height值分组后,接下来的工作便是快速的统计每组中后缀之间的最长公共前缀之和.扫描一遍,每遇到一个B的后缀就统计与前面的A的后缀能产生多少个长度不小于k的公共子串,这里A的后缀需要用一个单调的栈来高效的维护.然后对

POJ - 3693 Maximum repetition substring(后缀数组求重复次数最多的连续重复子串)

Description The repetition number of a string is defined as the maximum number R such that the string can be partitioned into R same consecutive substrings. For example, the repetition number of "ababab" is 3 and "ababa" is 1. Given a

POJ - 1743 Musical Theme (后缀数组求不可重叠最长重复子串)

Description A musical melody is represented as a sequence of N (1<=N<=20000)notes that are integers in the range 1..88, each representing a key on the piano. It is unfortunate but true that this representation of melodies ignores the notion of music

POJ 3261 Milk Patterns ( 后缀数组 && 出现k次最长可重叠子串长度 )

题意 : 给出一个长度为 N 的序列,再给出一个 K 要求求出出现了至少 K 次的最长可重叠子串的长度 分析 : 后缀数组套路题,思路是二分长度再对于每一个长度进行判断,判断过程就是对于 Height 数组进行限定长度的分组策略,如果有哪一组的个数 ≥  k 则说明可行! 分组要考虑到一个事实,对于每一个后缀,与其相匹配能够产生最长的LCP长度的串肯定是在后缀数组中排名与其相邻. 一开始对分组的理解有误,所以想了一个错误做法 ==> 遍历一下 Height 将值 ≥ (当前二分长度) 的做一次贡

hdu1403---Longest Common Substring(后缀数组求2个字符串的最长公共子串)

Problem Description Given two strings, you have to tell the length of the Longest Common Substring of them. For example: str1 = banana str2 = cianaic So the Longest Common Substring is "ana", and the length is 3. Input The input contains several

POJ - 3261 Milk Patterns (后缀数组求可重叠的 k 次最长重复子串)

Description Farmer John has noticed that the quality of milk given by his cows varies from day to day. On further investigation, he discovered that although he can't predict the quality of milk from one day to the next, there are some regular pattern

POJ - 3415 Common Substrings(后缀数组求长度不小于 k 的公共子串的个数+单调栈优化)

Description A substring of a string T is defined as: T( i, k)= TiTi+1... Ti+k-1, 1≤ i≤ i+k-1≤| T|. Given two strings A, B and one integer K, we define S, a set of triples (i, j, k): S = {( i, j, k) | k≥ K, A( i, k)= B( j, k)}. You are to give the val