Description
You may have wondered why most extraterrestrial life forms resemble humans, differing by superficial traits such as height, colour, wrinkles, ears, eyebrows and the like. A few bear no human resemblance; these typically have geometric or amorphous shapes like cubes, oil slicks or clouds of dust.
The answer is given in the 146th episode of Star Trek - The Next Generation, titled The Chase. It turns out that the vast majority of the quadrant's life forms ended up with a large fragment of common DNA.
Given the DNA sequences of several life forms represented as strings of letters, you are to find the longest substring that is shared by more than half of them.
Input
Standard input contains several test cases. Each test case begins with 1 ≤ n ≤ 100, the number of life forms. n lines follow; each contains a string of lower case letters representing the DNA sequence of a life form. Each DNA sequence contains at least one and not more than 1000 letters. A line containing 0 follows the last test case.
Output
For each test case, output the longest string or strings shared by more than half of the life forms. If there are many, output all of them in alphabetical order. If there is no solution with at least one letter, output "?". Leave an empty line between test cases.
Sample Input
3
abcdefg
bcdefgh
cdefghi
3
xxx
yyy
zzz
0
Sample Output
bcdefg
cdefgh

?
【题目大意】
给定多个字符串,求在超过半数的字符串中都出现的最长公共子串;如果有多个,按字典序全部输出;如果不存在长度至少为 1 的解,输出 "?"。
【解题思路】
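首先把所有字符串用互不相同的分隔符连接成一个串,求出它的后缀数组和 height 数组,然后二分答案(公共子串的长度)。
二分检查某个长度时,把 height 不小于该长度的相邻后缀划为一组;如果某一组中的后缀来自超过半数的字符串,则该长度可行,并记录每个可行组对应的子串作为答案。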
【代码细节】
// Life Forms (POJ 3294 / UVa 11107): for every test case print the longest
// substring(s) that occur in more than half of the given strings, in
// alphabetical order, or "?" when no substring of at least one letter qualifies.
//
// Approach: join all strings into one sequence, terminating each string with a
// separator value that is unique and larger than any character. Build the
// suffix array and the LCP ("height") array of that sequence, then binary
// search on the answer length L: consecutive suffixes whose LCP is >= L form a
// group sharing a prefix of length L; L is feasible when some group contains
// suffixes from more than half of the strings.

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <string>
#include <utility>
#include <vector>

namespace {

// Largest value a character can take once converted to unsigned char.
// Separator values start right above it, so they never equal a character.
constexpr int kByteValues = 255;

// Longest DNA sequence allowed by the problem statement.
constexpr int kMaxSequenceLength = 1000;

// Builds the suffix array of text[1..n] by prefix doubling with counting sort.
//
// text      1-based sequence (text[0] is unused); every value is in [1, alphabet].
// n         number of elements in the sequence, n >= 1.
// alphabet  largest value that can appear in text.
//
// Returns sa where sa[r] (1 <= r <= n) is the start position of the r-th
// smallest suffix; sa[0] is unused.
std::vector<int> build_suffix_array(const std::vector<int>& text, int n, int alphabet) {
    std::vector<int> sa(n + 1, 0);
    // Rank arrays are sized 2n + 2 so that reading position i + k past the end
    // of the text yields 0, i.e. "shorter than anything".
    std::vector<int> key(2 * static_cast<std::size_t>(n) + 2, 0);
    std::vector<int> order(2 * static_cast<std::size_t>(n) + 2, 0);
    std::vector<int> bucket(static_cast<std::size_t>(std::max(alphabet, n)) + 1, 0);

    // Initial sort by the first character.
    int classes = alphabet;
    for (int i = 1; i <= n; ++i) {
        key[i] = text[i];
        ++bucket[key[i]];
    }
    for (int v = 2; v <= classes; ++v) bucket[v] += bucket[v - 1];
    for (int i = n; i >= 1; --i) sa[bucket[key[i]]--] = i;

    for (int k = 1; k <= n; k <<= 1) {
        // order[] lists suffixes sorted by their second half (positions i+k..).
        // Suffixes whose second half is empty come first.
        int filled = 0;
        for (int i = n - k + 1; i <= n; ++i) order[++filled] = i;
        for (int r = 1; r <= n; ++r) {
            if (sa[r] > k) order[++filled] = sa[r] - k;
        }

        // Stable counting sort by the first half keeps the second-half order.
        std::fill(bucket.begin(), bucket.begin() + classes + 1, 0);
        for (int i = 1; i <= n; ++i) ++bucket[key[i]];
        for (int v = 2; v <= classes; ++v) bucket[v] += bucket[v - 1];
        for (int i = n; i >= 1; --i) {
            sa[bucket[key[order[i]]]--] = order[i];
            order[i] = 0;  // becomes the fresh rank array after the swap below
        }

        // After the swap `order` holds the old ranks and `key` receives the new ones.
        std::swap(key, order);
        int rank_count = 1;
        key[sa[1]] = 1;
        for (int r = 2; r <= n; ++r) {
            const bool same_pair = order[sa[r]] == order[sa[r - 1]] &&
                                   order[sa[r] + k] == order[sa[r - 1] + k];
            if (!same_pair) ++rank_count;
            key[sa[r]] = rank_count;
        }
        if (rank_count == n) break;  // all suffixes are already distinguished
        classes = rank_count;
    }
    return sa;
}

// Computes the LCP array for a suffix array using Kasai's algorithm.
//
// Returns height where height[r] (2 <= r <= n) is the length of the longest
// common prefix of suffixes sa[r - 1] and sa[r]; height[0] and height[1] are 0.
std::vector<int> build_height(const std::vector<int>& text, int n, const std::vector<int>& sa) {
    std::vector<int> rank_of(n + 1, 0);
    for (int r = 1; r <= n; ++r) rank_of[sa[r]] = r;

    std::vector<int> height(n + 1, 0);
    int matched = 0;
    for (int i = 1; i <= n; ++i) {
        if (rank_of[i] == 1) {  // the smallest suffix has no predecessor
            matched = 0;
            continue;
        }
        if (matched > 0) --matched;
        const int j = sa[rank_of[i] - 1];
        while (i + matched <= n && j + matched <= n && text[i + matched] == text[j + matched]) {
            ++matched;
        }
        height[rank_of[i]] = matched;
    }
    return height;
}

// Returns the longest substrings that occur in more than half of `forms`,
// sorted in ascending byte order; the result is empty when no substring of
// length >= 1 qualifies. Strings must not contain NUL characters.
std::vector<std::string> longest_shared_substrings(const std::vector<std::string>& forms) {
    const int form_count = static_cast<int>(forms.size());
    if (form_count == 0) return {};
    if (form_count == 1) {
        // "More than half" of one string is that string itself, so the whole
        // string is the answer; the pair-based grouping below cannot see it.
        if (forms[0].empty()) return {};
        return {forms[0]};
    }

    // Join the strings (1-based) and remember which string owns each position.
    std::vector<int> text(1, 0);
    std::vector<int> owner(1, 0);  // 1..form_count, or 0 for a separator
    std::size_t longest_input = 0;
    for (int f = 0; f < form_count; ++f) {
        for (const unsigned char c : forms[f]) {
            text.push_back(c);
            owner.push_back(f + 1);
        }
        text.push_back(kByteValues + 1 + f);  // unique separator, sorts after every character
        owner.push_back(0);
        longest_input = std::max(longest_input, forms[f].size());
    }
    const int n = static_cast<int>(text.size()) - 1;

    const std::vector<int> sa = build_suffix_array(text, n, kByteValues + form_count);
    const std::vector<int> height = build_height(text, n, sa);

    const int needed = form_count / 2 + 1;  // strictly more than half
    std::vector<int> seen_in_group(form_count + 1, 0);
    int group_stamp = 0;

    // For a candidate length, returns one start position per group of suffixes
    // that share a prefix of that length and cover at least `needed` strings.
    // Positions come out in suffix-array order, i.e. sorted by substring.
    const auto qualifying_starts = [&](int length) {
        std::vector<int> starts;
        int distinct = 0;
        bool group_open = false;
        const auto mark = [&](int position) {
            const int form = owner[position];
            if (form > 0 && seen_in_group[form] != group_stamp) {
                seen_in_group[form] = group_stamp;
                ++distinct;
            }
        };
        // Running one step past n flushes a group that reaches the last suffix.
        for (int r = 2; r <= n + 1; ++r) {
            if (r <= n && height[r] >= length) {
                if (!group_open) {
                    group_open = true;
                    ++group_stamp;  // invalidates marks left by earlier groups
                    distinct = 0;
                    mark(sa[r - 1]);
                }
                mark(sa[r]);
            } else if (group_open) {
                if (distinct >= needed) starts.push_back(sa[r - 1]);
                group_open = false;
            }
        }
        return starts;
    };

    // Feasibility is monotone in the length, so binary search for the maximum.
    int best_length = 0;
    std::vector<int> best_starts;
    int low = 1;
    int high = static_cast<int>(longest_input);
    while (low <= high) {
        const int mid = low + (high - low) / 2;
        std::vector<int> starts = qualifying_starts(mid);
        if (!starts.empty()) {
            best_length = mid;
            best_starts = std::move(starts);
            low = mid + 1;
        } else {
            high = mid - 1;
        }
    }

    std::vector<std::string> result;
    result.reserve(best_starts.size());
    for (const int start : best_starts) {
        std::string piece;
        piece.reserve(static_cast<std::size_t>(best_length));
        for (int offset = 0; offset < best_length; ++offset) {
            piece.push_back(static_cast<char>(text[start + offset]));
        }
        result.push_back(std::move(piece));
    }
    return result;
}

}  // namespace

// Reads test cases until a line containing 0 (or end of input) and prints the
// answer for each one, followed by an empty line.
int main() {
    static_assert(kMaxSequenceLength == 1000, "keep the scanf field width below in sync");
    char line[kMaxSequenceLength + 1];

    int form_count = 0;
    // Comparing against 1 also stops at EOF, where scanf returns a non-zero EOF.
    while (std::scanf("%d", &form_count) == 1 && form_count > 0) {
        std::vector<std::string> forms;
        forms.reserve(static_cast<std::size_t>(form_count));
        for (int i = 0; i < form_count; ++i) {
            if (std::scanf("%1000s", line) != 1) return 0;  // truncated input
            forms.emplace_back(line);
        }

        const std::vector<std::string> shared = longest_shared_substrings(forms);
        if (shared.empty()) {
            std::puts("?");
        } else {
            for (const std::string& piece : shared) std::printf("%s\n", piece.c_str());
        }
        std::putchar('\n');  // empty line separating this case from the next
    }
    return 0;
}
原文地址:https://www.cnblogs.com/rentu/p/11515624.html