DNA Sequence
Time Limit: 1000MS | Memory Limit: 65536K | |
Total Submissions: 12252 | Accepted: 4661 |
Description
It's well known that a DNA sequence contains only the letters A, C, T and G, and it's very useful to analyze segments of a DNA sequence. For example, if an animal's DNA sequence contains the segment ATC, it may mean that the animal
may have a genetic disease. So far scientists have found several such segments; the problem is to count how many DNA sequences of a species don't contain any of those segments.
Suppose that the DNA sequences of a species consist of A, C, T and G, and that the length of each sequence is a given integer n.
Input
The first line contains two integers m (0 <= m <= 10) and n (1 <= n <= 2000000000). Here, m is the number of genetic disease segments, and n is the length of the sequences.
Each of the next m lines contains one DNA genetic disease segment; the length of each segment is not larger than 10.
Output
An integer, the number of DNA sequences, mod 100000.
Sample Input
4 3
AT
AC
AG
AA
Sample Output
36
给出患病的DNA序列,问序列长度为n的,且不包含患病的DNA序列有多少种。
首先处理患病的DNA串,连接为字典树后,添加fail指针,完成AC自动机,给每个节点一个编号,然后用矩阵统计每个节点走一步可以走到的节点的种数,其中,不能包含会患病的序列,最后矩阵相乘。
注意1.矩阵元素相乘会超出 int 的范围,需要用 __int64(64 位整数)保存并及时取模。
注意2.处理自动机时,注意,如果发现某个节点的fail会返回到一个代表序列结束的节点上,那么这个节点也是不可达的。不能被统计到矩阵中。
注意3.矩阵快速幂要写成非递归的形式。
给出测试案例:
2 1
ACG
C
其中矩阵应该为
2 1 0 0 0
2 1 0 0 0
0 0 0 0 0
0 0 0 0 0
0 0 0 0 0
// POJ 2778 "DNA Sequence".
//
// Counts the strings of length n over the alphabet {A, C, G, T} that contain
// none of the given forbidden segments, modulo 100000.
//
// Method:
//   1. Insert every forbidden segment into a trie (settree).
//   2. Turn the trie into an Aho-Corasick automaton with a complete
//      transition table (setfail). A state is marked forbidden when it ends a
//      segment or when its failure link reaches a forbidden state.
//   3. Build the state-to-state transition count matrix, using only
//      transitions between non-forbidden states (setmat).
//   4. Raise that matrix to the n-th power (pow); the answer is the sum of
//      row 0, i.e. the number of length-n walks that start at the root.

#include <cstdio>
#include <cstring>
#include <queue>
#include <algorithm>

// Portable 64-bit integer (the original used the MSVC-only __int64).
typedef long long LL ;

const int MOD = 100000 ;   // all results are reported modulo this value
const int SIGMA = 4 ;      // alphabet size
const int MAXN = 110 ;     // state capacity: 10 segments * 10 chars + root = 101

// One automaton state.
struct node{
    int flag , id ;               // flag: 1 if reaching this state means a forbidden segment occurred; id: matrix index
    node *next[SIGMA] , *fail ;   // next: transitions per letter; fail: failure link
};

// Square matrix of transition counts; only the top-left n x n part is used.
struct nnode{
    LL Map[MAXN][MAXN] , n ;
};

const char c[SIGMA + 1] = "ACGT" ;   // letter for each transition index
char str[20] ;                       // input buffer for one segment
int num , vis[MAXN] ;                // num: states allocated so far; vis: BFS marks in setmat
node pool[MAXN] ;                    // state storage, reused by every test case (no heap leak)

// Returns the transition index of ch in "ACGT", or -1 if ch is not a DNA letter.
static int letter_index(char ch)
{
    for(int k = 0 ; k < SIGMA ; k++)
        if( c[k] == ch )
            return k ;
    return -1 ;
}

// Hands out the next unused state from the pool, initialised as a leaf.
// The caller resets `num` to 0 to start a new automaton.
node *newnode()
{
    node *p = &pool[num] ;
    p->flag = 0 ;
    p->id = num++ ;
    p->fail = NULL ;
    for(int i = 0 ; i < SIGMA ; i++)
        p->next[i] = NULL ;
    return p ;
}

// Inserts segment s into the trie rooted at rt and marks its end state as
// forbidden. The third parameter (segment number) is unused and kept only for
// call compatibility.
void settree(const char *s,node *rt,int /*temp*/)
{
    const int l = static_cast<int>( std::strlen(s) ) ;

    // A segment containing a non-ACGT character can never occur in a DNA
    // sequence, so it is ignored (the original indexed next[4] here).
    for(int i = 0 ; i < l ; i++)
        if( letter_index(s[i]) < 0 )
            return ;

    node *p = rt ;
    for(int i = 0 ; i < l ; i++)
    {
        const int k = letter_index(s[i]) ;
        if( p->next[k] == NULL )
            p->next[k] = newnode() ;
        p = p->next[k] ;
    }
    p->flag = 1 ;
}

// Computes failure links in BFS order and fills every missing transition, so
// that afterwards next[i] is non-NULL for every state and letter.
void setfail(node *rt)
{
    std::queue<node*> que ;
    rt->fail = NULL ;
    que.push(rt) ;
    while( !que.empty() )
    {
        node *p = que.front() ;
        que.pop() ;
        for(int i = 0 ; i < SIGMA ; i++)
        {
            // State reached by following p's failure link and then letter i.
            // p->fail is shallower than p, so BFS has already completed its
            // transition table; the root falls back to itself.
            node *via = ( p == rt ) ? rt : p->fail->next[i] ;

            if( p->next[i] )
            {
                node *child = p->next[i] ;
                child->fail = via ;
                // A forbidden segment that is a suffix of this state's string
                // makes the state forbidden as well.
                if( via->flag )
                    child->flag = 1 ;
                que.push(child) ;
            }
            else
                p->next[i] = via ;
        }
    }
}

// Builds the transition count matrix: Map[u][v] is the number of letters that
// move state u to state v, counting only moves where neither state is
// forbidden. Requires setfail(rt) to have been called first.
nnode setmat(node *rt)
{
    nnode q ;
    std::queue<node*> que ;
    std::memset(q.Map,0,sizeof(q.Map)) ;
    std::memset(vis,0,sizeof(vis)) ;
    q.n = num ;

    vis[ rt->id ] = 1 ;
    que.push(rt) ;
    while( !que.empty() )
    {
        node *p = que.front() ;
        que.pop() ;
        for(int i = 0 ; i < SIGMA ; i++)
        {
            node *to = p->next[i] ;
            if( !p->flag && !to->flag )
                q.Map[ p->id ][ to->id ]++ ;
            if( !vis[ to->id ] )
            {
                vis[ to->id ] = 1 ;
                que.push(to) ;
            }
        }
    }
    return q ;
}

// Returns a * b with every entry reduced modulo MOD. Entries of both inputs
// are expected to be below MOD, so each product is below 1e10 and fits in LL.
// Arguments are taken by reference to avoid copying two ~96 KB matrices.
nnode mul(const nnode &a,const nnode &b)
{
    nnode r ;
    r.n = a.n ;
    const int n = static_cast<int>(a.n) ;
    for(int i = 0 ; i < n ; i++)
    {
        for(int j = 0 ; j < n ; j++)
            r.Map[i][j] = 0 ;
        for(int k = 0 ; k < n ; k++)
        {
            const LL aik = a.Map[i][k] ;
            if( aik == 0 )
                continue ;   // the matrices are sparse; skip empty terms
            for(int j = 0 ; j < n ; j++)
                r.Map[i][j] = ( r.Map[i][j] + aik * b.Map[k][j] ) % MOD ;
        }
    }
    return r ;
}

// Returns p raised to the k-th power (k >= 0) by iterative binary
// exponentiation; k == 0 yields the identity matrix.
nnode pow(nnode p,int k)
{
    nnode temp ;
    temp.n = p.n ;
    std::memset(temp.Map,0,sizeof(temp.Map)) ;
    for(int i = 0 ; i < p.n ; i++)
        temp.Map[i][i] = 1 ;

    while( k )
    {
        if( k & 1 )
            temp = mul(temp,p) ;
        k >>= 1 ;
        if( k )              // the last squaring would never be used
            p = mul(p,p) ;
    }
    return temp ;
}

// Reads test cases "m n" followed by m segments until the input ends or is
// malformed, and prints one answer per test case.
int main()
{
    int n , m ;
    while( std::scanf("%d %d", &m, &n) == 2 )
    {
        num = 0 ;
        node *rt = newnode() ;
        for(int i = 1 ; i <= m ; i++)
        {
            // Width-limited read: str holds at most 19 characters plus NUL.
            if( std::scanf("%19s", str) != 1 )
                break ;
            settree(str,rt,i) ;
        }
        setfail(rt) ;

        nnode p = pow(setmat(rt),n) ;

        LL ans = 0 ;
        for(int i = 0 ; i < p.n ; i++)
            ans = ( ans + p.Map[0][i] ) % MOD ;

        // ans < MOD, so it fits in int; "%d" with a 64-bit argument is
        // undefined behaviour.
        std::printf("%d\n", static_cast<int>(ans)) ;
    }
    return 0 ;
}