http://poj.org/problem?id=3691
http://acm.hdu.edu.cn/showproblem.php?pid=2457
DNA repair
Description Biologists finally invent techniques of repairing DNA that contains segments causing kinds of inherited diseases. For the sake of simplicity, a DNA is represented as a string containing characters 'A', 'G', 'C' and 'T'. The repairing techniques are simply to change some characters to eliminate all segments causing diseases. You are to help the biologists to repair a DNA by changing the least number of characters. Input The input consists of multiple test cases. Each test case starts with a line containing one integer N (1 ≤ N ≤ 50), which is the number of DNA segments causing inherited diseases. The following N lines give N non-empty strings of length not greater than 20 containing only characters in "AGCT", which are the DNA segments causing inherited diseases. The last line of the test case is a non-empty string of length not greater than 1000 containing only characters in "AGCT", which is the DNA to be repaired. The last test case is followed by a line containing a single zero. Output For each test case, print a line containing the test case number (beginning with 1) followed by the number of characters which need to be changed. If it's impossible to repair the given DNA, print -1. Sample Input 2 AAA AAG AAAG 2 A TG TGAATG 4 A G C T AGT 0 Sample Output Case 1: 1 Case 2: 4 Case 3: -1 Source |
题意:
给出N个模式串和一个文本串,问最少修改文本串中多少个字母使得文本串中不包含模式串。
分析:
N个模式串构建AC自动机,然后文本串在AC自动机中走,其中单词结点不可达。
用dp[i][j]表示处理完文本串前i个字母、且当前位于AC自动机第j个结点时最少修改的字母个数,状态转移方程为dp[i][j]=min(dp[i][j],dp[i-1][last]+add),其中last表示j的前驱结点,add表示第i个字母是否需要修改(需要修改为1,否则为0)。由于第i层只和第i-1层有关,所以可以使用滚动数组来优化空间。
/*
 *
 * Author : fcbruce <[email protected]>
 *
 * Time : Tue 18 Nov 2014 11:17:49 AM CST
 *
 */
#include <cstdio>
#include <iostream>
#include <sstream>
#include <cstdlib>
#include <algorithm>
#include <ctime>
#include <cctype>
#include <cmath>
#include <string>
#include <cstring>
#include <stack>
#include <queue>
#include <list>
#include <vector>
#include <map>
#include <set>

#define sqr(x) ((x)*(x))
#define LL long long
#define itn int
#define INF 0x3f3f3f3f
#define PI 3.1415926535897932384626
#define eps 1e-10

#ifdef _WIN32
  #define lld "%I64d"
#else
  #define lld "%lld"
#endif

#define maxm
#define maxn 1024

using namespace std;

// Scratch queue of node indices used by Acauto::get_fail() for its BFS.
int q[maxn];

// Alphabet size: the four letters handled by Acauto::idx().
const int maxsize = 4;

// Aho-Corasick automaton over a 4-letter alphabet, stored in fixed-size
// arrays, plus a rolling-array DP that computes the minimum number of
// character substitutions needed so that a text contains no inserted pattern.
//
// Usage: clear(), insert() every pattern, get_fail(), then DP(text).
struct Acauto
{
  int ch[maxn][maxsize];    // ch[u][c]: child / goto target of node u on letter c (0 = root or "absent" before get_fail)
  bool val[maxn];           // val[u]: node u is forbidden (ends a pattern, or after get_fail, has one as a suffix)
  int last[maxn],nex[maxn]; // nex[u]: failure link of u; last[] is only written for the root's children
  int sz;                   // number of nodes currently in use (root is node 0)
  int dp[2][maxn];          // two rolling DP rows indexed by automaton node

  // Starts out as an empty automaton containing only the root.
  Acauto()
  {
    clear();
  }

  // Resets the automaton to just the root node. Other nodes are lazily
  // re-initialised by insert() when they are allocated again.
  void clear()
  {
    sz=1;
    val[0]=false;
    memset(ch[0],0,sizeof ch[0]);
  }

  // Maps a letter to its column in ch[][]: 'A'->0, 'T'->1, 'C'->2,
  // and every other character ->3.
  int idx(const char c)
  {
    switch (c)
    {
      case 'A': return 0;
      case 'T': return 1;
      case 'C': return 2;
      default:  return 3;
    }
  }

  // Adds the NUL-terminated pattern s to the trie and marks its final node
  // as forbidden. Performs no capacity check against maxn.
  void insert(const char *s)
  {
    int node=0;
    for (const char *p=s;*p!='\0';++p)
    {
      int &child=ch[node][idx(*p)];
      if (child==0)
      {
        // Allocate a fresh node; wipe whatever a previous test case left there.
        memset(ch[sz],0,sizeof ch[sz]);
        val[sz]=false;
        child=sz++;
      }
      node=child;
    }
    val[node]=true;
  }

  // Builds failure links by BFS, fills every missing transition with the
  // failure target's transition (so ch[][] becomes a complete transition
  // table), and propagates the forbidden flag along failure links.
  void get_fail()
  {
    int head=0,tail=0;  // pending nodes live in q[head, tail)
    nex[0]=0;

    // Depth-1 nodes all fail back to the root.
    for (int c=0;c<maxsize;c++)
    {
      const int child=ch[0][c];
      if (child==0) continue;
      nex[child]=0;
      last[child]=0;
      q[tail++]=child;
    }

    while (head<tail)
    {
      const int cur=q[head++];
      const int fail=nex[cur];
      for (int c=0;c<maxsize;c++)
      {
        const int child=ch[cur][c];
        if (child!=0)
        {
          q[tail++]=child;
          nex[child]=ch[fail][c];
          // A node is forbidden if any proper suffix of it is forbidden.
          val[child]|=val[nex[child]];
        }
        else
        {
          // Missing edge: borrow the failure node's transition.
          ch[cur][c]=ch[fail][c];
        }
      }
    }
  }

  // Returns the minimum number of characters of the NUL-terminated text T
  // that must be replaced so that walking the automaton never enters a
  // forbidden node, or -1 if no such replacement exists.
  // Requires get_fail() to have been called after the last insert().
  int DP(const char *T)
  {
    memset(dp,0x3f,sizeof dp);
    dp[0][0]=0;

    int cur=0;  // row holding the costs for the prefix consumed so far
    for (const char *p=T;*p!='\0';++p)
    {
      const int nxt=cur^1;
      memset(dp[nxt],0x3f,sizeof dp[nxt]);

      const int want=idx(*p);
      for (int j=0;j<sz;j++)
      {
        const int base=dp[cur][j];
        if (base==INF) continue;  // state j is unreachable after this prefix

        for (int k=0;k<maxsize;k++)
        {
          const int to=ch[j][k];
          if (val[to]) continue;  // never step onto a forbidden node

          // Keeping the original letter is free; any other letter costs 1.
          const int cost=base+(k!=want?1:0);
          if (cost<dp[nxt][to]) dp[nxt][to]=cost;
        }
      }
      cur=nxt;
    }

    int best=INF;
    for (int i=0;i<sz;i++) best=min(best,dp[cur][i]);
    return best==INF?-1:best;
  }
}acauto;

// Input buffer shared by the pattern strings and the text to repair.
char DNA[1024];
int main() { #ifdef FCBRUCE freopen("/home/fcbruce/code/t","r",stdin); #endif // FCBRUCE int n,__=0; while (scanf("%d",&n),n!=0) { acauto.clear(); for (int i=0;i<n;i++) { scanf("%s",DNA); acauto.insert(DNA); } acauto.get_fail(); scanf("%s",DNA); printf("Case %d: %d\n",++__,acauto.DP(DNA)); } return 0; }