单词统计的题目,给出一些单词,统计有多少单词在一个文本中出现,最经典的入门题了。
AC自动机的基础:
1 Trie,AC自动机以这个数据结构为基础,另外给每个节点增加一个fail指针,并增加一个构造fail指针的函数
2 KMP,不是直接运用KMP,而是需要KMP的思想,KMP思想都没有的话,理解这个算法会更加吃力的。
注意本题给出的关键词列表中可能有重复的单词,重复的关键词要分别计数;而同一个关键词在文本中出现多次时,只能统计一次。
搜索了一下网上的题解,发现好多代码都是一大抄的啊,⊙﹏⊙b汗。
本博客的乃是原创代码,代码风格也是差不多固定的,转载请注明出处:http://blog.csdn.net/kenden23,不少所谓的IT网站转载我的文章,不但链接没给出,连作者也没有,还好意思说自己是IT网站吗?
请尊重作者,如果认为这些算法代码那么好敲的,可以自己去敲去。
#include <cstdio>

const int ARR_SIZE = 26;      // one child slot per lowercase letter 'a'..'z'
const int MAX_N = 10001;      // used together with MAX_KEY_LEN to size the node pool
const int MAX_M = 1000001;    // size of the text buffer, terminating NUL included
const int MAX_KEY_LEN = 51;   // size of the keyword buffer, terminating NUL included
const int POOL_SIZE = MAX_KEY_LEN * MAX_N; // capacity of the node pool and the BFS queue

// One node of the Aho-Corasick automaton (a Trie node plus a failure link).
struct Node
{
    Node *arr[ARR_SIZE]; // children, indexed by (letter - 'a')
    Node *fail;          // failure link; NULL only for the root
    int n;               // number of keywords ending here; -1 once it has been counted
};

// Reset a node to the "no children, no keyword, no failure link" state.
void clearNode(Node *rt)
{
    for (int i = 0; i < ARR_SIZE; i++)
    {
        rt->arr[i] = NULL;
    }
    rt->n = 0;
    rt->fail = NULL;
}

Node *q[POOL_SIZE];   // BFS queue used by buildFail()
Node pool[POOL_SIZE]; // statically allocated node storage
Node *Trie;           // root of the automaton
int head, tail, poolID;

// Insert one keyword into the Trie.
// A keyword that is empty or contains a character outside 'a'..'z' is
// ignored, because such a character cannot be mapped to a child slot.
// Inserting the same keyword twice makes it count twice (n is incremented).
void insert(char *str)
{
    if (*str == '\0')
    {
        return;
    }
    for (const char *p = str; *p; p++)
    {
        if (*p < 'a' || *p > 'z')
        {
            return;
        }
    }

    Node *pCrawl = Trie;
    for (; *str; str++)
    {
        int id = *str - 'a';
        if (!pCrawl->arr[id])
        {
            if (poolID >= POOL_SIZE)
            {
                return; // pool exhausted: drop the keyword instead of writing past the pool
            }
            pCrawl->arr[id] = &pool[poolID++];
            clearNode(pCrawl->arr[id]);
        }
        pCrawl = pCrawl->arr[id];
    }
    pCrawl->n++;
}

// Compute the failure link of every node with a breadth-first traversal.
// Must be called after all keywords are inserted and before searching.
void buildFail()
{
    head = tail = 0;
    q[tail++] = Trie;
    while (head < tail)
    {
        Node *pCrawl = q[head++];
        for (int i = 0; i < ARR_SIZE; i++)
        {
            Node *child = pCrawl->arr[i];
            if (child == NULL)
            {
                continue;
            }

            // Default to the root; overwritten below if a longer suffix matches.
            child->fail = Trie;

            // Walk up the parent's failure chain. The first ancestor that has
            // an i-child gives the longest proper suffix that is also a prefix.
            for (Node *fail = pCrawl->fail; fail; fail = fail->fail)
            {
                if (fail->arr[i])
                {
                    child->fail = fail->arr[i];
                    break;
                }
            }
            q[tail++] = child;
        }
    }
}

// Count how many inserted keywords occur in text.
// Each keyword node is counted at most once: after its count is added, n is
// set to -1, so further occurrences in the text do not add to the result.
// This marks the automaton as consumed; it has to be rebuilt before reuse.
// A character outside 'a'..'z' restarts matching from the root.
int searchWordsInText(char *text)
{
    Node *pCrawl = Trie;
    int ans = 0;
    for (; *text; text++)
    {
        if (*text < 'a' || *text > 'z')
        {
            pCrawl = Trie; // no keyword can span a character outside the alphabet
            continue;
        }
        int id = *text - 'a';

        // Fall back along failure links until a transition on id exists.
        while (!pCrawl->arr[id] && pCrawl != Trie)
        {
            pCrawl = pCrawl->fail;
        }
        if (!pCrawl->arr[id])
        {
            continue;
        }
        pCrawl = pCrawl->arr[id];

        // Collect every keyword that ends at this position. Stop at the first
        // node already marked -1: everything further up its failure chain was
        // marked during the same earlier walk.
        for (Node *tmp = pCrawl; tmp && tmp->n != -1; tmp = tmp->fail)
        {
            ans += tmp->n;
            tmp->n = -1;
        }
    }
    return ans;
}

// Discard the remainder of the current input line, newline included.
static void skipRestOfLine()
{
    int c;
    while ((c = getchar()) != EOF && c != '\n')
    {
    }
}

// Read one input line into buf (capacity cap, terminating NUL included).
// Characters that do not fit are discarded, the newline is consumed, and a
// trailing '\r' is stripped. Returns false when end of input is reached
// before any character was read.
static bool readLine(char *buf, int cap)
{
    int len = 0;
    int c;
    while ((c = getchar()) != EOF && c != '\n')
    {
        if (len < cap - 1)
        {
            buf[len++] = (char)c;
        }
    }
    while (len > 0 && buf[len - 1] == '\r')
    {
        len--;
    }
    buf[len] = '\0';
    return c != EOF || len > 0;
}

// Input: T test cases; each has a keyword count n, then n lines with one
// keyword each, then one line of text. Prints the match count per test case.
int main()
{
    static char text[MAX_M]; // static storage: too large to keep on the stack
    char keyWord[MAX_KEY_LEN];
    int T;

    if (scanf("%d", &T) != 1)
    {
        return 0;
    }
    while (T-- > 0)
    {
        int n;
        if (scanf("%d", &n) != 1)
        {
            break;
        }
        skipRestOfLine(); // move past the line holding n, whatever its line ending

        Trie = &pool[0];
        clearNode(Trie);
        poolID = 1;

        while (n-- > 0)
        {
            if (!readLine(keyWord, MAX_KEY_LEN))
            {
                break;
            }
            insert(keyWord);
        }
        readLine(text, MAX_M);

        buildFail();
        printf("%d\n", searchWordsInText(text));
    }
    return 0;
}
HDU 2222 Keywords Search AC自动机入门题
时间: 2024-10-01 07:12:36