字符串hash - 简单的字符匹配 --- poj 3461

Oulipo

Problem‘s Link：http://poj.org/problem?id=3461

Mean:

给你一个模式串P和一个母串S，让你统计P串在S串中出现的次数。

analyse:

这题我一开始想到的就是使用KMP，就用KMP写了，93ms，挺快的。我又用AC自动机写了一遍(纯属娱乐)，万万没想到竟然超时了，是我姿势不对么？

后来看别人有用字符串hash写的，听说字符串hash在某些问题中比AC自动机什么的厉害多了，于是又用字符串hash写了一遍，确实挺不错的，代码30+行，而且速度也挺快，173ms。要好好搞一下字符串hash，这个确实是一个好东西，听说字符串hash熟练了以后根本不用其他AC自动机、后缀数组就可以解决很多问题了。

在字符串hash中又学到了unsigned long long超范围后会自动对2^64取模，hash的利器啊，省去了手动取模。

Time complexity：O(n*m)

Source code：

字符串hash代码：

#include<stdio.h>
#include<string.h>
#define ULL unsigned long long
int seed=100;
char s1[10005],s2[1000005];
int main()
{
    int t;
    scanf("%d",&t);
    while(t--)
    {
        scanf("%s%s",s1,s2);
        int len1=strlen(s1),len2=strlen(s2);
        ULL a1=0,a2=0,l1=1;
        for(int i=0;i<len1;++i)
        {
            a1=a1*seed+(s1[i]-‘A‘+1);
            l1*=seed;
        }
        for(int i=0;i<len1;++i)
        {
            a2=a2*seed+(s2[i]-‘A‘+1);
        }
        int ans=0;
        if(a1==a2) ans++;
        for(int i=len1;i<len2;++i)
        {
            a2=a2*seed+(s2[i]-‘A‘+1)-l1*(s2[i-len1]-‘A‘+1);
            if(a2==a1) ans++;
        }
        printf("%d\n",ans);
    }
    return 0;
}

KMP代码：

// Memory   Time
// 1347K     0MS
// by : Snarl_jsb
// 2014-10-04-11.53
#include<algorithm>
#include<cstdio>
#include<cstring>
#include<cstdlib>
#include<iostream>
#include<vector>
#include<queue>
#include<stack>
#include<map>
#include<string>
#include<climits>
#include<cmath>
#define N 1000010
#define LL long long
using namespace std;
char s1[10005],s2[1000005];
vector<int> next;
void GetNext(char s[])
{
    int len=strlen(s),k=0;
    next.clear();
    next.push_back(0);
    for(int i=1;i<len;++i)
    {
        while(k!=0&&s[i]!=s[k])
            k=next[k-1];
        if(s[i]==s[k])
            k++;
        next.push_back(k);
    }
}
int KMP(char s1[],char s2[])
{
    int l1=strlen(s1),l2=strlen(s2);
    int k=0,ans=0;;
    for(int i=0;i<l2;++i)
    {
        while(k!=0&&s2[i]!=s1[k])
            k=next[k-1];
        if(s2[i]==s1[k])
            k++;
        if(k==l1)
        {
            ans++;
            k=next[k-1];
        }
    }
    return ans;
}
int main()
{
    int t;
    scanf("%d",&t);
    while(t--)
    {
        scanf("%s%s",s1,s2);
        int len1=strlen(s1),len2=strlen(s2);
        GetNext(s1);
        printf("%d\n",KMP(s1,s2));
    }
    return 0;
}

AC自动机超时代码（求大牛指针QAQ）：

// Memory   Time
// 1347K     0MS
// by : Snarl_jsb
// 2014-09-30-11.01
#include<algorithm>
#include<cstdio>
#include<cstring>
#include<cstdlib>
#include<iostream>
#include<vector>
#include<queue>
#include<stack>
#include<map>
#include<string>
#include<climits>
#include<cmath>
#define LL long long
using namespace std;

const int N = 1001000;
char str[N];
struct node
{
    node *next[30];     //  每个结点都对应30个字母的指针
    node *fail;     //      失配指针
    int count;      //
    node()      //  构造函数初始化
    {
        for(int i = 0; i < 30; i++)
            next[i] = NULL;
        count = 0;
        fail = NULL;
    }
}*q[N];
node *root;
int head, tail;

void Insert(char *str) //   插入单词.相当于构建一个Trie树
{
    node *p = root;
    int i = 0, index;
    while(str[i]) {
        index = str[i] - 65; //  转化为相对数字来存
        if(p->next[index] == NULL) // 该字母未插入过
            p->next[index] = new node();    //  为该字母申请一个结点
        p = p->next[index];     //   移至下一个
        i++;
    }
    p->count++;     //      记录该结点的单词总共插入的次数
}
void build_ac_automation(node *root)        //      bfs建立fail指针
{
    root->fail = NULL;
    q[tail++] = root;
    while(head < tail) {
        node *temp = q[head++];
        node *p = NULL;
        for(int i = 0; i < 30; i++) {
            if(temp->next[i] != NULL) {
                if(temp == root) temp->next[i]->fail = root;
                else {
                    p = temp->fail;
                    while(p != NULL) {
                        if(p->next[i] != NULL) {
                            temp->next[i]->fail = p->next[i];
                            break;
                        }
                        p = p->fail;
                    }
                    if(p == NULL) temp->next[i]->fail = root;
                }
                q[tail++] = temp->next[i];
            }
        }
    }
}
int total=0;
int Query(node *root)       //  匹配 + 统计
{
    int i = 0, cnt = 0, index;
    node *p = root;
    int idx=0;
    while(str[i])
    {
        index = str[i] - 65;
        while(p->next[index] == NULL && p != root) //前缀是相同的，所以不管哪个指针走到了count不为0的结点上，那么该结点所代表的单词就匹配成功
            p = p->fail;//失配情况下，p指针指向p->fail.(相当于KMP的next数组)
        p = p->next[index];//由于现在所在的位置是父节点，所以需要向下移动一个位置
        if(p == NULL)
            p = root; //如果匹配失败，移动到root，重新开始匹配
        node *temp = p;//
        while(temp != root && temp->count != -1)  //统计--如果匹配成功,那么count>1，表示该结点代表的单词数量;否则表示该结点没有单词
        {
            if(temp->count>0)
            {
                cnt ++; //统计该单词出现的次数
            }
//            temp->count = -1;   //标记为-1，表示该单词已经加入了cnt中，下次就不用重复统计
            temp = temp->fail;//判断整条链上的匹配情况
        }
        i++;
    }
    return cnt;
}

int main()
{
    int t;
    cin>>t;
    while(t--)
    {
        head=tail=0;
        root=new node();
        scanf("%s",str);
        Insert(str);
        build_ac_automation(root);
        scanf("%s",str);
//        Query(root);
        printf("%d\n",Query(root));
    }
    return 0;
}

时间： 2024-08-28 01:27:17

字符串hash - 简单的字符匹配 --- poj 3461的相关文章

Codeforces 358B--string--（简单的字符匹配）

题意:一句话加密.每个单词的前后都要加上"<3",完成转换后的句子的任何地方都可以加任何字符或数字(加不加,在哪加,加多少都是不定的).判断输入的密文格式是否符合上面的要求. 分析:简单的字符匹配的题. 先预处理需要加密的句子,然后与输入的密文比较.主要是注意积累string的用法,还有本题预处理的方法. #include<iostream> #include<string> using namespace std; int main() { string

2014牡丹江网络预选赛I题（字符串hash+简单DP）zoj3817

Chinese Knot Time Limit: 2 Seconds Memory Limit: 65536 KB Special Judge Chinese knot is a decorative handicraft that began as a form of Chinese folk artifact in the Tang and Song Dynasty in China. It was later popularized in the Ming. Alice

对字符串进行简单的字符数字统计探索java中的List功能

题目: 统计一个字符串中数字和字符串的个数,并分别进行排列,要求 1.数字,字符串可以从键盘获取. 2.储存在list 3.统计数字个数,字符串个数 4.把数字和字符串按从小到大的顺序输出 5.不能使用数组. List的用法 List包括List接口以及List接口的所有实现类.因为List接口实现了Collection接口,所以List接口拥有Collection接口提供的所有常用方法,又因为List是列表类型,所以List接口还提供了一些适合于自身的常用方法.[自行百度] List接口提供的

对上次“对字符串进行简单的字符数字统计探索java中的List功能 ”程序，面向对象的改进

之前的随笔中的程序在思考后发现,运用了太多的static 函数,没有将面向对象的思想融入,于是做出了一下修改: 1 import java.util.ArrayList; 2 import java.util.Collections; 3 import java.util.List; 4 import java.util.Scanner; 5 6 7 public class classtest { 8 9 List<String> number=new ArrayList<String

poj 3461 字符串单串匹配--KMP或者字符串HASH

http://poj.org/problem?id=3461 先来一发KMP算法: #include <cstdio> #include <cstring> #include <algorithm> #include <string> #include <iostream> #include <cmath> #include <map> #include <queue> using namespace std;

字符串hash + 二分答案 - 求最长公共子串 --- poj 2774

Long Long Message Problem's Link:http://poj.org/problem?id=2774 Mean: 求两个字符串的最长公共子串的长度. analyse: 前面在学习后缀数组的时候已经做过一遍了,但是现在主攻字符串hash,再用字符串hash写一遍. 这题的思路是这样的: 1)取较短的串的长度作为high,然后二分答案(每次判断长度为mid=(low+high)>>1是否存在,如果存在就增加下界:不存在就缩小上界): 2)主要是对答案的判断(judge函数

题目1021：统计字符（hash简单应用）

问题来源 http://ac.jobdu.com/problem.php?pid=1021 问题描述每次输入两个字符串,统计第一个字符串中的每个字符在第二个字符串中出现的次数. 问题分析太明显了,hash数组的简单应用:吧字符数值作为数组下标对每个字符进行计数. 另外,字符串的整行输入,请参考:http://www.cnblogs.com/AlvinZH/p/6798023.html 参考代码 // // Created by AlvinZH on 2017/5/18. // Copyrig

poj 2774 最长公共子串--字符串hash或者后缀数组或者后缀自动机

http://poj.org/problem?id=2774 想用后缀数组的看这里:http://blog.csdn.net/u011026968/article/details/22801015 本文主要讲下怎么hash去找开始的时候写的是O(n^2 logn)算法果断超时...虽然也用了二分的,, 代码如下: //hash+二分 #include <cstdio> #include <cstring> #include <algorithm> #include

POJ 1200 Crazy Search （字符串hash）

题目大意: 分析长度为n的子串有多少种. 思路分析: 对于没出现的字符,将其分配一个数字. 然后将子串看做一个nc进制的数. 然后hash判断. #include <cstdio> #include <iostream> #include <algorithm> #include <map> #include <cstring> #include <string> using namespace std; bool vis[26666