【数据结构】处理哈希冲突的开链法（哈希桶）算法实现

实现哈希表时，处理哈希冲突常见的方法是线性探测和二次探测（闭散列），这两种算法都很简单，若有兴趣，可以查看我的博客。但是，它们有一个共同的缺点：空间利用率低。为什么这么说呢？线性探测、二次探测的效率很大程度上取决于载荷因子，载荷因子 = 已存放的关键字个数 / 空间大小。载荷因子越接近 1，冲突就越多、探测序列就越长，所以必须在载荷因子还比较低（通常不超过 0.7～0.8）时就扩容，这意味着表中始终有相当一部分空间是闲置的。

通过查阅资料，我发现，使用素数做除数（即让哈希表的容量取素数）可以减少哈希冲突。直观的原因是：如果容量是合数，那么与容量有公因子的关键字取模后只会落到少数几个位置上；容量取素数就能避免这种聚集。下面这张素数表是前人钻研出来的，发现很好用，就在这里分享给大家。见下：

----素数表

// Prime table used to pick the hash table capacity; a prime bucket count
// helps reduce hash collisions. Entries are in ascending order and each one
// is roughly twice the previous one (except the final entry).
const int _PrimeSize = 28;

static const unsigned long _PrimeList[_PrimeSize] =
{
    53ul,         97ul,         193ul,        389ul,
    769ul,        1543ul,       3079ul,       6151ul,
    12289ul,      24593ul,      49157ul,      98317ul,
    196613ul,     393241ul,     786433ul,     1572869ul,
    3145739ul,    6291469ul,    12582917ul,   25165843ul,
    50331653ul,   100663319ul,  201326611ul,  402653189ul,
    805306457ul,  1610612741ul, 3221225473ul, 4294967291ul
};

开链法（哈希桶）结构：

而用开链法（哈希桶）实现时，发生冲突的元素只是挂在同一个桶的单链表上，不会占用别的位置，所以我们可以将载荷因子设成 1，即元素个数等于桶的个数时才扩容。

代码如下：

#define _CRT_SECURE_NO_WARNINGS 1
#include<iostream>
using namespace std;

#include<vector>

// One key/value entry of the hash table; entries that hash to the same bucket
// are linked into a singly linked list through _next.
template<class K, class V>
struct HashTableNode
{
    K _key;                 // key stored in this node
    V _value;               // value associated with _key
    HashTableNode* _next;   // next node in the same bucket, or NULL at the end

    HashTableNode(const K& key, const V& value)
        : _key(key)
        , _value(value)
        , _next(NULL)
    {}
};

// Hash table that resolves collisions by separate chaining (hash buckets).
//
// - Keys are unique; K must support `key % size_t` and `operator==`.
// - The bucket count is always taken from a table of primes.
// - The load factor limit is 1: the table grows when the number of stored
//   elements reaches the number of buckets.
// - The table owns every node it allocates and frees them in the destructor.
template<class K, class V>
class HashTable
{
public:
    typedef HashTableNode<K, V> Node;

    // Creates an empty table with no buckets; buckets are allocated on the
    // first successful Insert.
    HashTable()
        : _table()
        , _size(0)
    {}

    // Maps a key to a bucket index in the current table.
    // Returns 0 when the table has no buckets, so an empty table never
    // evaluates `key % 0`.
    size_t _HashFunc(const K& key) const
    {
        return BucketIndex(key, _table.size());
    }

    // Copy constructor: deep-copies every node of ht. The copy has the same
    // bucket count, and each chain keeps the same order as in ht.
    HashTable(const HashTable& ht)
        : _table(ht._table.size())   // value-initialized: every bucket is NULL
        , _size(0)                   // must start at 0 before nodes are counted
    {
        try
        {
            CopyNodesFrom(ht);
        }
        catch (...)
        {
            // The destructor does not run for a partially constructed object,
            // so release whatever was copied before re-throwing.
            ClearNodes();
            throw;
        }
    }

    // Assignment via copy-and-swap. The argument is taken by value, so the
    // deep copy happens at the call site; the old contents of *this end up in
    // ht and are released by its destructor. Self-assignment is safe.
    // (A second overload taking const HashTable& would make `a = b` ambiguous.)
    HashTable& operator=(HashTable ht)
    {
        _table.swap(ht._table);
        std::swap(_size, ht._size);
        return *this;
    }

    // Frees every node owned by the table.
    ~HashTable()
    {
        ClearNodes();
    }

    // Returns the smallest prime in the table that is greater than the current
    // bucket count, or the largest prime if the table cannot grow any further.
    size_t _GetnewSize() const
    {
        static const int _PrimeSize = 28;
        static const unsigned long _PrimeList[_PrimeSize] =
        {
            53ul, 97ul, 193ul, 389ul, 769ul,
            1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
            49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
            1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
            50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
            1610612741ul, 3221225473ul, 4294967291ul
        };

        for (int i = 0; i < _PrimeSize; i++)
        {
            if (_PrimeList[i] > _table.size())
            {
                return static_cast<size_t>(_PrimeList[i]);
            }
        }
        return static_cast<size_t>(_PrimeList[_PrimeSize - 1]);
    }

    // Grows the bucket array to the next prime size and relinks every existing
    // node into the new array. No node is copied or reallocated.
    void _ExpandCapacity()
    {
        const size_t newSize = _GetnewSize();
        if (newSize <= _table.size())
        {
            return;     // already at the largest supported size
        }

        // Allocate first: if this throws, the table is left untouched.
        std::vector<Node*> newTable(newSize);

        for (size_t i = 0; i < _table.size(); ++i)
        {
            Node* cur = _table[i];
            while (cur != NULL)
            {
                Node* moved = cur;
                cur = cur->_next;
                // The index must be computed against the NEW bucket count;
                // using the old count would file the node under the wrong
                // bucket and make it unreachable for Find/Remove.
                const size_t index = BucketIndex(moved->_key, newSize);
                // Head insertion into the new bucket.
                moved->_next = newTable[index];
                newTable[index] = moved;
            }
            _table[i] = NULL;
        }
        _table.swap(newTable);
    }

    // Inserts key/value.
    // Returns false (and changes nothing) if the key is already present,
    // true if a new node was added.
    bool Insert(const K& key, const V& value)
    {
        // Reject duplicates before growing, so a failed insert never rehashes.
        if (FindNode(key) != NULL)
        {
            return false;
        }

        // Load factor limit is 1: grow once there are as many elements as buckets.
        if (_size >= _table.size())
        {
            _ExpandCapacity();
        }

        // Order inside a chain does not matter, so head insertion is used.
        const size_t index = _HashFunc(key);
        Node* node = new Node(key, value);
        node->_next = _table[index];
        _table[index] = node;
        ++_size;
        return true;
    }

    // Returns the node holding key, or NULL if the key is not in the table.
    Node* Find(const K& key)
    {
        return FindNode(key);
    }

    // Read-only lookup for const tables.
    const Node* Find(const K& key) const
    {
        return FindNode(key);
    }

    // Removes the node holding key.
    // Returns true if a node was removed, false if the key was not found.
    bool Remove(const K& key)
    {
        if (_table.empty())
        {
            return false;
        }

        // `link` points at whichever pointer refers to the current node (the
        // bucket head or a predecessor's _next), so unlinking needs no special
        // case for the first node of a chain.
        Node** link = &_table[_HashFunc(key)];
        while (*link != NULL && !((*link)->_key == key))
        {
            link = &(*link)->_next;
        }

        if (*link == NULL)
        {
            return false;
        }

        Node* victim = *link;
        *link = victim->_next;
        delete victim;
        --_size;
        return true;
    }

    // Prints every bucket as "index:key->key->NULL", one bucket per line,
    // followed by an empty line.
    void PrintHashTable() const
    {
        for (size_t i = 0; i < _table.size(); ++i)
        {
            std::cout << i << ":";
            for (const Node* cur = _table[i]; cur != NULL; cur = cur->_next)
            {
                std::cout << cur->_key << "->";
            }
            std::cout << "NULL" << '\n';
        }
        std::cout << std::endl;
    }

private:
    // Bucket index of key in a table of bucketCount buckets (0 if there are none).
    static size_t BucketIndex(const K& key, size_t bucketCount)
    {
        return bucketCount == 0 ? 0 : static_cast<size_t>(key % bucketCount);
    }

    // Shared lookup used by Insert and both Find overloads.
    Node* FindNode(const K& key) const
    {
        if (_table.empty())
        {
            return NULL;
        }

        for (Node* cur = _table[_HashFunc(key)]; cur != NULL; cur = cur->_next)
        {
            if (cur->_key == key)
            {
                return cur;
            }
        }
        return NULL;
    }

    // Deep-copies all nodes of src into this table. Requires that this table
    // already has src's bucket count and that all of its buckets are empty.
    void CopyNodesFrom(const HashTable& src)
    {
        for (size_t i = 0; i < src._table.size(); ++i)
        {
            Node** tail = &_table[i];   // where the next copied node is linked
            for (const Node* cur = src._table[i]; cur != NULL; cur = cur->_next)
            {
                *tail = new Node(cur->_key, cur->_value);
                tail = &(*tail)->_next;
                ++_size;
            }
        }
    }

    // Deletes every node and leaves all buckets empty.
    void ClearNodes()
    {
        for (size_t i = 0; i < _table.size(); ++i)
        {
            Node* cur = _table[i];
            while (cur != NULL)
            {
                Node* next = cur->_next;
                delete cur;
                cur = next;
            }
            _table[i] = NULL;
        }
        _size = 0;
    }

    std::vector<Node*> _table;  // bucket array; each entry is the head of a chain
    size_t _size;               // number of stored elements
};

void TestHashTableBucket()
{
    typedef HashTableNode<int, char> Node;

    HashTable<int, char> ht;
    ht.Insert(1, ‘a‘);
    ht.Insert(2, ‘b‘);
    ht.Insert(3, ‘c‘);
    ht.Insert(4, ‘d‘);
    ht.Insert(5, ‘d‘);
    ht.Insert(54, ‘x‘);
    ht.Insert(55, ‘y‘);
    ht.Insert(56, ‘z‘);

    ht.PrintHashTable();

    /*Node* ret = ht.Find(5);
    cout << ret->_value << endl;

    ht.Remove(1);
    ht.Remove(6);
    ht.PrintHashTable();*/

    /*HashTable<int, char> ht1(ht);
    ht1.PrintHashTable();*/

    HashTable<int, char> ht2;
    ht2.Insert(54, ‘x‘);
    ht2.Insert(55, ‘y‘);
    ht2.Insert(56, ‘z‘);
    ht2.Insert(1, ‘a‘);
    ht2.Insert(2, ‘b‘);
    ht2.Insert(3, ‘c‘);
    ht2.Insert(4, ‘d‘);
    ht2.Insert(5, ‘d‘);

    ht2.PrintHashTable();

    ht = ht2;
    ht.PrintHashTable();

}

// Program entry point: runs the hash table demo, then invokes the shell
// command "pause" before exiting.
int main()
{
    TestHashTableBucket();
    // NOTE(review): "pause" is a Windows shell command, presumably used here to
    // keep the console window open; on other systems this call will likely fail.
    system("pause");
    return 0;
}

时间： 2024-10-13 21:34:38

【数据结构】处理哈希冲突的开链法（哈希桶）算法实现的相关文章

哈希表（开链法）

纯代码 #pragma once #include <iostream> #include <vector> using namespace std; struct __HashFuncString { size_t operator()(const string &key) { size_t hash = 0; for (size_t i = 0; i < key.size(); ++i) { hash += key[i]; } return hash; } };

【数据结构】c++实现HashTable（开链法）

#include <iostream> #include <vector> using namespace std; template <class K, class V> struct HashTableNode { K _key; V _value; HashTableNode<K, V>* _next; HashTableNode(const K&key, const V&value) :_key(key) , _value(value

【干货】C++哈希桶（开链法解决哈希冲突）类的实现

开链法(哈希桶)是解决哈希冲突的常用手法,结构如下: 数据结构的设计思路是这样的,定义一个K-V的链式节点(Node),以数组方式存储节点指针实现代码如下: #include<vector> #include"HashTable.h" size_t GetSize() { static size_t index = 0; const int _PrimeSize = 28; static const unsigned long _PrimeList[_PrimeSize]

【算法与数据结构】哈希表-链地址法

哈希表的链地址法来解决冲突问题将所有关键字为同义词的记录存储在同一个线性链表中,假设某哈希函数产生的哈希地址在区间[0, m - 1]上,则设立一个指针型向量 Chain ChainHash[m]; 数据结构 //链表结点 typedef struct _tagNode { int data; //元素值(关键字) struct _tagNode* next; //下一个结点 }Node, *PNode; //哈希表结点 typedef struct _tagHashTable { //这里

算法学习 - HashTable开放地址法解决哈希冲突

开放地址法解决哈希冲突线性开放地址法线性开放地址法就是在hash之后,当发现在位置上已经存在了一个变量之后,放到它下一个位置,假如下一个位置也冲突,则继续向下,依次类推,直到找到没有变量的位置,放进去. 平方开放地址法平方地址法就是在hash之后,当正确位置上存在冲突,不放到挨着的下一个位置,而是放到第2^0位置,假如继续冲突放到2^1的位置,依次2^3... 直到遇到不冲突的位置放进去. 双散列开放地址法双散列同上,不过不是放到2^的位置,而是放到key - hash(key, tab

算法学习 - Hash Table操作，分离链接法解决哈希冲突

分离链接法 hash table是映射机制的,最大的优点就是它的操作是O(1)级别的.但是会出现哈希冲突,这就需要几种办法来解决.这里先说一种:分离链接法. 就是当插入的位置已经存在一个值之后,那么在这个值之后插入,就可以了,也叫拉链法.(但是其实会降低查找速度,变成O(n)级别) 下面是代码: // // main.cpp // HashTable_SeparateChaining // // Created by Alps on 14-8-5. // Copyright (c) 2014年

处理哈希冲突的线性探测法

哈希表,是根据关键字(Key value)而直接访问在内存存储位置的数据结构.也就是说,它通过计算一个关于键值的函数,将所需查询的数据映射到表中一个位置来访问记录,这加快了查找速度.这个映射函数称做散列函数,存放记录的数组称做散列表.(摘自维基百科) 对不同的关键字可能得到同一散列地址,即k1!=k2,而f(k1)=f(k2),这种现象称为碰撞(英语:Collision),也叫哈希冲突. 处理哈希冲突的方法有很多种: 闭散列法开链法(哈希桶) 素数表字符串哈希算法在这里我们讨论最简单的闭散

哈希冲突的处理【闭散列方法-线性探测和二次探测】

散列表(Hash table,也叫哈希表),是根据关键码值(Key value)而直接进行访问的数据结构.也就是说,它通过把关键码值映射到表中一个位置来访问记录,以加快查找的速度.这个映射函数叫做散列函数,存放记录的数组叫做散列表. 给定表M,存在函数Hash(key),对任意给定的关键字值key,代入函数后若能得到包含该关键字的记录在表中的地址,则称表M为哈希(Hash)表,函数Hash(key)为哈希(Hash) 函数. 构造哈希表的两种方法 1.直接定址法--取关键字的某个线性函数为散列地

SDUT 3379 数据结构实验之查找七：线性之哈希表

数据结构实验之查找七:线性之哈希表 Time Limit: 1000MS Memory Limit: 65536KB Submit Statistic Problem Description 根据给定的一系列整数关键字和素数p,用除留余数法定义hash函数H(Key)=Key%p,将关键字映射到长度为p的哈希表中,用线性探测法解决冲突.重复关键字放在hash表中的同一位置. Input 连续输入多组数据,每组输入数据第一行为两个正整数N(N <= 1000)和p(p >= N的最小素数),N是