哈希是一种算法:按照一定的规律(哈希函数)把指定的数据映射到一段存储空间内,之后又可以按照同样的规律定位并操作这个数据,这一段空间称作哈希表。它的平均查找速度要快于数组、链表、队列等线性的数据结构,所以具有独特的优势,一般将哈希算法用于快速查找,也常用于加密算法。
对于最简单的哈希表,由key经过哈希函数决定将这个值存于哈希表的什么位置,同时给每个位置设置一个状态:有数据插入时就将该位置的状态设置为EXITS,删除时设置为DELETE,从未使用过的位置为EMPTY。现在可以实现最简单的哈希表。
namespace First
{
    // Status of one slot in the open-addressing table.
    // The enumerator names (including the historical spelling "EXITS") are
    // part of the public interface and are printed by Print(), so they are kept.
    enum State
    {
        EMPTY,   // never held a key; a probe sequence stops here
        DELETE,  // tombstone: the key stored here was removed; probing continues past it
        EXITS    // holds a live key
    };

    // Fixed-capacity hash set using open addressing with linear probing.
    //
    // T must be default-constructible, assignable, comparable with == and
    // usable as the left operand of % (the hash is simply key % capacity).
    template <typename T>
    class HashTable
    {
    public:
        // Creates an empty table with room for `capacity` keys.
        // Slots are value-initialised so Print() never reads indeterminate values.
        HashTable(size_t capacity = 10)
            : _capacity(capacity)
            , _tables(new T[capacity]())
            , _states(new State[capacity])
            , _size(0)
        {
            for (size_t i = 0; i < _capacity; ++i)
            {
                _states[i] = EMPTY;
            }
        }

        ~HashTable()
        {
            delete[] _tables;
            delete[] _states;
        }

        // Deep copy: the new table owns its own slot and state arrays.
        HashTable(const HashTable<T>& h)
            : _capacity(h._capacity)
            , _tables(new T[h._capacity]())
            , _states(new State[h._capacity])
            , _size(h._size)
        {
            for (size_t i = 0; i < _capacity; ++i)
            {
                _tables[i] = h._tables[i];
                _states[i] = h._states[i];
            }
        }

        // Copy-and-swap assignment. `h` is already a private copy of the
        // right-hand side, so stealing its arrays is safe and self-assignment
        // needs no special case.
        HashTable& operator=(HashTable<T> h)
        {
            std::swap(_tables, h._tables);
            std::swap(_states, h._states);
            std::swap(_capacity, h._capacity);
            std::swap(_size, h._size);
            return *this;
        }

        // Adds `key` to the set.
        // Returns true when the key was stored; false when the table is full
        // (a message is printed) or the key is already present.
        bool Insert(const T& key)
        {
            if (_size == _capacity)
            {
                std::cout << "HashTable full" << std::endl;
                return false;
            }

            size_t slot = static_cast<size_t>(HashFunc(key));
            size_t target = _capacity;  // sentinel: no reusable slot seen yet

            // Walk the whole collision chain before storing: a live copy of the
            // key may sit behind a tombstone, and the first tombstone is the
            // best place to reuse.
            for (size_t probed = 0; probed < _capacity; ++probed)
            {
                if (_states[slot] == EXITS)
                {
                    if (_tables[slot] == key)
                    {
                        return false;  // duplicate
                    }
                }
                else
                {
                    if (target == _capacity)
                    {
                        target = slot;
                    }
                    if (_states[slot] == EMPTY)
                    {
                        break;  // end of the chain
                    }
                }
                slot = _Next(slot);
            }

            if (target == _capacity)
            {
                return false;  // defensive; cannot happen while _size < _capacity
            }
            _tables[target] = key;
            _states[target] = EXITS;
            ++_size;
            return true;
        }

        // Reports (and prints) whether `key` is currently stored.
        bool Find(const T& key) const
        {
            if (_Locate(key) != _capacity)
            {
                std::cout << "find succees" << std::endl;
                return true;
            }
            std::cout << "find fail" << std::endl;
            return false;
        }

        // Removes `key` by turning its slot into a tombstone.
        // Returns true when the key was present, false otherwise.
        bool Remove(const T& key)
        {
            const size_t slot = _Locate(key);
            if (slot == _capacity)
            {
                std::cout << "delete fail" << std::endl;
                return false;
            }
            std::cout << "delete key" << std::endl;
            _states[slot] = DELETE;
            --_size;  // frees room so Insert no longer reports a full table
            return true;
        }

        // Prints every slot as "[value,state] " followed by a newline.
        void Print() const
        {
            for (size_t i = 0; i < _capacity; ++i)
            {
                std::cout << '[' << _tables[i] << ',' << _states[i] << ']' << ' ';
            }
            std::cout << std::endl;
        }

    protected:
        // Home slot of `key`. Must not be called on a zero-capacity table.
        int HashFunc(const T& key) const
        {
            return static_cast<int>(key % _capacity);
        }

    private:
        // Next slot in the linear probe sequence, wrapping at the end.
        size_t _Next(size_t slot) const
        {
            return (slot + 1 == _capacity) ? 0 : slot + 1;
        }

        // Returns the slot holding a live copy of `key`, or _capacity if absent.
        // The walk is bounded by _capacity so a table without EMPTY slots
        // cannot cause an endless loop.
        size_t _Locate(const T& key) const
        {
            if (_size == 0)
            {
                return _capacity;  // also covers a zero-capacity table
            }
            size_t slot = static_cast<size_t>(HashFunc(key));
            for (size_t probed = 0; probed < _capacity; ++probed)
            {
                if (_states[slot] == EMPTY)
                {
                    break;
                }
                if (_states[slot] == EXITS && _tables[slot] == key)
                {
                    return slot;
                }
                slot = _Next(slot);
            }
            return _capacity;
        }

        size_t _capacity;  // number of slots
        T* _tables;        // slot storage
        State* _states;    // per-slot status, parallel to _tables
        size_t _size;      // number of live keys
    };
}
/**************************************/
从上面的代码可以看出,这个哈希表并不适用于实际:首先它是静态的,容量固定,如果存入的key值过多,表被占满后就无法继续插入;同时它用的是线性探测方法,发生冲突的数据容易聚集在一起,使探测序列变长,降低了查找效率。现在可以实现一种动态扩容并且带有负载因子限制的哈希表。
namespace Second  // the load-factor limit keeps collision chains short
{
    // Status of one slot in the open-addressing table.
    // The enumerator names (including the historical spelling "EXITS") are
    // part of the public interface and are printed by Print(), so they are kept.
    enum State
    {
        EMPTY,   // never held a key; a probe sequence stops here
        DELETE,  // tombstone: the key stored here was removed; probing continues past it
        EXITS    // holds a live key
    };

    // Growable hash set using open addressing with quadratic probing
    // (slot = home + i*i) and a maximum load factor of 0.6.
    //
    // T must be default-constructible, assignable, comparable with == and
    // usable as the left operand of % (the hash is simply key % capacity).
    template <typename T>
    class HashTable
    {
    public:
        // Creates an empty table. A requested capacity of 0 is raised to 1 so
        // the modulo in the hash functions is always defined.
        HashTable(size_t capacity = 30)
            : _capacity(capacity == 0 ? 1 : capacity)
            , _tables(new T[_capacity]())
            , _states(new State[_capacity])
            , _size(0)
        {
            for (size_t i = 0; i < _capacity; ++i)
            {
                _states[i] = EMPTY;
            }
        }

        ~HashTable()
        {
            delete[] _tables;
            delete[] _states;
        }

        // Deep copy: the new table owns its own slot and state arrays.
        HashTable(const HashTable<T>& h)
            : _capacity(h._capacity)
            , _tables(new T[h._capacity]())
            , _states(new State[h._capacity])
            , _size(h._size)
        {
            for (size_t i = 0; i < _capacity; ++i)
            {
                _tables[i] = h._tables[i];
                _states[i] = h._states[i];
            }
        }

        // Copy-and-swap assignment; `h` is already a private copy.
        HashTable& operator=(HashTable<T> h)
        {
            _swap(h);
            return *this;
        }

        // Adds `key` to the set, growing the table when needed.
        // Returns true when the key was stored, false when it was already present.
        bool Insert(const T& key)
        {
            _CheckCapacity();
            for (;;)
            {
                size_t vacant = _capacity;
                if (_Locate(key, vacant) != _capacity)
                {
                    return false;  // duplicate
                }
                if (vacant != _capacity)
                {
                    _tables[vacant] = key;
                    _states[vacant] = EXITS;
                    ++_size;
                    return true;
                }
                // A quadratic sequence does not reach every slot, so it can be
                // exhausted while the table still has room. Grow and retry.
                _Rehash(2 * _capacity);
            }
        }

        // Reports (and prints) whether `key` is currently stored.
        bool Find(const T& key) const
        {
            size_t unused = _capacity;
            if (_Locate(key, unused) != _capacity)
            {
                std::cout << "find success" << std::endl;
                return true;
            }
            std::cout << "find fail" << std::endl;
            return false;
        }

        // Removes `key` by turning its slot into a tombstone.
        // Returns true when the key was present, false otherwise.
        bool Remove(const T& key)
        {
            size_t unused = _capacity;
            const size_t slot = _Locate(key, unused);
            if (slot == _capacity)
            {
                return false;
            }
            _states[slot] = DELETE;
            --_size;
            return true;
        }

        // Prints every slot as "[value,state] " followed by a newline.
        void Print() const
        {
            for (size_t i = 0; i < _capacity; ++i)
            {
                std::cout << '[' << _tables[i] << ',' << _states[i] << ']' << ' ';
            }
            std::cout << std::endl;
        }

    protected:
        // Slot reached from `index` with quadratic offset i*i, wrapped to the table.
        int _HashFuncT(int index, int i) const
        {
            const size_t offset = static_cast<size_t>(i) * static_cast<size_t>(i);
            return static_cast<int>((static_cast<size_t>(index) + offset) % _capacity);
        }

        // Home slot of `key`.
        int _HashFunc(const T& key) const
        {
            return static_cast<int>(key % _capacity);
        }

        // Doubles the table once the load factor reaches 0.6. The comparison is
        // ">=" so the threshold cannot be stepped over, and it avoids division.
        void _CheckCapacity()
        {
            if (10 * _size >= 6 * _capacity)
            {
                _Rehash(2 * _capacity);
            }
        }

        // Exchanges contents with `h`. Taken by reference: the previous
        // by-value parameter forced a full deep copy on every growth.
        void _swap(HashTable<T>& h)
        {
            std::swap(_tables, h._tables);
            std::swap(_states, h._states);
            std::swap(_capacity, h._capacity);
            std::swap(_size, h._size);
        }

    private:
        // Walks the probe sequence of `key`.
        // Returns the slot holding a live copy of `key`, or _capacity if absent.
        // `vacant` receives the first reusable slot (tombstone or empty) seen
        // on the way, or _capacity when there was none.
        size_t _Locate(const T& key, size_t& vacant) const
        {
            const size_t home = static_cast<size_t>(_HashFunc(key));
            vacant = _capacity;
            // (i + capacity)^2 is congruent to i^2 modulo capacity, so
            // `capacity` steps cover every slot the sequence can ever reach.
            for (size_t step = 0; step < _capacity; ++step)
            {
                const size_t slot = (home + step * step) % _capacity;
                if (_states[slot] == EXITS)
                {
                    if (_tables[slot] == key)
                    {
                        return slot;
                    }
                }
                else
                {
                    if (vacant == _capacity)
                    {
                        vacant = slot;
                    }
                    if (_states[slot] == EMPTY)
                    {
                        break;  // end of the chain
                    }
                }
            }
            return _capacity;
        }

        // Rebuilds the table with `newCapacity` slots, re-inserting only live
        // keys (tombstones are dropped).
        void _Rehash(size_t newCapacity)
        {
            HashTable<T> bigger(newCapacity);
            for (size_t i = 0; i < _capacity; ++i)
            {
                if (_states[i] == EXITS)
                {
                    bigger.Insert(_tables[i]);
                }
            }
            _swap(bigger);
        }

        size_t _capacity;  // number of slots (never 0)
        T* _tables;        // slot storage
        State* _states;    // per-slot status, parallel to _tables
        size_t _size;      // number of live keys
    };
}
/****************************************/
上面的代码对于只存key的形式来说,相对第一种已经比较健全了。现在可以利用哈希算法实现一种key/value形式的功能,用来支持字典:key是一个信息,value是key的一个附带信息。比如说key为学号,那么班级就是附带的信息value;又例如简单的英汉字典,英文单词是key,中文释义是value。现进行简单的实现。
namespace Third  // key/value (dictionary) variant
{
    // Status of one slot in the open-addressing table.
    // The enumerator names (including the historical spelling "EXITS") are
    // part of the public interface and are printed by Print(), so they are kept.
    enum State
    {
        EMPTY,   // never held an entry; a probe sequence stops here
        DELETE,  // tombstone: the entry stored here was removed; probing continues past it
        EXITS    // holds a live entry
    };

    // One key/value entry of the dictionary.
    template<class T, class V>
    struct HashTableNode
    {
        // Value-initialises both members so unused slots print deterministically.
        HashTableNode()
            : _key()
            , _value()
        {}
        HashTableNode(const T& key, const V& value)
            : _key(key)
            , _value(value)
        {}
        T _key;
        V _value;
    };

    // Default hasher: converts the key to size_t. Suitable for integral keys;
    // other key types supply their own functor as the third template argument
    // of Dictionary.
    template <class T>
    struct __HashFunc
    {
        size_t operator()(const T& key) const
        {
            return key;
        }
    };

    // Growable key/value table using open addressing with quadratic probing
    // (slot = home + i*i) and a maximum load factor of 0.6.
    //
    // T needs ==, V and T need default construction and assignment, and
    // HashFunc()(key) must yield a value convertible to size_t.
    template <class T, class V, class HashFunc = __HashFunc<T> >
    class Dictionary
    {
    public:
        // Creates an empty dictionary. A requested capacity of 0 is raised to 1
        // so the modulo in the hash functions is always defined.
        Dictionary(size_t capacity = 10)
            : _capacity(capacity == 0 ? 1 : capacity)
            , _tables(new HashTableNode<T, V>[_capacity])
            , _states(new State[_capacity])
            , _size(0)
        {
            for (size_t i = 0; i < _capacity; ++i)
            {
                _states[i] = EMPTY;
            }
        }

        ~Dictionary()
        {
            delete[] _tables;
            delete[] _states;
        }

        // Deep copy. Without it the implicit copy would share the arrays and
        // both objects would delete them.
        Dictionary(const Dictionary<T, V, HashFunc>& other)
            : _capacity(other._capacity)
            , _tables(new HashTableNode<T, V>[other._capacity])
            , _states(new State[other._capacity])
            , _size(other._size)
        {
            for (size_t i = 0; i < _capacity; ++i)
            {
                _tables[i] = other._tables[i];
                _states[i] = other._states[i];
            }
        }

        // Copy-and-swap assignment; `other` is already a private copy.
        Dictionary& operator=(Dictionary<T, V, HashFunc> other)
        {
            _Swap(other);
            return *this;
        }

        // Stores `value` under `key`, growing the table when needed.
        // Returns true when the entry was added, false when the key already
        // exists (the stored value is left untouched).
        bool Insert(const T& key, const V& value)
        {
            _CheckCapacity();
            for (;;)
            {
                size_t vacant = _capacity;
                if (_Locate(key, vacant) != _capacity)
                {
                    return false;  // duplicate key
                }
                if (vacant != _capacity)
                {
                    _tables[vacant] = HashTableNode<T, V>(key, value);
                    _states[vacant] = EXITS;
                    ++_size;
                    return true;
                }
                // A quadratic sequence does not reach every slot, so it can be
                // exhausted while the table still has room. Grow and retry.
                _Rehash(2 * _capacity);
            }
        }

        // Looks up `key` and prints the outcome.
        // Returns a pointer to the stored entry, or a null pointer when the
        // key is absent. The pointer is invalidated by any later Insert that
        // grows the table.
        HashTableNode<T, V>* Find(const T& key)
        {
            size_t unused = _capacity;
            const size_t slot = _Locate(key, unused);
            if (slot == _capacity)
            {
                std::cout << "find fail" << std::endl;
                return 0;
            }
            std::cout << "find success" << std::endl;
            return _tables + slot;
        }

        // Removes the entry for `key` by turning its slot into a tombstone.
        // Returns true when the key was present, false otherwise.
        bool Remove(const T& key)
        {
            size_t unused = _capacity;
            const size_t slot = _Locate(key, unused);
            if (slot == _capacity)
            {
                return false;
            }
            _states[slot] = DELETE;
            --_size;
            return true;
        }

        // Prints every slot as "[key,value,state] " followed by a newline.
        void Print()
        {
            for (size_t i = 0; i < _capacity; ++i)
            {
                std::cout << "[" << _tables[i]._key << "," << _tables[i]._value << "," << _states[i] << "]" << " ";
            }
            std::cout << std::endl;
        }

    protected:
        // Doubles the table once the load factor reaches 0.6. The comparison is
        // ">=" so the threshold cannot be stepped over, and it avoids division.
        void _CheckCapacity()
        {
            if (10 * _size >= 6 * _capacity)
            {
                _Rehash(2 * _capacity);
            }
        }

        // Exchanges contents with `tmp`. Must take a reference: a by-value
        // parameter would swap with a temporary copy and leave this object
        // pointing at arrays the caller's object still owns and will delete.
        void _Swap(Dictionary<T, V, HashFunc>& tmp)
        {
            std::swap(_tables, tmp._tables);
            std::swap(_states, tmp._states);
            std::swap(_capacity, tmp._capacity);
            std::swap(_size, tmp._size);
        }

        // Home slot of `key`, computed through the HashFunc policy.
        size_t _HashFunonce(const T& key)
        {
            return static_cast<size_t>(HashFunc()(key)) % _capacity;
        }

        // Slot reached from `index` with quadratic offset i*i, wrapped to the table.
        size_t _HashFuntwice(int index, int i)
        {
            const size_t offset = static_cast<size_t>(i) * static_cast<size_t>(i);
            return (static_cast<size_t>(index) + offset) % _capacity;
        }

    private:
        // Walks the probe sequence of `key`.
        // Returns the slot holding a live entry for `key`, or _capacity if absent.
        // `vacant` receives the first reusable slot (tombstone or empty) seen
        // on the way, or _capacity when there was none.
        size_t _Locate(const T& key, size_t& vacant)
        {
            const size_t home = _HashFunonce(key);
            vacant = _capacity;
            // (i + capacity)^2 is congruent to i^2 modulo capacity, so
            // `capacity` steps cover every slot the sequence can ever reach.
            for (size_t step = 0; step < _capacity; ++step)
            {
                const size_t slot = (home + step * step) % _capacity;
                if (_states[slot] == EXITS)
                {
                    if (_tables[slot]._key == key)
                    {
                        return slot;
                    }
                }
                else
                {
                    if (vacant == _capacity)
                    {
                        vacant = slot;
                    }
                    if (_states[slot] == EMPTY)
                    {
                        break;  // end of the chain
                    }
                }
            }
            return _capacity;
        }

        // Rebuilds the table with `newCapacity` slots, re-inserting only live
        // entries (tombstones are dropped).
        void _Rehash(size_t newCapacity)
        {
            Dictionary<T, V, HashFunc> bigger(newCapacity);
            for (size_t i = 0; i < _capacity; ++i)
            {
                if (_states[i] == EXITS)
                {
                    bigger.Insert(_tables[i]._key, _tables[i]._value);
                }
            }
            _Swap(bigger);
        }

        size_t _capacity;              // number of slots (never 0)
        HashTableNode<T, V>* _tables;  // slot storage
        State* _states;                // per-slot status, parallel to _tables
        size_t _size;                  // number of live entries
    };
}
void test3()//二次探测,负载因子,实现字典的功能
{
/*Third::Dictionary<int, string> h1;
h1.Insert(10, "c语言基础");
h1.Insert(59, "c++基础");
h1.Insert(9, "数据结构");
h1.Insert(19, "Linux");
h1.Insert(18, "网络编程");*/
Third::Dictionary<int,int>h1;
h1.Insert(10, 1);
h1.Insert(59, 2);
h1.Insert(9, 3);
h1.Insert(19,4);
h1.Insert(18, 5);
//h1.Print();
cout<<h1.Find(9)->_value<<endl;
//h1.Remove(9);
//h1.Remove(19);
//h1.Remove(10);
//h1.Print();
}
上述就是对哈希算法的简单应用。