用 C++ 封装一个 Hash Table,并与 STL map 进行操作性能上的比较


1. 在计算机科学中,hash table 是一种常用的数据结构,它本质上是一种关联容器,把一个 key 映射到一个 value。它通过一个 hash function 把 key 映射成一个整数值,再由这个整数值得到存放 value 的容器下标。

2. 它主要支持三种操作:插入 key-value 对(insert)、按给定 key 查询(search)、删除 key-value 对(delete);


4. hash table 要处理的核心问题是选择好的 hash function,使 key 映射出的 index 足够分散均匀,尽量减少碰撞(不同的 key 映射到同一个 index)。为了解决碰撞,常用的方法有:Separate chaining、Linear probing、Quadratic probing、Rehash、Double hashing,详细细节请参看维基百科;


6. 本文设计了测试用例,比较了 hashtable 和 STL map 的操作性能。结果发现,执行相同的操作时,hashtable 所消耗的时间为 STL map 的九分之一;


#ifndef _HASH_TABLE_H_
#define _HASH_TABLE_H_

#include <assert.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <map>
#include <string>

#include "windows.h"

/**
 * Default key comparator: reports whether two C strings have identical contents.
 *
 * @param first   NUL-terminated string; must not be NULL.
 * @param second  NUL-terminated string; must not be NULL.
 * @return true when the strings are byte-for-byte equal, false otherwise.
 */
static bool StrCompare( const char* first, const char* second )
{
	// strcmp already stops at the first differing byte or at the terminator,
	// so measuring both lengths beforehand only added two extra passes.
	return strcmp( first, second ) == 0;
}

/**
 * Default string hash: starts from 5381 and folds in each byte as
 * hash = hash * 33 + byte.
 *
 * @param buf  Pointer to at least len readable bytes.
 * @param len  Number of bytes to hash; zero or a negative value hashes nothing.
 * @return The hash of the first len bytes (5381 when nothing is hashed).
 */
static unsigned int ImplHashFunc( const char* buf, int len )
{
	unsigned int hash = 5381;

	// A bounded loop terminates for len <= 0 as well.
	for( int i = 0; i < len; ++i )
	{
		// Read the byte as unsigned so the result does not depend on whether
		// plain char is signed on the target platform.
		hash = ((hash << 5) + hash) + static_cast<unsigned char>( buf[i] ); /* hash * 33 + c */
	}

	return hash;
}

/**
 * Case-insensitive variant of the string hash: starts from 5381 and folds in
 * each byte, lower-cased with tolower(), as hash = hash * 33 + byte.
 *
 * @param buf  Pointer to at least len readable bytes.
 * @param len  Number of bytes to hash; zero or a negative value hashes nothing.
 * @return The hash of the first len bytes (5381 when nothing is hashed).
 */
static unsigned int ImplCaseHashFunc( const unsigned char* buf, int len )
{
	unsigned int hash = 5381;

	// A bounded loop terminates for len <= 0 as well.
	for( int i = 0; i < len; ++i )
	{
		hash = ((hash << 5) + hash) + tolower( buf[i] ); /* hash * 33 + c */
	}

	return hash;
}

/**
 * Bit-mixing hash for integer keys: scrambles the input with a fixed sequence
 * of shift, add and xor steps.
 *
 * @param key  Value to scramble.
 * @return The mixed value; the same input always yields the same output.
 */
static unsigned int ImplHashFuncSimple( unsigned int key )
{
	key += ~(key << 15);
	key ^=  (key >> 10);
	key +=  (key << 3);
	key ^=  (key >> 6);
	key += ~(key << 11);
	key ^=  (key >> 16);
	return key;
}

* encapsulate the hash table
* advantage:
*          good performance;
*          terse interface to make more easy for outstanding and to employ
template<class T>
class HashTable
	typedef unsigned int (*HashFunctor)( const char* key, int len);
	typedef bool (*KeyCompare)( const char* keyFirst, const char* keySecond );

	static const int INIT_TABLE_SIZE = 689981;	

	typedef struct tagEntryNode
		char*  key;
		T      value;
		tagEntryNode* next;

		tagEntryNode():key(0), value(), next(0)


		tagEntryNode( const char* _key, const T& val ):
			value(val), next(0)
			size_t len = strlen(_key) + 1;
			key = new char[len];
			strncpy( key, _key, len - 1);
			key[len - 1] = ‘\0‘;

			delete [] key;
			key = 0;

	}EntryNode, *pEntryNode;

	typedef struct tagHashNode
		EntryNode** table;
		size_t      used;
		size_t      size;
		size_t      sizeMask;

		tagHashNode():table(0), used(0), size(0),sizeMask(0)


		tagHashNode( size_t _size ):table(0), used(0), size(_size),
			Init( _size );



		void Init( size_t _size )
			size = _size;
			sizeMask = size - 1;
			table = new EntryNode*[size];
			memset( table, 0x00, sizeof(EntryNode*)*size );

	}HashNode, *pHashNode;

	HashTable( HashFunctor functor = ImplHashFunc, KeyCompare cmpFunctor = StrCompare ):m_hashFunctor(functor),
		         m_hashTable(new HashNode), m_resizeRatio(2)


	* Clear all node and entity
	void Clear()
		Clear( m_hashTable );

	* Inset the pair of key and value
	bool Insert( const char* key, const T& value )

		return Insert( m_hashTable, key, value );

	* Retrieve the pointer of value for given key
	T* Find( const char* key )
		unsigned int hash = m_hashFunctor(key, strlen(key));
		unsigned int idx = hash % m_hashTable->size;

		EntryNode* entry = m_hashTable->table[idx];
		while( entry )
			if( m_keyCmpFunctor( entry->key, key) )
				return &entry->value;

			entry = entry->next;

		return NULL;

	* Delete hashEntry for given key
	void Delete( const char* key )
		unsigned int hash = m_hashFunctor(key, strlen(key));
		unsigned int idx = hash % m_hashTable->size;

		EntryNode* entry = m_hashTable->table[idx];
		EntryNode* preEntry = 0;
		while( entry )
			if( m_keyCmpFunctor( entry->key, key ) )
				if( preEntry )
					preEntry->next = entry->next;
					m_hashTable->table[idx] = entry->next;

				delete entry;
				entry = 0;
				preEntry = entry;
				entry  = entry->next;



	* Fink the index of corresponding of key value in the table
	int FindKeyIndex( pHashNode hashNode, const char* key )
		unsigned int hash = m_hashFunctor( key, strlen( key ) );
		unsigned int idx = hash % hashNode->size;

		EntryNode* entry = hashNode->table[idx];
		if( 0 == entry )

		while( entry )
			if( m_keyCmpFunctor( entry->key, key) )
				return -1;

			entry = entry->next;

		return idx;

	* Implement insert operation
	bool Insert( pHashNode hashNode, const char* key, const T& value )
		int idx = FindKeyIndex( hashNode, key );
		if( idx != -1 )
			EntryNode* newNode = new EntryNode( key, value );
			newNode->next = hashNode->table[idx];
			hashNode->table[idx] = newNode;

			return true;

		return false;

	* Rehash double store memory to make more root then remake insert operation
	* very important
	void Rehash()
		if( m_hashTable->used >= m_hashTable->size ||
			  (m_hashTable->used > 0 && (m_hashTable->size / m_hashTable->used) < m_resizeRatio ) )
			size_t newSize = NextPrime( m_hashTable->size * 2 );
			pHashNode newHashNode = new HashNode( newSize );

			for( size_t i = 0; i < m_hashTable->size; i++ )
				pEntryNode entryNode = m_hashTable->table[i];
				while( entryNode )
					Insert( newHashNode, entryNode->key, entryNode->value );
					entryNode = entryNode->next;


			Clear( m_hashTable );
			m_hashTable = newHashNode;


	* Implement clear operation
	void Clear( pHashNode hashNode )
		for( size_t i = 0; i < m_hashTable->size; i++ )
			pEntryNode entryNode = m_hashTable->table[i];
			while( entryNode )
				pEntryNode next = entryNode->next;
				delete entryNode;
				entryNode = next;


		delete [] m_hashTable->table;

	* Initialization
	void Init( size_t tableSize )
		m_hashTable->Init( tableSize );

	* Helper function
	* check prime
	bool IsPrime( size_t x)
		for( std::size_t i = 3; true; i += 2 )
			std::size_t q = x / i;
			if( q < i )
				return true;

			if( x == q * i )
				return false;
		return true;

	* Find next prime for given x
	size_t	NextPrime( size_t x )
		if( x <= 2 )
			return 2;

		if(!(x & 1))

		for(; !IsPrime(x); x += 2 );

		return x;


	HashFunctor m_hashFunctor;

	KeyCompare  m_keyCmpFunctor;

	pHashNode   m_hashTable;

	size_t      m_resizeRatio;


* Test hash table
void TestHashTable()
	unsigned long start = GetTickCount();

	HashTable<int> hashTable;
	const int Len = 500000;
	for( int i = 0; i < Len; i++ )
		char key[16] = {0};
		sprintf(key, "%s_%d", "china", i );

		assert(hashTable.Insert( key, i ));

	for( int i = 0; i < Len; i++ )
		char key[16] = {0};
		sprintf(key, "%s_%d", "china", i );

		if( i > 0 && !(i % 50) )
			hashTable.Delete( key );
			assert( !hashTable.Find( key ) );
			assert(i == *hashTable.Find( key));


	unsigned long interval = GetTickCount() - start;
	printf(" hash table consume time is %d \n", interval );

/**
 * Runs the same workload as the hash table test against
 * std::map<std::string, int>.
 *
 * Inserts 500000 keys "china_<i>", then walks them again: every 50th key
 * (except 0) is erased and must no longer be found, every other key must
 * still map to its index. Prints the elapsed GetTickCount() ticks.
 */
void TestHTSTLMap()
{
	typedef std::map<std::string, int > StrMap;

	unsigned long start = GetTickCount();

	StrMap strMap;
	const int Len = 500000;
	for( int i = 0; i < Len; i++ )
	{
		// "china_" + at most 6 digits + terminator fits in 16 bytes.
		char key[16] = {0};
		sprintf(key, "%s_%d", "china", i );
		std::string keyStr(key);

		strMap.insert( std::make_pair(keyStr, i )) ;
	}

	for( int i = 0; i < Len; i++ )
	{
		char key[16] = {0};
		sprintf(key, "%s_%d", "china", i );
		std::string keyStr(key);

		// Lookups stay outside assert() so they still run when NDEBUG is defined.
		if( i > 0 && !(i % 50) )
		{
			strMap.erase( keyStr );

			const bool gone = strMap.find( keyStr ) == strMap.end();
			assert( gone );
			(void)gone;
		}
		else
		{
			StrMap::iterator iter = strMap.find( keyStr );
			// Check for end() before dereferencing the iterator.
			assert( iter != strMap.end() && iter->second == i );
			(void)iter;
		}
	}

	unsigned long interval = GetTickCount() - start;
	// %lu matches the unsigned long argument.
	printf(" STL map consume time is %lu \n", interval );
}

* Test suite and compare performance
void TestSuiteHashTable()



Compiled and run in Visual Studio 2005.

本文主要分析g++ stl中哈希表的实现方法.stl中,除了以红黑树为底层存储结构的map和set,还有用哈希表实现的hash_map和hash_set.map和set的查询时间是对数级的,而hash_map和hash_set更快,可以达到常数级,不过哈希表需要更多内存空间,属于以空间换时间的用法,而且选择一个好的哈希函数也不那么容易. 一. 哈希表基本概念 哈希表,又名散列表,是根据关键字直接访问内存的数据结构.通过哈希函数,将键值映射转换成数组中的位置,就可以在O(1)的时间内访问到数据.举