STL源码剖析 容器 stl_hashtable.h

本文为senlie原创,转载请保留此地址:http://blog.csdn.net/zhengsenlie

hashtable

---------------------------------------------------------------------------

二叉搜索树具有对数平均时间的表现,它建立在输入数据有足够的随机性的假设

hashtable 有常数平均时间的表现,基于统计,不需依赖输入元素的随机性

hashtalbe 的简单实现:

所有元素都 16-bits 不带正负号的整数,范围 0~65535,配置一个 array,索引号码为 0~65535,初值全部为 0 。

每一个元素值表示索引号对应的元素值出现的次数。

每插入元素 i ,执行 array[i]++; 每删除元素 i ,执行 array[i]--;

搜索元素 i ,只要检查 array[i] 是否为 0

排序时,只要遍历 array ,输入 array[i] 个 i

问题:

1.如果元素是字符串(或其它)而非整数,将无法被拿来作为 array 的索引

2.如果元素是 32-bits,需要的索引数是 2^32;

解决:

问题1

整数是由数字组成的,字符串是由字符组成的。

在整数的时候,索引取的是各个数字的十进制组合表示。 如 1234 取索引 1*10^3 + 2*10^2 + 3*10^1 + 4*10^0

在字符串的时候,索引同样也可以取各个字符的ASCII编码组合表示。如 hou 可以取索引 ‘h‘*128^2 + ‘o‘*128^1 + ‘u‘*128^0

问题2

采用hash function 将元素值映射到大小可接受的索引范围

-->问题:不同元素被映射到相同的位置

-->解决:线性探测、二次探测、开链

线性、二次指的是碰撞时前进的步伐大小

线性 --> H+1,   H+2,   H+3, ...

二次 --> H+1^2, H+2^2, H+3^2, ...

开链法是指在每一个表元素中维护一个 list ,在那个元素上执行元素的插入、搜寻、删除

SGI STL 的 hash table 采用的是开链法

hashtable 的能容纳的元素个数就是 bucket vector 的大小,当超过这个大小时,hashtable就会调用 resize() 函数重新分配大小,

重建新的 hashtable

#ifndef __SGI_STL_INTERNAL_HASHTABLE_H
#define __SGI_STL_INTERNAL_HASHTABLE_H

// Hashtable class, used to implement the hashed associative containers
// hash_set, hash_map, hash_multiset, and hash_multimap.

#include <stl_algobase.h>
#include <stl_alloc.h>
#include <stl_construct.h>
#include <stl_tempbuf.h>
#include <stl_algo.h>
#include <stl_uninitialized.h>
#include <stl_function.h>
#include <stl_vector.h>
#include <stl_hash_fun.h>

__STL_BEGIN_NAMESPACE

// hashtable 元素所维护的链表节点
template <class Value>
struct __hashtable_node
{
  __hashtable_node* next;
  Value val;
};  

template <class Value, class Key, class HashFcn,
          class ExtractKey, class EqualKey, class Alloc = alloc>
class hashtable;

template <class Value, class Key, class HashFcn,
          class ExtractKey, class EqualKey, class Alloc>
struct __hashtable_iterator;

template <class Value, class Key, class HashFcn,
          class ExtractKey, class EqualKey, class Alloc>
struct __hashtable_const_iterator;

//hashtable 的迭代器
template <class Value, class Key, class HashFcn,
          class ExtractKey, class EqualKey, class Alloc>
struct __hashtable_iterator {
  typedef hashtable<Value, Key, HashFcn, ExtractKey, EqualKey, Alloc>
          hashtable;
  typedef __hashtable_iterator<Value, Key, HashFcn,
                               ExtractKey, EqualKey, Alloc>
          iterator;
  typedef __hashtable_const_iterator<Value, Key, HashFcn,
                                     ExtractKey, EqualKey, Alloc>
          const_iterator;
  typedef __hashtable_node<Value> node;

  typedef forward_iterator_tag iterator_category;
  typedef Value value_type;
  typedef ptrdiff_t difference_type;
  typedef size_t size_type;
  typedef Value& reference;
  typedef Value* pointer;

  node* cur;  //迭代器目前所指的节点
  hashtable* ht; //保持对容器的连结关系(因为可能需要从 bucket 跳到 bucket)

  __hashtable_iterator(node* n, hashtable* tab) : cur(n), ht(tab) {}
  __hashtable_iterator() {}
  reference operator*() const { return cur->val; }
#ifndef __SGI_STL_NO_ARROW_OPERATOR
  pointer operator->() const { return &(operator*()); }
#endif /* __SGI_STL_NO_ARROW_OPERATOR */
  iterator& operator++();
  iterator operator++(int);
  bool operator==(const iterator& it) const { return cur == it.cur; }
  bool operator!=(const iterator& it) const { return cur != it.cur; }
};

//为什么会有个专门的 __hashtable_const_iterator ,用 const __hashtable_iterator 不行吗?
template <class Value, class Key, class HashFcn,
          class ExtractKey, class EqualKey, class Alloc>
struct __hashtable_const_iterator {
  typedef hashtable<Value, Key, HashFcn, ExtractKey, EqualKey, Alloc>
          hashtable;
  typedef __hashtable_iterator<Value, Key, HashFcn,
                               ExtractKey, EqualKey, Alloc>
          iterator;
  typedef __hashtable_const_iterator<Value, Key, HashFcn,
                                     ExtractKey, EqualKey, Alloc>
          const_iterator;
  typedef __hashtable_node<Value> node;

  typedef forward_iterator_tag iterator_category;
  typedef Value value_type;
  typedef ptrdiff_t difference_type;
  typedef size_t size_type;
  typedef const Value& reference;
  typedef const Value* pointer;

  const node* cur;
  const hashtable* ht;

  __hashtable_const_iterator(const node* n, const hashtable* tab)
    : cur(n), ht(tab) {}
  __hashtable_const_iterator() {}
  __hashtable_const_iterator(const iterator& it) : cur(it.cur), ht(it.ht) {}
  reference operator*() const { return cur->val; }
#ifndef __SGI_STL_NO_ARROW_OPERATOR
  pointer operator->() const { return &(operator*()); }
#endif /* __SGI_STL_NO_ARROW_OPERATOR */
  const_iterator& operator++();
  const_iterator operator++(int);
  bool operator==(const const_iterator& it) const { return cur == it.cur; }
  bool operator!=(const const_iterator& it) const { return cur != it.cur; }
};

//线性探测、二次探测的方法的表大小最好是质数
//开链法的表不需要是质数,不过 SGI STL 仍以质数来设计表的大小
// Note: assumes long is at least 32 bits.
static const int __stl_num_primes = 28;
static const unsigned long __stl_prime_list[__stl_num_primes] =
{
  53,         97,           193,         389,       769,
  1543,       3079,         6151,        12289,     24593,
  49157,      98317,        196613,      393241,    786433,
  1572869,    3145739,      6291469,     12582917,  25165843,
  50331653,   100663319,    201326611,   402653189, 805306457,
  1610612741, 3221225473ul, 4294967291ul
};

inline unsigned long __stl_next_prime(unsigned long n)
{
  const unsigned long* first = __stl_prime_list;
  const unsigned long* last = __stl_prime_list + __stl_num_primes;
  const unsigned long* pos = lower_bound(first, last, n); //用 lower_bound 来查找与要设计的表的大小最接近的质数
  return pos == last ? *(last - 1) : *pos;
}

template <class Value, class Key, class HashFcn,
          class ExtractKey, class EqualKey,
          class Alloc>
class hashtable {
public:
  typedef Key key_type;     //键值的类型
  typedef Value value_type; //实值的类型
  typedef HashFcn hasher;   //hash function
  typedef EqualKey key_equal;//判断键值是否相同

  typedef size_t            size_type;
  typedef ptrdiff_t         difference_type;
  typedef value_type*       pointer;
  typedef const value_type* const_pointer;
  typedef value_type&       reference;
  typedef const value_type& const_reference;

  hasher hash_funct() const { return hash; }
  key_equal key_eq() const { return equals; }

private:
  // 三个 function object
  hasher hash;
  key_equal equals;
  ExtractKey get_key;

  typedef __hashtable_node<Value> node;
  typedef simple_alloc<node, Alloc> node_allocator; //每次分配一个 node 大小的空间

  vector<node*,Alloc> buckets; // 以 vector 表示 buckets 数组,每个 bucket 元素维护一个指向链表的 node * 地址
  size_type num_elements; //hashtable 里的元素个数

public:
  typedef __hashtable_iterator<Value, Key, HashFcn, ExtractKey, EqualKey,
                               Alloc>
  iterator;

  typedef __hashtable_const_iterator<Value, Key, HashFcn, ExtractKey, EqualKey,
                                     Alloc>
  const_iterator;

  friend struct
  __hashtable_iterator<Value, Key, HashFcn, ExtractKey, EqualKey, Alloc>;
  friend struct
  __hashtable_const_iterator<Value, Key, HashFcn, ExtractKey, EqualKey, Alloc>;

public:
  hashtable(size_type n,
            const HashFcn&    hf,
            const EqualKey&   eql,
            const ExtractKey& ext)
    : hash(hf), equals(eql), get_key(ext), num_elements(0)
  {
    initialize_buckets(n);
  }

  hashtable(size_type n,
            const HashFcn&    hf,
            const EqualKey&   eql)
    : hash(hf), equals(eql), get_key(ExtractKey()), num_elements(0)
  {
    initialize_buckets(n);
  }

  hashtable(const hashtable& ht)
    : hash(ht.hash), equals(ht.equals), get_key(ht.get_key), num_elements(0)
  {
    copy_from(ht);
  }

  hashtable& operator= (const hashtable& ht)
  {
    if (&ht != this) {
      clear();
      hash = ht.hash;
      equals = ht.equals;
      get_key = ht.get_key;
      copy_from(ht);
    }
    return *this;
  }

  ~hashtable() { clear(); }

  size_type size() const { return num_elements; }
  size_type max_size() const { return size_type(-1); }
  bool empty() const { return size() == 0; }

  void swap(hashtable& ht)
  {
    __STD::swap(hash, ht.hash);
    __STD::swap(equals, ht.equals);
    __STD::swap(get_key, ht.get_key);
    buckets.swap(ht.buckets);
    __STD::swap(num_elements, ht.num_elements);
  }

  iterator begin()
  {
    for (size_type n = 0; n < buckets.size(); ++n)
      if (buckets[n])
        return iterator(buckets[n], this);
    return end();
  }

  iterator end() { return iterator(0, this); }

  const_iterator begin() const
  {
    for (size_type n = 0; n < buckets.size(); ++n)
      if (buckets[n])
        return const_iterator(buckets[n], this);
    return end();
  }

  const_iterator end() const { return const_iterator(0, this); }

  friend bool
  operator== __STL_NULL_TMPL_ARGS (const hashtable&, const hashtable&);

public:

  size_type bucket_count() const { return buckets.size(); }

  size_type max_bucket_count() const
    { return __stl_prime_list[__stl_num_primes - 1]; } 

  size_type elems_in_bucket(size_type bucket) const
  {
    size_type result = 0;
    for (node* cur = buckets[bucket]; cur; cur = cur->next)
      result += 1;
    return result;
  }

  //插入元素,不许重复
  pair<iterator, bool> insert_unique(const value_type& obj)
  {
    resize(num_elements + 1); //判断是否需要重建表,如需要就扩充
    return insert_unique_noresize(obj);
  }

  //插入元素,允许重复
  iterator insert_equal(const value_type& obj)
  {
    resize(num_elements + 1);
    return insert_equal_noresize(obj);
  }

  pair<iterator, bool> insert_unique_noresize(const value_type& obj);
  iterator insert_equal_noresize(const value_type& obj);

#ifdef __STL_MEMBER_TEMPLATES
  template <class InputIterator>
  void insert_unique(InputIterator f, InputIterator l)
  {
    insert_unique(f, l, iterator_category(f));
  }

  template <class InputIterator>
  void insert_equal(InputIterator f, InputIterator l)
  {
    insert_equal(f, l, iterator_category(f));
  }

  template <class InputIterator>
  void insert_unique(InputIterator f, InputIterator l,
                     input_iterator_tag)
  {
    for ( ; f != l; ++f)
      insert_unique(*f);
  }

  template <class InputIterator>
  void insert_equal(InputIterator f, InputIterator l,
                    input_iterator_tag)
  {
    for ( ; f != l; ++f)
      insert_equal(*f);
  }

  template <class ForwardIterator>
  void insert_unique(ForwardIterator f, ForwardIterator l,
                     forward_iterator_tag)
  {
    size_type n = 0;
    distance(f, l, n);
    resize(num_elements + n);
    for ( ; n > 0; --n, ++f)
      insert_unique_noresize(*f);
  }

  template <class ForwardIterator>
  void insert_equal(ForwardIterator f, ForwardIterator l,
                    forward_iterator_tag)
  {
    size_type n = 0;
    distance(f, l, n);
    resize(num_elements + n);
    for ( ; n > 0; --n, ++f)
      insert_equal_noresize(*f);
  }

#else /* __STL_MEMBER_TEMPLATES */
  void insert_unique(const value_type* f, const value_type* l)
  {
    size_type n = l - f;
    resize(num_elements + n);
    for ( ; n > 0; --n, ++f)
      insert_unique_noresize(*f);
  }

  void insert_equal(const value_type* f, const value_type* l)
  {
    size_type n = l - f;
    resize(num_elements + n);
    for ( ; n > 0; --n, ++f)
      insert_equal_noresize(*f);
  }

  void insert_unique(const_iterator f, const_iterator l)
  {
    size_type n = 0;
    distance(f, l, n);
    resize(num_elements + n);
    for ( ; n > 0; --n, ++f)
      insert_unique_noresize(*f);
  }

  void insert_equal(const_iterator f, const_iterator l)
  {
    size_type n = 0;
    distance(f, l, n);
    resize(num_elements + n);
    for ( ; n > 0; --n, ++f)
      insert_equal_noresize(*f);
  }
#endif /*__STL_MEMBER_TEMPLATES */

  reference find_or_insert(const value_type& obj);

  iterator find(const key_type& key)
  {
    //首先得到键值 key 应该落入的 bucket
    size_type n = bkt_num_key(key);
    node* first;
	//然后遍历这个 bucket 看看能不能找到有相同 key 的元素
    for ( first = buckets[n];
          first && !equals(get_key(first->val), key);
          first = first->next)
      {}
    return iterator(first, this);
  } 

  const_iterator find(const key_type& key) const
  {
    size_type n = bkt_num_key(key);
    const node* first;
    for ( first = buckets[n];
          first && !equals(get_key(first->val), key);
          first = first->next)
      {}
    return const_iterator(first, this);
  } 

  size_type count(const key_type& key) const
  {
    const size_type n = bkt_num_key(key);
    size_type result = 0;

    for (const node* cur = buckets[n]; cur; cur = cur->next)
      if (equals(get_key(cur->val), key))
        ++result;
    return result;
  }

  pair<iterator, iterator> equal_range(const key_type& key);
  pair<const_iterator, const_iterator> equal_range(const key_type& key) const;

  size_type erase(const key_type& key);
  void erase(const iterator& it);
  void erase(iterator first, iterator last);

  void erase(const const_iterator& it);
  void erase(const_iterator first, const_iterator last);

  void resize(size_type num_elements_hint);
  void clear();

private:
  size_type next_size(size_type n) const { return __stl_next_prime(n); }

  void initialize_buckets(size_type n)
  {
    const size_type n_buckets = next_size(n);
    buckets.reserve(n_buckets);
    buckets.insert(buckets.end(), n_buckets, (node*) 0);
    num_elements = 0;
  }

  // 接受键值, 返回 key 位于哪个 bucket
  size_type bkt_num_key(const key_type& key) const
  {
    return bkt_num_key(key, buckets.size());
  }

  //接受实值, 返回 obj 位于哪个 bucket
  size_type bkt_num(const value_type& obj) const
  {
	// 调用 get_key 得到 key,调用 bkt_num_key 得到 #bucket
    return bkt_num_key(get_key(obj));
  }
  // 接受键值和 bucket 个数, 返回 key 位于哪个 bucket
  size_type bkt_num_key(const key_type& key, size_t n) const
  {
    return hash(key) % n;
  }
  //接受实值和 bucket 个数, 返回 obj 位于哪个 bucket
  size_type bkt_num(const value_type& obj, size_t n) const
  {
    return bkt_num_key(get_key(obj), n);
  }

  node* new_node(const value_type& obj)
  {
    node* n = node_allocator::allocate(); //分配节点空间
    n->next = 0;
    __STL_TRY {
      construct(&n->val, obj); //构造节点
      return n;
    }
    __STL_UNWIND(node_allocator::deallocate(n));
  }

  void delete_node(node* n)
  {
    destroy(&n->val); //析构节点
    node_allocator::deallocate(n); //释放空间
  }

  void erase_bucket(const size_type n, node* first, node* last);
  void erase_bucket(const size_type n, node* last);

  void copy_from(const hashtable& ht);

};

template <class V, class K, class HF, class ExK, class EqK, class A>
__hashtable_iterator<V, K, HF, ExK, EqK, A>&
__hashtable_iterator<V, K, HF, ExK, EqK, A>::operator++()
{
  const node* old = cur;
  cur = cur->next; //如果当前迭代器所指的链表节点有下一个节点,那就将迭代器指向那个节点。否则就要跳到下一个 bucket 了
  if (!cur) {
    size_type bucket = ht->bkt_num(old->val); //定位当前 bucket 的下一个
    while (!cur && ++bucket < ht->buckets.size()) //找到当前 bucket 之后的第一个不为空的 bucket
      cur = ht->buckets[bucket]; //让 cur 指向 bucket 的头节点
  }
  return *this;
}

//如非必要,还是使用前置 operator++ 吧,后置的编译器要帮它生成和个 int 参数,运行时要产生临时对象
//内部还是调用前置 operator++ 实现的,返回的时候又要调用拷贝构造函数,还要析构掉之前生成的临时对象
template <class V, class K, class HF, class ExK, class EqK, class A>
inline __hashtable_iterator<V, K, HF, ExK, EqK, A>
__hashtable_iterator<V, K, HF, ExK, EqK, A>::operator++(int)
{
  iterator tmp = *this;
  ++*this;
  return tmp;
}

template <class V, class K, class HF, class ExK, class EqK, class A>
__hashtable_const_iterator<V, K, HF, ExK, EqK, A>&
__hashtable_const_iterator<V, K, HF, ExK, EqK, A>::operator++()
{
  const node* old = cur;
  cur = cur->next;
  if (!cur) {
    size_type bucket = ht->bkt_num(old->val);
    while (!cur && ++bucket < ht->buckets.size())
      cur = ht->buckets[bucket];
  }
  return *this;
}

template <class V, class K, class HF, class ExK, class EqK, class A>
inline __hashtable_const_iterator<V, K, HF, ExK, EqK, A>
__hashtable_const_iterator<V, K, HF, ExK, EqK, A>::operator++(int)
{
  const_iterator tmp = *this;
  ++*this;
  return tmp;
}

#ifndef __STL_CLASS_PARTIAL_SPECIALIZATION

template <class V, class K, class HF, class ExK, class EqK, class All>
inline forward_iterator_tag
iterator_category(const __hashtable_iterator<V, K, HF, ExK, EqK, All>&)
{
  return forward_iterator_tag();
}

template <class V, class K, class HF, class ExK, class EqK, class All>
inline V* value_type(const __hashtable_iterator<V, K, HF, ExK, EqK, All>&)
{
  return (V*) 0;
}

template <class V, class K, class HF, class ExK, class EqK, class All>
inline hashtable<V, K, HF, ExK, EqK, All>::difference_type*
distance_type(const __hashtable_iterator<V, K, HF, ExK, EqK, All>&)
{
  return (hashtable<V, K, HF, ExK, EqK, All>::difference_type*) 0;
}

template <class V, class K, class HF, class ExK, class EqK, class All>
inline forward_iterator_tag
iterator_category(const __hashtable_const_iterator<V, K, HF, ExK, EqK, All>&)
{
  return forward_iterator_tag();
}

template <class V, class K, class HF, class ExK, class EqK, class All>
inline V*
value_type(const __hashtable_const_iterator<V, K, HF, ExK, EqK, All>&)
{
  return (V*) 0;
}

template <class V, class K, class HF, class ExK, class EqK, class All>
inline hashtable<V, K, HF, ExK, EqK, All>::difference_type*
distance_type(const __hashtable_const_iterator<V, K, HF, ExK, EqK, All>&)
{
  return (hashtable<V, K, HF, ExK, EqK, All>::difference_type*) 0;
}

#endif /* __STL_CLASS_PARTIAL_SPECIALIZATION */

template <class V, class K, class HF, class Ex, class Eq, class A>
bool operator==(const hashtable<V, K, HF, Ex, Eq, A>& ht1,
                const hashtable<V, K, HF, Ex, Eq, A>& ht2)
{
  typedef typename hashtable<V, K, HF, Ex, Eq, A>::node node;
  if (ht1.buckets.size() != ht2.buckets.size())
    return false;
  for (int n = 0; n < ht1.buckets.size(); ++n) {
    node* cur1 = ht1.buckets[n];
    node* cur2 = ht2.buckets[n];
    for ( ; cur1 && cur2 && cur1->val == cur2->val;
          cur1 = cur1->next, cur2 = cur2->next)
      {}
    if (cur1 || cur2)
      return false;
  }
  return true;
}  

#ifdef __STL_FUNCTION_TMPL_PARTIAL_ORDER

template <class Val, class Key, class HF, class Extract, class EqKey, class A>
inline void swap(hashtable<Val, Key, HF, Extract, EqKey, A>& ht1,
                 hashtable<Val, Key, HF, Extract, EqKey, A>& ht2) {
  ht1.swap(ht2);
}

#endif /* __STL_FUNCTION_TMPL_PARTIAL_ORDER */

//在不需要重建表的情况下插入新节点。键值重复的节点不会插入
template <class V, class K, class HF, class Ex, class Eq, class A>
pair<typename hashtable<V, K, HF, Ex, Eq, A>::iterator, bool>
hashtable<V, K, HF, Ex, Eq, A>::insert_unique_noresize(const value_type& obj)
{
  const size_type n = bkt_num(obj); //决定 obj 就位于哪个 bucket
  node* first = buckets[n]; //令 first 指向 bucket 对应的串行头部

  for (node* cur = first; cur; cur = cur->next)
    //发现键值与链表同的某节点键值相同,不插入
    if (equals(get_key(cur->val), get_key(obj)))
      return pair<iterator, bool>(iterator(cur, this), false);

  //在链表头插入新节点
  node* tmp = new_node(obj);
  tmp->next = first;
  buckets[n] = tmp;
  ++num_elements;
  return pair<iterator, bool>(iterator(tmp, this), true);
}

//在不需要重建表的情况下插入新节点。插入的节点键值可以重复
template <class V, class K, class HF, class Ex, class Eq, class A>
typename hashtable<V, K, HF, Ex, Eq, A>::iterator
hashtable<V, K, HF, Ex, Eq, A>::insert_equal_noresize(const value_type& obj)
{
  const size_type n = bkt_num(obj); //决定 obj 就位于哪个 bucket
  node* first = buckets[n]; //令 first 指向 bucket 对应的串行头部

  for (node* cur = first; cur; cur = cur->next)
    //如果发现与链表中的某键值相同,就马上插入,然后返回
    if (equals(get_key(cur->val), get_key(obj))) {
      node* tmp = new_node(obj);
      tmp->next = cur->next;
      cur->next = tmp;
      ++num_elements;
      return iterator(tmp, this);
    }
  //链表中没有相同键值的节点,将新节点插入在链表
  node* tmp = new_node(obj);
  tmp->next = first;
  buckets[n] = tmp;
  ++num_elements;
  return iterator(tmp, this);
}

template <class V, class K, class HF, class Ex, class Eq, class A>
typename hashtable<V, K, HF, Ex, Eq, A>::reference
hashtable<V, K, HF, Ex, Eq, A>::find_or_insert(const value_type& obj)
{
  resize(num_elements + 1);

  size_type n = bkt_num(obj);
  node* first = buckets[n];

  for (node* cur = first; cur; cur = cur->next)
    if (equals(get_key(cur->val), get_key(obj)))
      return cur->val;

  node* tmp = new_node(obj);
  tmp->next = first;
  buckets[n] = tmp;
  ++num_elements;
  return tmp->val;
}

template <class V, class K, class HF, class Ex, class Eq, class A>
pair<typename hashtable<V, K, HF, Ex, Eq, A>::iterator,
     typename hashtable<V, K, HF, Ex, Eq, A>::iterator>
hashtable<V, K, HF, Ex, Eq, A>::equal_range(const key_type& key)
{
  typedef pair<iterator, iterator> pii;
  const size_type n = bkt_num_key(key);

  for (node* first = buckets[n]; first; first = first->next) {
    if (equals(get_key(first->val), key)) {
      for (node* cur = first->next; cur; cur = cur->next)
        if (!equals(get_key(cur->val), key))
          return pii(iterator(first, this), iterator(cur, this));
      for (size_type m = n + 1; m < buckets.size(); ++m)
        if (buckets[m])
          return pii(iterator(first, this),
                     iterator(buckets[m], this));
      return pii(iterator(first, this), end());
    }
  }
  return pii(end(), end());
}

template <class V, class K, class HF, class Ex, class Eq, class A>
pair<typename hashtable<V, K, HF, Ex, Eq, A>::const_iterator,
     typename hashtable<V, K, HF, Ex, Eq, A>::const_iterator>
hashtable<V, K, HF, Ex, Eq, A>::equal_range(const key_type& key) const
{
  typedef pair<const_iterator, const_iterator> pii;
  const size_type n = bkt_num_key(key);

  for (const node* first = buckets[n] ; first; first = first->next) {
    if (equals(get_key(first->val), key)) {
      for (const node* cur = first->next; cur; cur = cur->next)
        if (!equals(get_key(cur->val), key))
          return pii(const_iterator(first, this),
                     const_iterator(cur, this));
      for (size_type m = n + 1; m < buckets.size(); ++m)
        if (buckets[m])
          return pii(const_iterator(first, this),
                     const_iterator(buckets[m], this));
      return pii(const_iterator(first, this), end());
    }
  }
  return pii(end(), end());
}

template <class V, class K, class HF, class Ex, class Eq, class A>
typename hashtable<V, K, HF, Ex, Eq, A>::size_type
hashtable<V, K, HF, Ex, Eq, A>::erase(const key_type& key)
{
  const size_type n = bkt_num_key(key);
  node* first = buckets[n];
  size_type erased = 0;

  if (first) {
    node* cur = first;
    node* next = cur->next;
    while (next) {
      if (equals(get_key(next->val), key)) {
        cur->next = next->next;
        delete_node(next);
        next = cur->next;
        ++erased;
        --num_elements;
      }
      else {
        cur = next;
        next = cur->next;
      }
    }
    if (equals(get_key(first->val), key)) {
      buckets[n] = first->next;
      delete_node(first);
      ++erased;
      --num_elements;
    }
  }
  return erased;
}

template <class V, class K, class HF, class Ex, class Eq, class A>
void hashtable<V, K, HF, Ex, Eq, A>::erase(const iterator& it)
{
  if (node* const p = it.cur) {
    const size_type n = bkt_num(p->val);
    node* cur = buckets[n];

    if (cur == p) {
      buckets[n] = cur->next;
      delete_node(cur);
      --num_elements;
    }
    else {
      node* next = cur->next;
      while (next) {
        if (next == p) {
          cur->next = next->next;
          delete_node(next);
          --num_elements;
          break;
        }
        else {
          cur = next;
          next = cur->next;
        }
      }
    }
  }
}

template <class V, class K, class HF, class Ex, class Eq, class A>
void hashtable<V, K, HF, Ex, Eq, A>::erase(iterator first, iterator last)
{
  size_type f_bucket = first.cur ? bkt_num(first.cur->val) : buckets.size();
  size_type l_bucket = last.cur ? bkt_num(last.cur->val) : buckets.size();

  if (first.cur == last.cur)
    return;
  else if (f_bucket == l_bucket)
    erase_bucket(f_bucket, first.cur, last.cur);
  else {
    erase_bucket(f_bucket, first.cur, 0);
    for (size_type n = f_bucket + 1; n < l_bucket; ++n)
      erase_bucket(n, 0);
    if (l_bucket != buckets.size())
      erase_bucket(l_bucket, last.cur);
  }
}

template <class V, class K, class HF, class Ex, class Eq, class A>
inline void
hashtable<V, K, HF, Ex, Eq, A>::erase(const_iterator first,
                                      const_iterator last)
{
  erase(iterator(const_cast<node*>(first.cur),
                 const_cast<hashtable*>(first.ht)),
        iterator(const_cast<node*>(last.cur),
                 const_cast<hashtable*>(last.ht)));
}

template <class V, class K, class HF, class Ex, class Eq, class A>
inline void
hashtable<V, K, HF, Ex, Eq, A>::erase(const const_iterator& it)
{
  erase(iterator(const_cast<node*>(it.cur),
                 const_cast<hashtable*>(it.ht)));
}

//判断是否需要重建表,如需要就扩充
//为什么要 resize 呢? 底层的 vector 不是会动态增加大小吗?
// --> ??因为表太大了, 装载率太大,降低了查找效率
template <class V, class K, class HF, class Ex, class Eq, class A>
void hashtable<V, K, HF, Ex, Eq, A>::resize(size_type num_elements_hint)
{
  const size_type old_n = buckets.size();
  //如果元素个数大于 bucket vector 的大小,就重建表
  if (num_elements_hint > old_n) {
    //新表的大小
    const size_type n = next_size(num_elements_hint);
    if (n > old_n) {
	  //临时的表,用来存放新建立的表,之后会和原来的表 swap。
      vector<node*, A> tmp(n, (node*) 0);
      __STL_TRY {
	    //遍历每一个旧 bucket
        for (size_type bucket = 0; bucket < old_n; ++bucket) {
          node* first = buckets[bucket];
		  //遍历bucket 里的每一个节点
          while (first) {
            size_type new_bucket = bkt_num(first->val, n); //当前节点应落在新 bucket vector 的哪一个 bucket 里
			// 下面四个操作当前节点链接到新 bucket 下的链表前面
            buckets[bucket] = first->next;
            first->next = tmp[new_bucket];
            tmp[new_bucket] = first;
            first = buckets[bucket];
          }
        }
        buckets.swap(tmp); //和原来的表交换,由编译器收回 tmp。 --> 为什么要 swap 。把 tmp 作为新的 bucket vector 不好吗?
							--> ??这样重建前指向旧表的迭代器现在不至于指向一个不存在的地址 --> 但指向的内容会发生改变吗?
      }
#         ifdef __STL_USE_EXCEPTIONS
      catch(...) {
        for (size_type bucket = 0; bucket < tmp.size(); ++bucket) {
          while (tmp[bucket]) {
            node* next = tmp[bucket]->next;
            delete_node(tmp[bucket]);
            tmp[bucket] = next;
          }
        }
        throw;
      }
#         endif /* __STL_USE_EXCEPTIONS */
    }
  }
}

template <class V, class K, class HF, class Ex, class Eq, class A>
void                  node* first, node* last)
{
  node* cur = buckets[n];
  if (cur == first)
    erase_bucket(n, last);
  else {
    node* next;
    for (next = cur->next; next != first; cur = next, next = cur->next)
      ;
    while (next) {
      cur->next = ne= cur->next;
      --num_elements;
    }
  }
}

template <class V, class K, class HF, class Ex, class Eq, class A>
void
hashtable<V, K, HF, Ex, Eq, A>::erase_bucket(const size_type n, node* last)
{
  node* cur = buckets[n];
  while (cur != last) {
    node* next = cur->next;
    delete_node(cur);
    cur = next;
    buckets[n] = cur;
    --num_elements;
  }
}

//整体删除
//hashtable 由两部分组成 bucket vector 和 每一个 bucket 维护的 链表
//释放空间是只需要释放动态生成的空间,即每一个 bucket 维护的链表
//bucket vector 的空间当它离开它的作用域时由编译器收回
template <class V, class K, class HF, class Ex, class Eq, class A>
void hashtable<V, K, HF, Ex, Eq, A>::clear()
{
   //遍历每一个 bucket
  for (size_type i = 0; i < buckets.size(); ++i) {
    node* cur = buckets[i];
	//将 bucket list 中的每一个节点删除掉
    while (cur != 0) {
      node* next = cur->next;
      delete_node(cur);
      cur = next;
    }
    buckets[i] = 0;
  }
  num_elements = 0;
}

//如果 ht 就是 this 呢? 这里怎么没有自我复制检查?
template <class V, class K, class HF, class Ex, class Eq, class A>
void hashtable<V, K, HF, Ex, Eq, A>::copy_from(const hashtable& ht)
{
  //先清除自己的 bucket vector ,这操作是调用 vector::clear。造成所有元素为 0
  buckets.clear();
  //如果自己的空间大于对方,就什么都不做,否则 reserve() 函数会把 vector 的空间变得和 ht.buckets.size() 一样大
  buckets.reserve(ht.buckets.size());
  //将每个 bucket 初始化为 NULL
  buckets.insert(buckets.end(), ht.buckets.size(), (node*) 0);
  __STL_TRY {
    遍历对方的每一个 bucket
    for(size_type i = 0; i < ht.buckets.size(); ++i){
      //如果 bucket 的链表不空,就复制它的每一个节点
	  if(const node* cur = ht.buckets[i]){
        node* copy = new_node(cur->val);
        buckets[i] = copy;

        for (node* next = cur->next; next; cur = next, next = cur->next) {
          copy->next = new_node(next->val);
          copy = copy->next;
        }
      }
    }
    num_elements = ht.num_elements;
  }
  __STL_UNWIND(clear());
}

__STL_END_NAMESPACE

#endif /* __SGI_STL_INTERNAL_HASHTABLE_H */

// Local Variables:
// mode:C++
// End:

STL源码剖析 容器 stl_hashtable.h

时间: 2024-11-10 14:35:46

STL源码剖析 容器 stl_hashtable.h的相关文章

《STL源码剖析》---stl_hashtable.h阅读笔记

在前面介绍的RB-tree红黑树中,可以看出红黑树的插入.查找.删除的平均时间复杂度为O(nlogn).但这是基于一个假设:输入数据具有随机性.而哈希表/散列表hash table在插入.删除.查找上具有"平均常数时间复杂度"O(1):且不依赖输入数据的随机性. hash table的实现有线性探测.二次探测.二次散列等实现,SGI的STL是采用开链法(separate chaining)来实现的.大概原理就是在hash table的每一项都是个指针(指向一个链表),叫做bucket.

STL源码剖析 容器 stl_vector.h

本文为senlie原创,转载请保留此地址:http://blog.csdn.net/zhengsenlie vector ---------------------------------------------------------------------- 描述: 1.迭代器 vector 维护的是一个连续线性空间,它的迭代器是普通指针, 能满足 RandomAccessIterator 所有必要条件:operator*, operator->,operator++,operator--,

STL源码剖析 容器 stl_set.h

本文为senlie原创,转载请保留此地址:http://blog.csdn.net/zhengsenlie set ------------------------------------------------------------------------ 所有元素都会根据元素的键值自动被排序. 不可以通过 set 的迭代器改变 set 的元素值.因为 set 元素值就是其键值,关系到 set 元素的排列规则. set<T>::iterator 被定义为底层 RB-tree 的 const

STL源码剖析 容器 stl_tree.h

本文为senlie原创,转载请保留此地址:http://blog.csdn.net/zhengsenlie RB-tree(红黑树) -------------------------------------------------------------------------- 平衡二叉搜索树 --> 平衡可提高搜索效率 常见的平衡二叉搜索树有: AVL-tree(任何节点的左右子树高度相差最多 1).红黑树.AA-tree AVL-tree 破坏平衡的情况及恢复平衡的方法 恢复时要先找到失

STL源码剖析 容器 stl_map.h

本文为senlie原创,转载请保留此地址:http://blog.csdn.net/zhengsenlie map -------------------------------------------------------------------------------- 所有元素都会根据元素的键值自动被排序. map的所有元素都是 pair,同时拥有实值和键值. 不可以修改元素的键值,因为它关系到 map 元素的排列规则 可以修改元素的实值,因为它不影响 map 的排列规则 map ite

STL源码剖析 容器 stl_list.h

本文为senlie原创,转载请保留此地址:http://blog.csdn.net/zhengsenlie list ---------------------------------------------------------------------- ??为什么很多在算法库里有的算法还要在类的成员函数里重新实现一遍? -->1.因为算法库里的是通用的,对于具体的类来说效率不高. 比如说 reverse 如果直接用 stl_algo.h 里的 reverse,会再调用 iter_swap,

STL源码剖析 容器 stl_deque.h

本文为senlie原创,转载请保留此地址:http://blog.csdn.net/zhengsenlie deque ------------------------------------------------------------------------ ??一直看不懂 operator->() ,不明白它为什么不用接受参数,直接 return &(operator*()) 好像我们用迭代器的时候也不没怎么用到这个函数,甚至我都不会用 1.概述 vector 是单向开口的连续线性空

STL源码剖析 容器 stl_stack.h

本文为senlie原创,转载请保留此地址:http://blog.csdn.net/zhengsenlie stack ------------------------------------------------------------- stack 是一种配接器(adapter),以某种容器作为底部结构,改变其接口,使之符合"先进后出"的特性. SGI STL 默认以 deque 为 stack 底部结构 没有遍历行为,没有遍历器 示例: #include <stack&g

STL源码剖析 容器 stl_queue.h

本文为senlie原创,转载请保留此地址:http://blog.csdn.net/zhengsenlie queue ---------------------------------------------------------------------- stack 是一种配接器(adapter),以某种容器作为底部结构,改变其接口,使之符合"先进先出"的特性. SGI STL 默认以 deque 为 stack 底部结构 没有遍历行为,没有遍历器 示例: #include &l