跳表的C语言实现，不同于redis版本

本来跳表的原理是很简单的（相对于红黑树），但是国庆节断断续续搞了5天才把它写完……

写之前我了解到的跳表都是纯粹基于链式结构的，写的过程中看了一下redis的实现，发现它的每一个键列都是用数组来表示的。仔细想了想发现这种实现除了跳表的最大层数会被固定（因为是用的数组）之外，在性能、代码简洁性方面都是非常好的。而且实际使用中，可能也并不希望跳表的层数毫无限制地增长。

不过最后我自己的实现还是按照纯粹链式结构实现，因为数组的方式redis已经实现过了。

关于跳表原理网上很多，这里不再赘述，代码疏漏之处恳请指出。上一张图表示我代码中的跳表逻辑结构：

跳表API定义——skip_list.h

#ifndef SKIP_LIST_H_INCLUDED
#define SKIP_LIST_H_INCLUDED

/* Handle to a skip list instance; struct skip_list_s is not defined in this header. */
typedef struct skip_list_s *skip_list_t;

/**
 * Creates a new skip list.
 * @return	the newly created, empty skip list instance
 */
skip_list_t
skip_list_create();

/**
 * Destroys a skip list instance. The values stored in the skip list are
 * not destroyed.
 */
void
skip_list_destroy(skip_list_t sl);

/**
 * Looks up the value associated with key in the skip list.
 * A NULL return does not necessarily mean the key is absent; rely on the
 * result of skip_list_contains(sl, key) for that.
 * @param	key		the key to look up; it may be absent from the skip list
 * @return	the value associated with key in the skip list
 */
void*
skip_list_get(skip_list_t sl, int key);

/**
 * Adds a key/value pair to the skip list, so that afterwards
 * skip_list_contains(sl, key)==1.
 * If the key is already present its old value is replaced; otherwise a new
 * key/value pair is created.
 * @param	value	the new value for key; NULL is allowed
 * @return	the value previously associated with key in the skip list
 */
void*
skip_list_put(skip_list_t sl, int key, void *value);

/**
 * Removes a key/value pair from the skip list, so that afterwards
 * skip_list_contains(sl, key)==0.
 * @param	key		the key to remove; it may be absent from the skip list
 * @return	the value that was associated with key in the skip list
 */
void*
skip_list_remove(skip_list_t sl, int key);

/**
 * Tests whether a key is present.
 * @return	1 if the skip list contains key, otherwise 0
 */
int
skip_list_contains(skip_list_t sl, int key);

/**
 * @return	the number of keys in the skip list
 */
int
skip_list_count(skip_list_t sl);

/**
 * Retrieves the keys of the skip list, in ascending key order.
 * @param	[out] keys		receives the keys
 * @param	[in] length		length of the keys array
 * @return	the number of keys written (=MIN(length, number of keys in the skip list))
 */
int
skip_list_key_set(skip_list_t sl, int keys[], int length);

/**
 * Retrieves the values of the skip list, ordered by ascending key.
 * @param	[out] values	receives the values
 * @param	[in] length		length of the values array
 * @return	the number of values written (=MIN(length, number of keys in the skip list))
 */
int
skip_list_value_set(skip_list_t sl, void *values[], int length);

#endif // SKIP_LIST_H_INCLUDED

跳表API测试——main.c

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "skip_list.h"

#define COUNT	10

// Smoke test for the skip list API: inserts COUNT random key/value pairs,
// overwrites every value with its own key, then lists, looks up, checks and
// removes every key, printing the result of each call.
//
// The integer payloads travel through the void* value slot. They are converted
// via intptr_t in both directions so the int <-> pointer round trip stays well
// defined on platforms where a pointer is wider than an int.
int main(void) {
	skip_list_t sl;
	int i, tmp, *keys;

	keys = malloc(COUNT * sizeof *keys);
	if(!keys) {
		return EXIT_FAILURE;
	}

	srand((unsigned)time(NULL));

	sl = skip_list_create();
	if(!sl) {
		free(keys);
		return EXIT_FAILURE;
	}

	// insert COUNT random pairs
	for(i=0; i<COUNT; i++) {
		keys[i] = rand();
		tmp = rand();
		printf("put %5d : %5d, return %5d", keys[i], tmp, (int)(intptr_t)skip_list_put(sl, keys[i], (void*)(intptr_t)tmp));
		printf(", count=%d\n", skip_list_count(sl));
	}

	puts("*****************************************");

	// overwrite each value with its key; the call returns the old value
	for(i=0; i<COUNT; i++) {
		printf("put %5d : %5d, return %d\n", keys[i], keys[i], (int)(intptr_t)skip_list_put(sl, keys[i], (void*)(intptr_t)keys[i]));
	}

	puts("*****************************************");

	// read the keys back; keys[] is overwritten with what the list reports
	skip_list_key_set(sl, keys, COUNT);
	printf("key set : ");
	for(i=0; i<COUNT-1; i++) {
		printf("%d, ", keys[i]);
	}
	printf("%d\n", keys[COUNT-1]);

	puts("*****************************************");

	for(i=0; i<COUNT; i++) {
		printf("get %5d, return %d\n", keys[i], (int)(intptr_t)skip_list_get(sl, keys[i]));
	}

	puts("*****************************************");

	for(i=0; i<COUNT; i++) {
		printf("constains %5d, return %d\n", keys[i], skip_list_contains(sl, keys[i]));
	}

	puts("*****************************************");

	for(i=0; i<COUNT; i++) {
		printf("remove %5d, return %5d", keys[i], (int)(intptr_t)skip_list_remove(sl, keys[i]));
		printf(", count=%d\n", skip_list_count(sl));
	}

	puts("*****************************************");

	// every key should now be reported as absent
	for(i=0; i<COUNT; i++) {
		printf("constains %5d, %d\n", keys[i], skip_list_contains(sl, keys[i]));
	}

	skip_list_destroy(sl);

	free(keys);

	return 0;
}

跳表API实现——skip_list.c

#include "skip_list.h"
#include <stdlib.h>

// Pointer to a stored key/value pair (struct data_s).
typedef struct data_s *data_t;

// Pointer to a skip list node (struct node_s).
typedef struct node_s *node_t;

// The key/value pair stored in a node.
struct data_s {
	int key;
	void *value;
};

// A node of the skip list.
struct node_s {
	node_t right;	// next node in the same row, NULL at the end of the row
	node_t down;	// node one row below, NULL in the bottom row
	data_t data;	// note: all nodes of one column point to the same data
};

// Draws a random column height. The height starts at 1 and grows by one for
// every consecutive rand() result whose lowest bit is set, stopping at the
// first result whose lowest bit is clear.
static inline int
rand_level() {
	int height;
	for(height = 1; (rand() & 1) != 0; ++height) {
		// each set low bit buys one more level
	}
	return height;
}

// Looks for the key/value pair of key, scanning to the right of node and then
// descending row by row.
// Returns as soon as any row contains the key, to keep lookups fast.
// Returns NULL when no row contains the key.
// node must not be NULL.
static inline data_t
search_data(node_t node, int key) {
	while(node) {
		node_t next = node->right;
		// walk right while the neighbour's key is still smaller than key
		while(next && next->data->key < key) {
			node = next;
			next = node->right;
		}
		// now: node's key < key <= next's key (if next exists)
		if(next && next->data->key == key) {
			return next->data;
		}
		node = node->down;
	}
	return NULL;
}

// Looks for the key/value pair of key, scanning to the right of node and then
// descending row by row, and records the vertical path in the update array:
// one entry per visited row, top row first.
// Always walks down to the bottom row before returning so that the recorded
// path is complete.
// node and update must not be NULL.
static inline data_t
search_data_update(node_t node, int key, node_t *update) {
	node_t candidate;
	int row = 0;
	for(;;) {
		while(node->right && node->right->data->key < key) {
			node = node->right;
		}
		// node's key < key <= node->right's key, i.e. node is always to the
		// left of the target key, which is what remove needs for relinking
		update[row++] = node;
		if(node->down == NULL) {
			break;
		}
		node = node->down;
	}
	candidate = node->right;
	if(candidate == NULL || candidate->data->key != key) {
		return NULL;
	}
	return candidate->data;
}

// Adds count empty rows on top of the skip list.
// top_left stays the head of the top row: for every new row its current
// right/down links are moved into a freshly allocated head node that is hooked
// in directly below it, leaving top_left as the head of a new, empty top row.
// top_left must not be NULL. Performance could be improved.
// Returns the number of rows actually added. This is smaller than count only
// when malloc fails, so callers must not assume the full count was added.
static inline int
gain_empty_top_lines(node_t top_left, int count) {
	int i;
	for(i = 0; i < count; i++) {
		node_t tmp = malloc(sizeof *tmp);
		if(!tmp) {
			break;	// out of memory: report the rows added so far
		}
		tmp->right = top_left->right;
		tmp->down = top_left->down;
		tmp->data = NULL;	// head nodes carry no key/value pair
		top_left->right = NULL;
		top_left->down = tmp;
	}
	return i;
}

// Removes the empty rows at the top of the skip list.
// While the top row has no node to the right of top_left and a row exists
// below it, the head node of the row below is folded into top_left and freed.
// top_left must not be NULL. Performance could be improved.
// Returns the number of rows removed.
static inline int
clean_empty_top_lines(node_t top_left) {
	int removed = 0;
	while(top_left->right == NULL && top_left->down != NULL) {
		node_t below = top_left->down;
		top_left->right = below->right;
		top_left->down = below->down;
		free(below);
		removed++;
	}
	return removed;
}

// Inserts a column of length nodes for a new key/value pair.
// update[i] is the node after which the new node of row i is linked in; the
// new nodes are chained top to bottom through their down pointers and the
// lowest one gets down == NULL. All of them share the same data.
// data and update must not be NULL.
static inline void
add_key_column(data_t data, node_t *update, int length) {
	node_t upper = NULL;
	int row;
	for(row = 0; row < length; ++row) {
		node_t fresh = malloc(sizeof *fresh);
		fresh->data = data;
		fresh->down = NULL;	// overwritten below unless this is the last row
		fresh->right = update[row]->right;
		update[row]->right = fresh;
		if(upper) {
			upper->down = fresh;
		}
		upper = fresh;
	}
}

// Removes the column that holds key.
// For each of the length entries of update, the node to its right is unlinked
// and freed when that node carries key; rows without the key are left alone.
// The shared data is not freed here.
// update must not be NULL.
static inline void
remove_key_column(int key, node_t *update, int length) {
	int row;
	for(row = 0; row < length; ++row) {
		node_t pred = update[row];
		node_t victim = pred->right;
		if(victim == NULL || victim->data->key != key) {
			continue;
		}
		pred->right = victim->right;
		free(victim);
	}
}

// Frees node and hands back next, the node the caller wants to continue with
// (its right or down neighbour, read by the caller before the call).
static inline node_t
free_and_next(node_t node, node_t next) {
	node_t successor = next;
	free(node);
	return successor;
}

// The skip list itself.
struct skip_list_s {
	struct node_s top_left;	// top-left node of the skip list (embedded, not heap-allocated separately)
	int level;	// number of rows in the skip list
	int count;	// number of key/value pairs in the skip list
};

skip_list_t
skip_list_create() {
	skip_list_t sl;
	sl = (skip_list_t)malloc(sizeof(struct skip_list_s));
	sl->top_left.right = NULL;
	sl->top_left.down = NULL;
	sl->level = 1;
	sl->count = 0;
	return sl;
}

// Releases the skip list: every node, every internal key/value pair record and
// the list object itself. The user-supplied values are not touched.
// Passing NULL is a no-op.
void
skip_list_destroy(skip_list_t sl) {
	node_t head, node, next;
	if(!sl) {
		return;
	}
	// free the key nodes of every row; the head nodes are kept for now because
	// the walk down still needs their down pointers
	for(head = &sl->top_left; head; head = head->down) {
		for(node = head->right; node; node = next) {
			next = node->right;
			if(!head->down) {
				// every key owns exactly one node in the bottom row, so this
				// frees each shared key/value record exactly once
				free(node->data);
			}
			free(node);
		}
	}
	// free the head nodes below top_left; top_left itself lives inside *sl
	for(head = sl->top_left.down; head; head = next) {
		next = head->down;
		free(head);
	}
	free(sl);
}

// Returns the value stored for key, or NULL when the key is not present.
void*
skip_list_get(skip_list_t sl, int key) {
	data_t found = search_data(&sl->top_left, key);
	return found ? found->value : NULL;
}

// Stores value under key.
// If the key already exists its value is replaced and the old value returned.
// Otherwise a new column of random height is inserted and NULL is returned.
// The memory this function allocates itself is acquired before the structure
// is modified; if that allocation fails the list is left unchanged and NULL is
// returned.
void*
skip_list_put(skip_list_t sl, int key, void *value) {
	data_t data;
	node_t *update;
	int target_level, rows;

	data = search_data(&sl->top_left, key);
	if(data) {
		void *old_value = data->value;
		data->value = value;
		return old_value;
	}

	target_level = rand_level();
	// the path array must cover every row that exists after the list has grown
	rows = target_level > sl->level ? target_level : sl->level;

	data = malloc(sizeof *data);
	update = malloc(sizeof *update * (size_t)rows);
	if(!data || !update) {
		free(update);
		free(data);
		return NULL;
	}
	data->key = key;
	data->value = value;

	if(target_level > sl->level) {
		sl->level += gain_empty_top_lines(&sl->top_left, target_level-sl->level);
		if(target_level > sl->level) {
			// fewer rows were added than requested: shorten the new column
			target_level = sl->level;
		}
	}
	search_data_update(&sl->top_left, key, update);
	// target_level<=sl->level, so the column occupies the lowest target_level rows
	add_key_column(data, update+(sl->level-target_level), target_level);
	free(update);
	sl->count++;
	return NULL;
}

// Removes key and returns the value that was stored for it, or NULL when the
// key is not present.
// The column is unlinked while descending, so no temporary path array (and
// therefore no allocation that could fail) is needed.
void*
skip_list_remove(skip_list_t sl, int key) {
	void *old_value = NULL;
	data_t data = NULL;
	node_t node;

	for(node = &sl->top_left; node; node = node->down) {
		node_t victim;
		// stop at the last node of this row whose key is smaller than key
		while(node->right && key > node->right->data->key) {
			node = node->right;
		}
		victim = node->right;
		if(victim && victim->data->key == key) {
			data = victim->data;	// shared by every node of the column
			node->right = victim->right;
			free(victim);
		}
	}
	if(data) {
		// drop the rows at the top that became empty by removing the column
		sl->level -= clean_empty_top_lines(&sl->top_left);
		old_value = data->value;
		free(data);
		sl->count--;
	}
	return old_value;
}

// Reports whether key is present: 1 if a key/value pair is found, else 0.
int
skip_list_contains(skip_list_t sl, int key) {
	if(search_data(&sl->top_left, key) == NULL) {
		return 0;
	}
	return 1;
}

// Returns the number of key/value pairs recorded in the list.
int
skip_list_count(skip_list_t sl) {
	const int total = sl->count;
	return total;
}

// Copies up to length keys into keys[], walking the bottom row from left to
// right. Returns the number of keys written.
int
skip_list_key_set(skip_list_t sl, int keys[], int length) {
	node_t row = &sl->top_left;
	node_t cur;
	int filled = 0;
	// descend along the head nodes to the bottom row
	while(row->down) {
		row = row->down;
	}
	cur = row->right;
	while(filled < length && cur) {
		keys[filled++] = cur->data->key;
		cur = cur->right;
	}
	return filled;
}

// Copies up to length values into values[], walking the bottom row from left
// to right. Returns the number of values written.
int
skip_list_value_set(skip_list_t sl, void *values[], int length) {
	node_t row = &sl->top_left;
	node_t cur;
	int filled = 0;
	// descend along the head nodes to the bottom row
	while(row->down) {
		row = row->down;
	}
	cur = row->right;
	while(filled < length && cur) {
		values[filled++] = cur->data->value;
		cur = cur->right;
	}
	return filled;
}

时间： 2024-10-18 01:11:22

跳表的C语言实现，不同于redis版本的相关文章

Go语言实现跳表(SkipList)

跳表(skiplist)在redis/levelDB中属于核心数据结构,我简单粗暴的用Golang实现了下. 就我的简单理解来说,就一个普通的链表,在insert时,通过Random_level(),把一层变成很多层, 越上数据越小,跨度越大. 查找时从上往下找,用空间换时间. 记下测试代码: package main import ( "fmt" //"github.com/xclpkg/algorithm" "math/rand" ) fun

C语言跳表(skiplist)实现

一.简介跳表(skiplist)是一个非常优秀的数据结构,实现简单,插入.删除.查找的复杂度均为O(logN).LevelDB的核心数据结构是用跳表实现的,redis的sorted set数据结构也是由跳表实现的.代码在这里:http://flyingsnail.blog.51cto.com/5341669/1020034 二.跳表图解考虑一个有序表: 从该有序表中搜索元素 < 23, 43, 59 > ,需要比较的次数分别为 < 2, 4, 6 >,总共比较的次数为 2 +

跳表SkipList

原文:http://www.cnblogs.com/xuqiang/archive/2011/05/22/2053516.html 跳表SkipList 1.聊一聊跳表作者的其人其事 2. 言归正传,跳表简介 3. 跳表数据存储模型 4. 跳表的代码实现分析 5. 论文,代码下载及参考资料 <1>. 聊一聊作者的其人其事跳表是由William Pugh发明.他在 Communications of the ACM June 1990, 33(6) 668-676 发表了Skip lists

跳表SkipList—定义

1.聊一聊跳表作者的其人其事 2. 言归正传,跳表简介 3. 跳表数据存储模型 4. 跳表的代码实现分析 5. 论文,代码下载及参考资料 <1>. 聊一聊作者的其人其事跳表是由William Pugh发明.他在 Communications of the ACM June 1990, 33(6) 668-676 发表了Skip lists: a probabilistic alternative to balanced trees,在该论文中详细解释了跳表的数据结构和插入删除操作. Will

稀疏矩阵的三元组顺序表的C语言实现

对于没有排序功能的集合来说,都可以使用java.util.Collections.sort()方法进行排序,它除了集合对象以外,还需要提供一个比较器.如果列表中的元素全部都是相同的类型,并且这个类实现了Comparable接口,就可以简单的调用Collections.sort()方法,如果这个类没有实现comparable接口,那么可以创建一个比较器传递一个Comparator实例作为Sort()的第二个参数进行排序,另外,如果不想使用默认的分类顺序进行排序,同样也可以传递一个Comparato

【搜索引擎（二）】索引、倒排索引、哈希表、跳表

索引其实在计算机中我们早已接触过跟索引有关的东西,比如数据库里的索引(index),还有硬盘文件系统中其实也有类似的东西,简而言之,索引是一种为了方便找到自己需要的东西而设计出来的条目,你可以通过找索引找到自己想要内容的位置.索引过程是: 关键字->索引->文档.在图书馆内的书分门别类,就是一种按类别来分的索引.当然索引还有很多其他的实现. 仅仅有索引的概念是不够的.虽然分门别类是一种方法,但是我们在拥有一堆文档的时候必须要有从文档到索引的规范过程,并且索引的结构要满足能够让人(或者计算机)

数据结构：跳表

1.理想情况在一个使用有序链表描述的具有n个元素的字典中进行搜索,至多需要n次比较.如果在链中部节点加一个指针,则比较次数可以减少到n/2+1.搜索时,首先将要搜索的元素与中间节点进行比较,如果该元素较小,则仅需搜索链表的左半部分.否则,只需搜索右半部分. 以上图为例,如果要搜索的数为26,则将26先与40比较,因为26<40,因此只需要搜索40的左边元素. 而如果在左半部分和右半部分再增加一个中间指针,则可以进一步减小搜索范围(b). 初始的链称为0级链,如上图中的全部节点. 至少指向2个节

SkipList 跳表

为什么选择跳表目前经常使用的平衡数据结构有:B树,红黑树,AVL树,Splay Tree, Treap等. 想象一下,给你一张草稿纸,一支笔,一个编辑器,你能立即实现一棵红黑树,或者AVL树出来吗? 很难吧,这需要时间,要考虑很多细节,要参考一堆算法与数据结构之类的书, 还要参考网上的代码,相当麻烦. 用跳表吧,跳表是一种随机化的数据结构,目前开源软件 Redis 和 LevelDB 都有用到它, 它的效率和红黑树以及 AVL 树不相上下,但跳表的原理相当简单,只要你能熟练操作链表, 就能轻

倒排索引优化 - 跳表

在前面一篇介绍倒排索引的文章中我们知道, 两个关键字的合并操作的时候复杂度是 θ(N), 如果在合并操作时遇到最极端的情况, 所扫描和比较的次数是两个列表集合的所有元素个数之和, 即是线性增长的, 这在数据量特别大的时候是很低效的. 我们还是看一下两个集合的合并操作代码示例: a = [1, 2, 3, 6, 9, 11, 45, 67] b = [4, 6, 13, 45, 69, 98] i = j = 0 result = [] while i < len(a) and j < le