基于二叉树和双向链表实现限制长度的最优Huffman编码

该代码采用二叉树结合双向链表实现了限制长度的最优Huffman编码,本文代码中的权重全部采用整数值表示。http://pan.baidu.com/s/1mgHn8lq

算法原理详见:A fast algorithm for optimal length-limited Huffman codes.pdf

示例:符号ABCDE的权重分别为10,6,2,1,1

   不限制长度的最优Huffman编码为A:0,B:10,C:110,D:1110,E:1111,平均码长为1.8bits/symbol;

   限制长度3的最优Huffman编码为  A:0,B:100,C:101,D:110,E:111,  平均码长为2.0bits/symbol;

限制长度最优Huffman编码实现代码如下:

//Reference:A fast algorithm for optimal length-limited Huffman codes.pdf,http://pan.baidu.com/s/1o6E19Bs
//author:by Pan Yumin.2014-06-18
//with the method of BinaryTree and linked-list
#include <stdio.h>
#include <memory.h>
#include <malloc.h>

#define  MaxSymbols 256	//the Maximum Number of Symbols
#define  MaxHuffLen	16	//the Limited Length

typedef unsigned char boolean;
#ifndef FALSE			//in case these macros already exist
#define FALSE	0		//values of boolean
#endif

#ifndef TRUE
#define TRUE	1
#endif

//One entry of the package-merge work list. The same structure serves as a
//doubly linked list element and as a binary-tree node: a leaf stands for one
//(symbol, depth) item, an inner node is a package built from two entries.
//NOTE(review): the tag __Node contains a double underscore, a spelling the C
//standard reserves for the implementation.
typedef struct __Node{
	int width;		//width of the item: 1<<(MaxHuffLen-depth-1) for leaves, 2*minWidth for packages
	int weight;		//symbol weight for leaves, sum of both children for packages
	int index;		//position of the symbol in the sorted weight array (set for leaves only)
	int depth;		//level the leaf was created for, 0..MaxHuffLen-1 (set for leaves only)

	struct __Node *prev;	//previous entry in the doubly linked list
	struct __Node *next;	//next entry in the doubly linked list
	struct __Node *left;	//left child; NULL for a leaf
	struct __Node *right;	//right child; NULL for a leaf
}Node;

//Result record for one symbol of the generated code table.
//NOTE(review): the tag __HuffTable contains a double underscore, a spelling
//the C standard reserves for the implementation.
typedef struct __HuffTable{
	unsigned int index;	//symbol index before sorting (copied from SortIndex)
	unsigned int len;	//code length in bits
	unsigned int code;	//code value; PrintHuffCode prints its low len bits, most significant first
}HuffTable;

//Test memory leak
/*int g_malloc = 0,g_free = 0;

void* my_malloc(int size){
	g_malloc++;
	return malloc(size);
}
void my_free(void *ptr){
	if(ptr){
		g_free++;
		free(ptr);
		ptr = NULL;
	}
}
#define malloc my_malloc
#define free my_free*/

//Get the smallest term in the dyadic (power-of-two) expansion of X,
//i.e. the value of the lowest set bit of X.
//Returns 0 when X is 0, because no term exists in that case.
int GetSmallestTerm(int X)
{
	unsigned int bits = (unsigned int)X;	//unsigned, so the negation below is well defined

	if(bits == 0)
		return 0;							//guard: scanning for a set bit would never finish
	return (int)(bits & (0u - bits));		//x & -x keeps only the lowest set bit
}
//Free the whole subtree rooted at head, head itself included.
//For every leaf reached, Flag[depth*Symbols+index] is set to 0 when isDelete
//is true (item discarded) and to 1 otherwise (item kept).
//head must not be NULL.
void deleteNode(Node *head,unsigned char *Flag,int Symbols,boolean isDelete)
{
	Node *leftChild = head->left;
	Node *rightChild = head->right;

	if(!leftChild && !rightChild){		//leaf: record its fate in the flag table
		Flag[head->depth*Symbols+head->index] = isDelete ? 0 : 1;
	}
	if(leftChild)
		deleteNode(leftChild,Flag,Symbols,isDelete);
	if(rightChild)
		deleteNode(rightChild,Flag,Symbols,isDelete);
	free(head);
}

//One package-merge step on the item list.
//  head     : sentinel in front of the list; it holds no item.
//  tail     : in/out, points at the last item of the list and is updated to
//             the new last item before returning.
//  minWidth : width of the items at the tail end that are to be packaged.
//  Flag,Symbols : handed to deleteNode when an unpaired item is discarded.
//Package: walking backwards from the tail, neighbouring pairs of width
//minWidth are unlinked and combined into new nodes of width 2*minWidth.
//Merge: the new nodes are spliced into the list's run of 2*minWidth items,
//compared by weight.
//NOTE(review): the malloc results are not checked.
void Package_Merge(Node *head,Node **tail,int minWidth,unsigned char * Flag,int Symbols)
{
	Node *tmp = NULL,*node_1 = NULL,*node_2 = NULL;
	Node *node_P_head = NULL,*node_P_tail = NULL;		//package list: node_P_head is its first package, node_P_tail a dummy end node without data
	Node *node_head = head;								//becomes the node just before the run of 2*minWidth items
	//package
	node_P_tail = (Node *)malloc(sizeof(Node));
	memset(node_P_tail,0,sizeof(Node));

	//each pass looks at the pair (node_1, node_1->next), i.e. the last two items of the list
	node_2 = node_P_tail;	node_1 = (*tail)->prev;
	for(;node_1 != NULL && node_1 != head; node_1=(*tail)->prev){
		if(node_1->width == minWidth){
			tmp = (Node*)malloc(sizeof(Node));		//index and depth stay unset for package nodes
			tmp->right = node_1->next;				//packages are prepended, so the package list grows from right to left
			tmp->left = node_1;
			tmp->width = 2*minWidth;
			tmp->weight = node_1->weight+node_1->next->weight;
			tmp->next = node_2;
			tmp->prev = NULL;

			node_2->prev = tmp;
			node_2 = tmp;
			*tail = node_1->prev;	(*tail)->next = NULL;		//the list now ends in front of the packaged pair
		}else{
			break;
		}
	}
	node_P_head = node_2;	//first package, or node_P_tail itself when nothing was packaged

	//an item of width minWidth left without a partner (odd count) is dropped;
	//deleteNode with TRUE sets the Flag entries of its leaves to 0
	if(*tail != head && (*tail)->width == minWidth){
		*tail = (*tail)->prev;
		deleteNode((*tail)->next,Flag,Symbols,TRUE);
		(*tail)->next = NULL;
	}

	//find the range of 2*minWidth: step back over the trailing items of that width
	node_1 = *tail;
	for(;node_1 != head && node_1->width == 2*minWidth;node_1 = node_1->prev){
	}
	node_head = node_1;		//last node before the 2*minWidth run (may be head); not part of the run itself

	//merge: node_1 walks the run in the main list, node_2 walks the package list
	node_1 = node_head->next;	node_2 = node_P_head;
	for(;node_1 != NULL && node_2 != node_P_tail;){
		if(node_1->weight >= node_2->weight){
			node_1 = node_1->next;		//list item is at least as heavy: it stays in front
		}else{		//package is heavier: link it in directly before node_1
			node_1->prev->next = node_2;
			node_2->prev = node_1->prev;
			node_1->prev = node_2;

			node_2 = node_2->next;			//advance in the package list first ...
			node_2->prev->next = node_1;	//... then finish linking the inserted package to node_1

			node_2->prev = NULL;			//the next package no longer has a predecessor in the package list
		}
	}
	if(node_1 == NULL){		//main list exhausted: append the rest of the package list behind the tail
		(*tail)->next = node_2;
		node_2->prev = *tail;
		*tail = node_P_tail->prev;		//new tail is the node in front of the dummy end node
		(*tail)->next = NULL;
		free(node_P_tail);	node_P_tail = NULL;
	}else{					//every package was inserted; only the dummy end node remains to free
		free(node_P_tail);	node_P_tail = NULL;
	}
}

//Select items from the list until their widths add up to X.
//  head : sentinel in front of the item list, tail : last item of the list.
//  X    : total width still to be covered.
//  Flag,Symbols : flag table updated through deleteNode / Package_Merge.
//Returns 0 once X has been covered, -1 when the list runs empty first,
//-2 when the item at the tail is wider than the smallest term missing from X.
int LengthLimitedHuffmanCode(Node *head,Node *tail,int X,unsigned char * Flag,int Symbols)
{
	int target = 0;			//smallest term of X that still has to be matched
	int tailWidth = 0;		//width of the item at the tail
	Node *picked = NULL;

	for(;X > 0;){
		target = GetSmallestTerm(X);
		if(!head->next)			//no items left
			return -1;

		//The tail item is used as the minimum-width item; the original author
		//notes this holds for the Huffman case only.
		tailWidth = tail->width;
		if(tailWidth > target)
			return -2;
		if(tailWidth < target){
			//too narrow: package the narrowest items into the next width
			Package_Merge(head,&tail,tailWidth,Flag,Symbols);
			continue;
		}

		//exact match: take the tail item, marking its leaves as kept
		picked = tail;
		tail = picked->prev;
		deleteNode(picked,Flag,Symbols,FALSE);
		tail->next = NULL;
		X -= target;
	}

	return 0;
}
//Print the code of one table entry as a string of '0'/'1' digits,
//most significant of its len bits first. Nothing is printed when len is 0.
void PrintHuffCode(HuffTable Huffcode)
{
	int bit = Huffcode.len-1;		//position of the next bit to print

	while(bit >= 0){
		printf("%d",(Huffcode.code>>bit) & 0x01);
		bit--;
	}
}
//Derive every symbol's code length from the flag table and assign canonical
//code values.
//  HuffCode  : one entry per symbol. len is accumulated with +=, so the caller
//              must pass zeroed entries; index and code are overwritten.
//  Flag      : L rows of Symbols bytes; Flag[j*Symbols+i] is added to the
//              length of symbol i.
//  L         : number of rows in Flag. Must lie in 0..MaxHuffLen because the
//              local tables are sized by MaxHuffLen; for any other value the
//              function returns without touching HuffCode.
//  Symbols   : number of symbols.
//  SortIndex : SortIndex[i] is stored as HuffCode[i].index.
void GenerateHuffmanCode(HuffTable *HuffCode,unsigned char *Flag,int L,int Symbols,int *SortIndex)
{
	int i=0,j=0;
	unsigned int codes[MaxHuffLen+2]={0},rank[MaxHuffLen+1] = {0};	//rank: the number of symbols in every length

	if(L < 0 || L > MaxHuffLen)		//codes[L+1] and rank[L] must stay inside the arrays
		return;

	//code length of a symbol = sum of its flags; count the symbols per length
	for(i=0;i<Symbols;i++){
		for(j=0;j<L;j++){
			HuffCode[i].len += Flag[j*Symbols+i];
		}
		if(HuffCode[i].len != 0)
			rank[HuffCode[i].len]++;
		HuffCode[i].index = SortIndex[i];
	}

	//codes[k] becomes the first code value of length k;
	//rank is cleared so it can count the codes handed out per length below
	for(i=0;i<=L;i++){
		codes[i+1] = (codes[i]+rank[i])<<1;
		rank[i] = 0;
	}

	//symbols of equal length receive consecutive values in table order
	for(i=0;i<Symbols;i++){
		HuffCode[i].code = codes[HuffCode[i].len] + rank[HuffCode[i].len]++;
	}
}
//Average code length in bits per symbol: the sum of len*weight over all
//symbols divided by WeightSum.
//  HuffCode : code table, weight : weight of each symbol (same order),
//  Symbols  : number of entries, WeightSum : divisor (not checked for zero).
float BitsPerSymbol(HuffTable *HuffCode,int *weight,int Symbols,int WeightSum)
{
	float weightedBits = 0.0;
	int s = 0;

	while(s < Symbols){
		weightedBits += (float)HuffCode[s].len*weight[s];
		s++;
	}
	return weightedBits/WeightSum;
}

//Sort Freq[0..Symbols-1] in place into descending order with an exchange
//sort, applying every swap to SortIndex as well so that SortIndex[k] keeps
//naming the original position of Freq[k].
void FreqSort(int *Freq,int *SortIndex,int Symbols)
{
	int slot = 0;		//position currently being settled

	while(slot < Symbols){
		int probe = slot+1;
		while(probe < Symbols){
			if(Freq[probe] > Freq[slot]){	//larger value found further right: exchange both arrays
				const int freqHeld = Freq[slot];
				const int indexHeld = SortIndex[slot];

				Freq[slot] = Freq[probe];
				SortIndex[slot] = SortIndex[probe];
				Freq[probe] = freqHeld;
				SortIndex[probe] = indexHeld;
			}
			probe++;
		}
		slot++;
	}
}

int GenLenLimitedOptHuffCode(int *Freq,int Symbols)
{
	int i,j;
	unsigned char *Flag = NULL;	//record the state of the node
	unsigned int rank[MaxHuffLen];
	Node *node = NULL,*head = NULL,*tail = NULL,*tmp = NULL;	//head not store data,just a head,tail store data
	int Ret = 0;
	HuffTable HuffCode[MaxSymbols];
	float bitspersymbols = 0.0;
	int WeightSum = 0;
	int SortIndex[MaxSymbols];

	if(Symbols > (1<<MaxHuffLen)){
		printf("Symbols > (1<<MaxHuffLen)\n");
		return -1;
	}

	for(i=0;i<MaxSymbols;i++){
		SortIndex[i] = i;
	}
	FreqSort(Freq,SortIndex,Symbols);		//sort

	for(i=0;i<Symbols;i++){
		WeightSum += Freq[i];
	}

	head = (Node*)malloc(sizeof(Node));
	memset(head,0,sizeof(Node));
	Flag = (unsigned char*)malloc(MaxHuffLen*Symbols*sizeof(unsigned char));
	memset(Flag,1,MaxHuffLen*Symbols*sizeof(unsigned char));

	memset(HuffCode,0,sizeof(HuffCode));
	node = head;

	for(i=0;i<MaxHuffLen;i++){
		for(j=0;j<Symbols;j++){
			tmp = (Node*)malloc(sizeof(Node));
			tmp->prev = node;							tmp->next = NULL;
			tmp->left = NULL;							tmp->right = NULL;
			tmp->width = 1<<(MaxHuffLen-i-1);
			tmp->weight = Freq[j];
			tmp->index = j;								tmp->depth = i;
			node->next = tmp;
			node = tmp;
		}
	}
	tail = node;	//tail
	Ret = LengthLimitedHuffmanCode(head,tail,(Symbols-1)<<MaxHuffLen,Flag,Symbols);

	GenerateHuffmanCode(HuffCode,Flag,MaxHuffLen,Symbols,SortIndex);

	//print HuffCode
	for(i=0;i<Symbols;i++){
		printf("%03d weight:%04d Code:",HuffCode[i].index,Freq[i]);
		PrintHuffCode(HuffCode[i]);
		printf("\tCodeLen:%02d",HuffCode[i].len);
		printf("\n");
	}
	bitspersymbols = BitsPerSymbol(HuffCode,Freq,Symbols,WeightSum);
	printf("average code length:%f bits/symbol.\n",bitspersymbols);

	free(head);	head = NULL;
	free(Flag);	Flag = NULL;

	return Ret;
}
#include <time.h>
//Demo driver: encodes the five-symbol example with weights 10,6,2,1,1.
//The exit status is 0 when the code was generated and 1 otherwise.
int main()
{
	//alternative test vector (12 symbols): {1,25,3,4,9,6,4,6,26,15,234,4578}
	int Freq[MaxSymbols] = {10,6,2,1,1};	//weight is not zero.
	int Ret = GenLenLimitedOptHuffCode(Freq,5);

	return Ret == 0 ? 0 : 1;
}

运行上述程序输出结果如下所示:


基于二叉树和双向链表实现限制长度的最优Huffman编码

时间: 2024-10-24 07:09:26

基于二叉树和双向链表实现限制长度的最优Huffman编码的相关文章

基于二叉树和数组实现限制长度的最优Huffman编码

具体介绍详见上篇博客:基于二叉树和双向链表实现限制长度的最优Huffman编码。基于数组和基于链表的实现方式在效率上有明显区别:编码256个符号,符号权重为1...256,限制长度为16,循环编码1万次,Release模式下,基于链表的耗时为8972ms,基于数组的耗时为1793ms,基于数组的速度约为链表实现方式的5倍。详细代码如下: //Reference:A fast algorithm for optimal length-limited Huffman codes.pdf,http://p

算法题——二叉树转换为双向链表

1 BSTreeNode* ConvertNode(BSTreeNode* pNode, bool asRight) 2 { 3 if(!pNode) 4 return NULL; 5 6 BSTreeNode *pLeft = NULL; 7 BSTreeNode *pRight = NULL; 8 9 // Convert the left sub-tree 10 if(pNode->m_pLeft) 11 pLeft = ConvertNode(pNode->m_pLeft, false

Jcompress: 一款基于huffman编码和最小堆的压缩、解压缩小程序

前言 最近基于huffman编码和最小堆排序算法实现了一个压缩、解压缩的小程序。其源代码已经上传到github上面: Jcompress下载地址。在本人的github上面有一个叫Utility的repository,该分类下面有一个名为Jcompress的目录,便是本文所述的压缩、解压缩小程序的源代码。后续会在Utility下面增加其他一些实用的小程序,比如基于socket的文件断点下载小程序等等。如果你读了此文觉得还不错,不妨给笔者的github点个star,哈哈。在正式介绍Jcompres

基于Huffman编码的压缩软件的Python实现

哈夫曼编码是利用贪心算法进行文本压缩的算法,其算法思想是首先统计文件中各字符出现的次数,保存到数组中,然后将各字符按照次数升序排序,挑选次数最小的两个元素进行连结形成子树,子树的次数等于两节点的次数之和,接着把两个元素从数组删除,将子树放入数组,重新排序,重复以上步骤.为了解压,在压缩时首先往文件中填入huffman编码的映射表的长度,该表的序列化字符串,编码字符串分组后最后一组的长度(编码后字符串长度模上分组长度),最后再填充编码后的字符串.本算法中以一个字节,8位作为分组长度,将编码后二进制

二叉树转双向链表

二叉排序树在不改变BinaryNode<Type>的 struct{data;*left;*right}  情况下是可以转换成双向链表的. 由于二叉树的主要数据都记录在根节点BinaryNode<Type>* root 上的,所以这里就只用BinaryNode类手动建一棵树, 树形如下: 新增的有三个主要函数, BinaryNode<Type>* GetHead() 是用来查找右子树最小的那个节点 BinaryNode<Type>* GetTail() 是用

算法题:用二叉树构造双向链表

#include <iostream> #include <string.h> using namespace std; struct Node { Node *left;//相当于双向链表的prev指针. Node *right;//相当于双向链表的next指针. char data; Node(char d = char()):data(d),left(NULL),right(NULL){} }; class MTree { public: MTree():root(NULL)

二叉树与双向链表的转换

题目描述 输入一棵二叉搜索树,将该二叉搜索树转换成一个排序的双向链表.要求不能创建任何新的结点,只能调整树中结点指针的指向. 分析:二叉搜索树要转成有序的链表,可以想到的是利用中序遍历二叉树,每得到一个输出结点就修改其指针指向,从而构成有序链表. 题解: /* struct TreeNode { int val; struct TreeNode *left; struct TreeNode *right; TreeNode(int x) : val(x), left(NULL), right(N

基于二叉树的优先队列

简介 优先队列:指队列中的元素都被指派一个优先级,元素按优先级最大(最小)出队,存储堆的数组的第一个元素就是最大的(或最小的).所以用堆作为优先队列的元素载体是合适的. 队列有两个基本操作:1.入队2.出队. 队列的特点是先进先出.通常都把队列比喻成排队买东西,大家都很守秩序,先排队的人就先买东西.但是优先队列有所不同,它不遵循先进先出的规则,而是根据队列中元素的优先权,优先权最大的先被取出.通常把优先队列比喻成现实生活中的打印.一个打印店里有很多打印机,每台机器的性能不一样,有的打印机打印很快

将一棵二叉树转换为双向链表的两种算法

要求:输入一棵二叉排序树,将该二叉搜索树转换成一个排序的双向链表.要求不能创建新的结点,只能调整树中结点的指针的指向.如下图: 方法一:我们借助一个容器来顺序存储结点的指针,然后改变指针的指向. 1 //////////////////////二叉搜索树与双向链表(方法一)////////////////////////////////////////// 2 3 void Convertfirst(BinaryTreeNode* pRoot , vector<BinaryTreeNode*>