FP_Growth算法原理及实现

***********************************************声明******************************************************

原创作品,出自 “晓风残月xj” 博客,欢迎转载,转载时请务必注明出处(http://blog.csdn.net/xiaofengcanyuexj)。

由于各种原因,可能存在诸多不足,欢迎斧正!

*********************************************************************************************************

前面提到关联规则寻找频繁项集的Apriori算法,Apriori算法是挖掘布尔型关联规则频繁项集的最为经典、最为基本的算法,但是该算法需要不断寻找候选集,然后剪枝即去掉包含非频繁子集的候选集,效率不是很高,时间复杂度由暴力枚举所有子集的指数级别O(2^n)降为多项式级别,多项式的具体系数视底层实现情况而定。
Apriori算法的主要瓶颈在于不断寻找候选项集,可不可以找到一种不用频繁寻找候选项集的算法呢?而且当待挖掘的数据很大进而需要存储在数据库中时,Apriori算法还有一个无可回避的问题就是每次都要扫描数据库,涉及大量I/O操作,比较耗时(当然可以不用数据库)。

FP_Growth算法是一种不生成候选集从而寻找频繁项集的算法,主要基于树结构:包含一棵FP_Tree和一个项头表,每个项通过一个结点链指向它在树中出现的位置。基本结构如下所示。需要注意的是项头表需要按照支持度递减排序,在FP_Tree(有后缀的也称条件FP_Tree)中高支持度的节点只能是低支持度节点的祖先节点。这样一来可以保证尽可能地共用祖先节点,更重要的是保证正确性。

procedure FP_Growth(FP_Tree, α)

if FP_Tree 只含单个路径P then{                                                   
【1】

for 路径P中结点的每个组合(记作β)                                     
【2】

产生模式βUα,其支持度MinSupport =β 中结点的最小支持度;

}

else{

for each αi 在FP_Tree的项头表(按照支持度由低到高顺序进行扫描){ 【3】

产生一个模式β= αiUα,其支持度MinSupport=αi.MinSupport;

构造β的条件模式基,然后构造β的条件FP_Treeβ;                  
【4】

if FP_Treeβ不为空 then

调用 FP_Growth (FP_Treeβ, β);

}

}

【1】 FP_Tree 只含单个路径P,即只有一条分支且分支不能分叉,如果分叉可能隐含了分支合并问题,可能导致某些项在未合并之前被误判为不满足最小支持度而被删除;

【2】 若分支上有n个属性值,则总共有2^n种组合,可以按每个属性值取或不取两种情况递归下去;

【3】当前条件FP_Tree的项头表,用头插法建立单链表(新结点总是插在该项结点链的最前面);

【4】
以当前项头表的αi沿着条件FP_Tree的每条分支向上找出所有条件模式,然后建立后缀模式β的条件FP_Treeβ

源代码:

在此声明,以下代码并不是系本人原创,如需使用,必须声明,谢谢!

//FP_Tree.h

/**
 * Created by xujin on 2014/12/4.
   All Rights Reserved,but you can use this program.
 */
#ifndef FP_TREE_H
#define FP_TREE_H

#include"Transaction.h"
#include"TransactionSet.h"

#include<map>
using namespace std;

// Capacity of the fixed-size child-pointer array stored in every CFP_TreeNode.
const int MAXN_CHILD=20;
// Items are identified by their name.
typedef string ItemType;

// Pairs an item name with a support count.
struct ItemSupport
{
	ItemType m_ITItemName;   // name of the item
	int m_nSupportCount;     // support count recorded for that item

	// Builds the pair from an item name and its support count.
	ItemSupport(ItemType tItem,int tSup)
		: m_ITItemName(tItem),
		  m_nSupportCount(tSup)
	{
	}
};

struct CFP_TreeNode
{
	int m_nSupportCount;
	int m_nChildSize;
	ItemType m_ITItemName;
	CFP_TreeNode *m_pFatherNode;
	CFP_TreeNode *m_pLinkedNode;
	CFP_TreeNode *m_pChildNode[MAXN_CHILD];

	CFP_TreeNode()
	{
		m_ITItemName.clear();
		m_nChildSize=0;
		m_nSupportCount =0;
		m_pFatherNode=NULL;
		m_pLinkedNode=NULL;
		for(int i=0;i<MAXN_CHILD;++i)
			m_pChildNode[i]=NULL;
	}

	CFP_TreeNode(int tCount)
	{
		m_ITItemName.clear();
		m_nChildSize=0;
		m_nSupportCount =tCount;
		m_pFatherNode=NULL;
		m_pLinkedNode=NULL;
		for(int i=0;i<MAXN_CHILD;++i)
			m_pChildNode[i]=NULL;
	}

	CFP_TreeNode(ItemType tITtem,CFP_TreeNode *tFa,CFP_TreeNode *tLinked,int tCount)
	{
		m_ITItemName=tITtem;
		m_nChildSize=0;
		m_nSupportCount=tCount;
		m_pFatherNode =tFa;
		m_pLinkedNode=tLinked;
		for(int i=0;i<MAXN_CHILD;++i)
			m_pChildNode[i]=NULL;
	}

};

struct CItemHeaderNode
{
	int m_nSupportCount;
	CFP_TreeNode *m_pFPFirst;
	CItemHeaderNode()
	{
		m_nSupportCount =0 ;
		m_pFPFirst = NULL;
	}
	CItemHeaderNode(int tCount)
	{
		m_nSupportCount =tCount ;
		m_pFPFirst = NULL;
	}
};

// FP-tree together with its item header table.
// The tree nodes are allocated with new by insertFPTree; destroy() frees the
// nodes below the root. No destructor is declared here.
class CFP_Tree
{
private:
	// Minimum confidence / support given to the constructor as fractions.
	double m_dMinConfidence;
	double m_dMinSupport;
	// Size the fractional thresholds are scaled by (transaction-set size, or
	// the tSize argument of the second constructor).
	int m_nSize;
	// Thresholds as counts: fraction * m_nSize, converted to int.
	int m_nMinConfidence;
	int m_nMinSupport;

	// Root node of the tree; created by the constructors.
	CFP_TreeNode *m_pCFP_TreeRoot;

private:

	// Recursive worker: inserts items id, id+1, ... of tTran below tRoot.
	void insertFPTree(CFP_TreeNode *tRoot,CTransaction &tTran,int id,int tCount);
	// Depth-first traversal that prints one line per root-to-leaf path.
	void DFSPrintPath(CFP_TreeNode *tRoot,vector<ItemSupport> &tItemSupportSet);
	// Prints the node-link chain starting at tRoot.
	void printLinkList(CFP_TreeNode *tRoot);
	// Frees every node below tRoot (tRoot itself is not deleted).
	void destroy(CFP_TreeNode *tRoot);

public:
	// Header-table items with their support counts, filled by sortMapItemHeaderList().
	vector<ItemSupport>m_vecItemSupportSet;
	// Header table: item name -> support count and first linked tree node.
	map<ItemType,CItemHeaderNode>m_mapItemHeaderList;

	// Rebuilds m_vecItemSupportSet from the header table, sorted with cmp.
	void sortMapItemHeaderList();
	// Adds tCount to tItem's support, creating the header entry if needed.
	void addItem(ItemType tItem,int tCount);
	// Removes header entries whose support is below m_nMinSupport.
	void eraseInfrequent1ItemSet();

	/***********************************************
	*
	* Purpose: sorts the items of every transaction in tTranSet
	*          by support count, from high to low.
	*
	***********************************************/
	void sortTransactionSet(CTransactionSet &tTranSet);

	// Counts item supports over tTranSet, sorts the transactions, prunes
	// infrequent items and builds the sorted item list. It does not insert
	// the transactions into the tree. tCount is the root node's count.
	CFP_Tree(CTransactionSet &tTranSet,double tMinCon,double tMinSup,int tCount);
	// Creates an empty tree with the given thresholds, scaled by tSize.
	CFP_Tree(double tMinCon,double tMinSup,int tSize);
	// Inserts tTran starting at item index id, adding tCount along the path.
	void insertFPTree(CTransaction &tTran,int id,int tCount);
	// Prints every root-to-leaf path of the tree.
	void printPath();
	// Prints the node-link chain of every header entry that has one.
	void printItemHeaderList();
	// True when no node at or below tRoot has more than one child.
	bool isSinglePath(CFP_TreeNode *tRoot);
	// Frees all nodes below the root.
	void destroy();
	friend class CFP_Growth;
};

#endif
//FP_Tree.cpp
/**
 * Created by xujin on 2014/12/4.
   All Rights Reserved,but you can use this program.
 */
#include<algorithm>
#include"FP_Tree.h"

bool cmp(ItemSupport &a,ItemSupport &b)
{
	return a.m_nSupportCount<b.m_nSupportCount;
}

void CFP_Tree::sortMapItemHeaderList()
{
	vector<ItemSupport>tItemSupportSet;

	for(map<ItemType,CItemHeaderNode>::iterator iter=m_mapItemHeaderList.begin();iter!=m_mapItemHeaderList.end();++iter)
	{
		tItemSupportSet.push_back(ItemSupport(iter->first,iter->second.m_nSupportCount));
	}

	sort(tItemSupportSet.begin(),tItemSupportSet.end(),cmp);

	m_vecItemSupportSet.clear();
	for(vector<ItemSupport>::iterator iter=tItemSupportSet.begin();iter!=tItemSupportSet.end();++iter)
	{
	//	cout<<"--->("<<iter->m_ITItemName<<","<<iter->m_nSupportCount<<")"<<endl;
		m_vecItemSupportSet.push_back(*iter);
	}
}

// Prepares a tree for the transaction set tTranSet.
//   tMinCon / tMinSup : minimum confidence / support as fractions; they are
//                       multiplied by the transaction-set size and converted to int.
//   tCount            : support count given to the root node.
// The constructor counts item supports, sorts each transaction's items,
// removes infrequent items from the header table and builds the sorted item
// list. It does not insert any transaction into the tree.
CFP_Tree::CFP_Tree(CTransactionSet &tTranSet,double tMinCon,double tMinSup,int tCount)
	: m_dMinConfidence(tMinCon),
	  m_dMinSupport(tMinSup),
	  m_nSize(tTranSet.getSize()),
	  m_nMinConfidence(0),
	  m_nMinSupport(0),
	  m_pCFP_TreeRoot(new CFP_TreeNode(tCount))
{
	m_nMinConfidence=m_dMinConfidence*m_nSize;
	m_nMinSupport=m_dMinSupport*m_nSize;

	// First pass: every occurrence of an item adds 1 to its support.
	vector<CTransaction>::iterator tranIter=tTranSet.getVeCTransaction().begin();
	while(tranIter!=tTranSet.getVeCTransaction().end())
	{
		vector<string>::iterator itemIter=tranIter->getVecItem().begin();
		while(itemIter!=tranIter->getVecItem().end())
		{
			addItem(*itemIter,1);
			++itemIter;
		}
		++tranIter;
	}

	// Sorting happens before pruning, so the comparator still sees every item's count.
	sortTransactionSet(tTranSet);
	eraseInfrequent1ItemSet();
	sortMapItemHeaderList();
}

// Creates an empty tree (default-constructed root, empty header table).
//   tMinCon / tMinSup : minimum confidence / support as fractions.
//   tSize             : size the fractions are multiplied by to obtain the
//                       integer thresholds (the product is converted to int).
CFP_Tree::CFP_Tree(double tMinCon,double tMinSup,int tSize)
	: m_dMinConfidence(tMinCon),
	  m_dMinSupport(tMinSup),
	  m_nSize(tSize),
	  m_nMinConfidence(0),
	  m_nMinSupport(0),
	  m_pCFP_TreeRoot(new CFP_TreeNode())
{
	m_nMinConfidence=m_dMinConfidence*tSize;
	m_nMinSupport=m_dMinSupport*tSize;
}

// Adds tCount to the support of tItem in the header table. An item seen for
// the first time gets a new entry whose support starts at tCount.
void CFP_Tree::addItem(ItemType tItem,int tCount)
{
	map<ItemType,CItemHeaderNode>::iterator found=m_mapItemHeaderList.find(tItem);
	if(found==m_mapItemHeaderList.end())
	{
		// Unknown item: create its header entry.
		m_mapItemHeaderList.insert(make_pair(tItem,CItemHeaderNode(tCount)));
		return ;
	}

	// Known item: accumulate.
	found->second.m_nSupportCount+=tCount;
}

// Removes from the header table every item whose support count is below
// m_nMinSupport.
void CFP_Tree::eraseInfrequent1ItemSet()
{
	map<ItemType,CItemHeaderNode>::iterator iter=m_mapItemHeaderList.begin();
	while(iter!=m_mapItemHeaderList.end())
	{
		if(iter->second.m_nSupportCount<this->m_nMinSupport)
		{
			// map::erase invalidates the erased iterator, so step to the next
			// element first: the post-increment hands erase the old position
			// while iter already points at the successor.
			m_mapItemHeaderList.erase(iter++);
		}
		else
		{
			++iter;
		}
	}
}

// Holder for the comparator used when sorting the items of a transaction.
// std::sort needs a function object or a static/global function pointer; a
// non-static member function pointer cannot be passed to it directly, so the
// tree to consult is reached through the static g_pRoot.
class CTransactionItemSort
{
public:
	// Tree whose header table supplies the support counts read by cmp.
	static CFP_Tree *g_pRoot;

	// Ordering predicate handed to std::sort.
	static bool cmp(ItemType a,ItemType b);
};

// No tree is selected until CFP_Tree::sortTransactionSet assigns one.
CFP_Tree *CTransactionItemSort::g_pRoot=NULL;

// Ordering predicate for the items of a transaction: higher support first.
// Support counts come from the header table of g_pRoot; an item missing from
// that table counts as -1 and therefore sorts after every listed item.
//
// Items with equal support are ordered by name. Without this tie-break the
// relative order of equal-support items depends on their position in each
// transaction, so the same pair could be stored in opposite orders on
// different tree paths and its co-occurrence count would be split between
// two conditional pattern bases.
//
// g_pRoot must point to a tree before this function is called.
bool CTransactionItemSort::cmp(ItemType a,ItemType b)
{
	map<ItemType,CItemHeaderNode> &headers=CTransactionItemSort::g_pRoot->m_mapItemHeaderList;

	map<ItemType,CItemHeaderNode>::iterator iter=headers.find(a);
	const int aCount=(iter!=headers.end())?iter->second.m_nSupportCount:-1;

	iter=headers.find(b);
	const int bCount=(iter!=headers.end())?iter->second.m_nSupportCount:-1;

	if(aCount!=bCount)
		return aCount>bCount;
	return a<b;   // deterministic order among equally frequent items
}

// Sorts the items of every transaction in tTranSet with
// CTransactionItemSort::cmp, using this tree's header table for the counts.
void CFP_Tree::sortTransactionSet(CTransactionSet &tTranSet)
{
	// The comparator is a static function, so it reaches this tree through g_pRoot.
	CTransactionItemSort::g_pRoot=this;

	vector<CTransaction>::iterator tranIter=tTranSet.getVeCTransaction().begin();
	while(tranIter!=tTranSet.getVeCTransaction().end())
	{
		sort(tranIter->getVecItem().begin(),tranIter->getVecItem().end(),CTransactionItemSort::cmp);
		++tranIter;
	}
}

// Recursive worker that inserts items id, id+1, ... of tTran below tRoot,
// adding tCount to every node on the path.
//   tRoot  : node under which item id is placed; NULL is ignored.
//   tTran  : transaction being inserted.
//   id     : index of the current item; an index outside the transaction ends
//            the recursion.
//   tCount : support added to each node touched.
// Insertion stops at the first item that is not in the header table.
// A node holds at most MAXN_CHILD children: when a new child would not fit,
// a message is written to std::cerr and the rest of the transaction is not
// inserted, instead of writing past the end of the child array.
void CFP_Tree::insertFPTree(CFP_TreeNode *tRoot,CTransaction &tTran,int id,int tCount)
{
	if(tRoot==NULL||id<0||static_cast<std::size_t>(id)>=tTran.getVecItem().size())
		return ;

	ItemType item=tTran.getVecItem()[id];
	map<ItemType,CItemHeaderNode>::iterator header=this->m_mapItemHeaderList.find(item);
	if(header==this->m_mapItemHeaderList.end())
		return ;

	// Follow an existing child that already carries this item.
	for(int i=0;i<tRoot->m_nChildSize;++i)
	{
		CFP_TreeNode *pExisting=tRoot->m_pChildNode[i];
		if(pExisting!=NULL&&pExisting->m_ITItemName==item)
		{
			pExisting->m_nSupportCount+=tCount;
			this->insertFPTree(pExisting,tTran,id+1,tCount);
			return ;
		}
	}

	// A new child is needed; refuse to overflow the fixed-size child array.
	if(tRoot->m_nChildSize>=MAXN_CHILD)
	{
		std::cerr<<"CFP_Tree::insertFPTree: node \""<<tRoot->m_ITItemName
		         <<"\" already has "<<MAXN_CHILD<<" children; item \""<<item
		         <<"\" was not inserted"<<std::endl;
		return ;
	}

	// The new node goes to the front of the item's node-link chain.
	CFP_TreeNode *pChild=new CFP_TreeNode(item,tRoot,header->second.m_pFPFirst,tCount);
	header->second.m_pFPFirst=pChild;
	tRoot->m_pChildNode[tRoot->m_nChildSize]=pChild;
	++tRoot->m_nChildSize;
	this->insertFPTree(pChild,tTran,id+1,tCount);
}

// Public entry point: inserts tTran into the tree starting from the root,
// beginning with the item at index id and adding tCount along the path.
void CFP_Tree::insertFPTree(CTransaction &tTran,int id,int tCount)
{
	CFP_TreeNode *root=m_pCFP_TreeRoot;
	insertFPTree(root,tTran,id,tCount);
}

// Depth-first traversal below tRoot. tItemSupportSet holds the
// (item, support) pairs of the path walked so far; when a node without
// children is reached the whole path is written to cout on one line.
void CFP_Tree::DFSPrintPath(CFP_TreeNode *tRoot,vector<ItemSupport> &tItemSupportSet)
{
	const int childCount=tRoot->m_nChildSize;
	if(childCount!=0)
	{
		// Inner node: extend the path with each child in turn.
		for(int idx=0;idx<childCount;++idx)
		{
			CFP_TreeNode *branch=tRoot->m_pChildNode[idx];
			tItemSupportSet.push_back(ItemSupport(branch->m_ITItemName,branch->m_nSupportCount));
			DFSPrintPath(branch,tItemSupportSet);
			tItemSupportSet.pop_back();
		}
		return ;
	}

	// Leaf: emit the accumulated path.
	for(vector<ItemSupport>::size_type pos=0;pos<tItemSupportSet.size();++pos)
		cout<<"--->("<<tItemSupportSet[pos].m_ITItemName<<","<<tItemSupportSet[pos].m_nSupportCount<<")";
	cout<<endl;
}

void CFP_Tree::printPath()
{
	vector<ItemSupport>tItemSupportSet;
	cout<<"打印FP_Tree树:"<<endl;
	this->DFSPrintPath(m_pCFP_TreeRoot,tItemSupportSet);
}

// Writes the node-link chain that starts at tRoot to cout, one
// "--->(item,support)" entry per node. tRoot must not be NULL.
void CFP_Tree::printLinkList(CFP_TreeNode *tRoot)
{
	CFP_TreeNode *node=tRoot;
	do
	{
		cout<<"--->("<<node->m_ITItemName<<","<<node->m_nSupportCount<<")";
		node=node->m_pLinkedNode;
	}
	while(node!=NULL);
}

void CFP_Tree::printItemHeaderList()
{
	cout<<"打印顶点表中每个单链表:"<<endl;
	for(map<ItemType,CItemHeaderNode>::iterator iter=m_mapItemHeaderList.begin();iter!=m_mapItemHeaderList.end();++iter)
	{
		if(iter->second.m_pFPFirst!=NULL)
		{
			cout<<"("<<iter->first<<","<<iter->second.m_nSupportCount<<") :";
			printLinkList(iter->second.m_pFPFirst);
			cout<<endl;
		}
	}
}

// Frees every node below tRoot. tRoot itself is not deleted; it is left as a
// childless node with its parent and node-link pointers cleared.
// A NULL tRoot is ignored.
void CFP_Tree::destroy(CFP_TreeNode *tRoot)
{
	if(tRoot==NULL)
		return ;

	for(int i=0;i<tRoot->m_nChildSize;++i)
	{
		CFP_TreeNode *pChild=tRoot->m_pChildNode[i];
		if(pChild!=NULL)
		{
			destroy(pChild);   // free the grandchildren first
			delete pChild;
			tRoot->m_pChildNode[i]=NULL;
		}
	}

	// Reset the count as well: a stale value would make later traversals
	// (e.g. isSinglePath) follow the NULL child slots.
	tRoot->m_nChildSize=0;
	tRoot->m_pFatherNode=NULL;
	tRoot->m_pLinkedNode=NULL;
}

void CFP_Tree::destroy()
{
	this->destroy(this->m_pCFP_TreeRoot);
}

// Returns true when the tree below tRoot is a single chain, i.e. no node on
// the way down from tRoot has more than one child. tRoot must not be NULL.
bool CFP_Tree::isSinglePath(CFP_TreeNode *tRoot)
{
	CFP_TreeNode *node=tRoot;
	while(true)
	{
		if(node->m_nChildSize==0)
			return true;    // reached the end of the chain
		if(node->m_nChildSize>1)
			return false;   // the path forks here
		node=node->m_pChildNode[0];
	}
}
//FP_Crowth.h
/**
 * Created by xujin on 2014/12/4.
   All Rights Reserved,but you can use this program.
 */
#ifndef FP_GROWTH_H
#define FP_GROWTH_H

#include"Transaction.h"
#include"TransactionSet.h"
#include"FP_Tree.h"

// Runs FP-Growth on a CFP_Tree. All the work is done by the constructor,
// which writes the item sets it finds to standard output.
class CFP_Growth
{
private:
	// Tree this object was constructed with.
	CFP_Tree *m_pCFPTConditionTree;

private:
	// For every item of tCFPTTree builds its conditional tree and mines it
	// recursively, with the item appended to tItemSupportSet.
	void initCFP_Growth(CFP_Tree *tCFPTTree,vector<ItemSupport>& tItemSupportSet);
	// Prints one item set with its support count if it reaches the minimum support.
	void printOneFreSet(vector<ItemSupport> &tItemSupportSet);
	// Enumerates the combinations of the nodes on a single path starting at tRoot.
	void findCombine(CFP_TreeNode *tRoot,vector<ItemSupport> &tItemSupportSet);

public:

	// Mines tCFPTTree; tItemSupportSet holds the suffix items chosen so far.
	CFP_Growth(CFP_Tree *tCFPTTree,vector<ItemSupport>& tItemSupportSet);
	// Prints every root-to-leaf path of the tree.
	void printPath();
	// Prints the node-link chain of every header entry that has one.
	void printItemHeaderList();
};

#endif
//FP_Crowth.cpp
/**
 * Created by xujin on 2014/12/4.
   All Rights Reserved,but you can use this program.
 */
#include"FP_Crowth.h"
#include<algorithm>
using namespace std;

// For every item in tCFPTTree's sorted item list:
//   1. walks the item's node-link chain and, for each node, collects the
//      items of its ancestors (up to, not including, the unnamed root) as a
//      prefix path weighted by the node's support count;
//   2. inserts those prefix paths into a fresh conditional tree;
//   3. mines that tree recursively with the item appended to tItemSupportSet.
// tItemSupportSet is restored to its incoming contents before returning.
// The conditional tree and the recursive miner are released once each item
// has been processed.
void CFP_Growth::initCFP_Growth(CFP_Tree *tCFPTTree,vector<ItemSupport>& tItemSupportSet)
{
	for(vector<ItemSupport>::iterator iter=tCFPTTree->m_vecItemSupportSet.begin();iter!=tCFPTTree->m_vecItemSupportSet.end();++iter)
	{
		map<ItemType,CItemHeaderNode>::iterator iterMap=tCFPTTree->m_mapItemHeaderList.find(iter->m_ITItemName);
		if(iterMap==tCFPTTree->m_mapItemHeaderList.end())
			continue;   // item list and header table disagree; there is nothing to mine

		// Build the conditional FP-tree of this item.
		CFP_Tree *pCFPTConTree=new CFP_Tree(tCFPTTree->m_dMinConfidence,tCFPTTree->m_dMinSupport,tCFPTTree->m_nSize);
		for(CFP_TreeNode *next=iterMap->second.m_pFPFirst; next!=NULL; next=next->m_pLinkedNode)
		{
			const int count=next->m_nSupportCount;

			// Ancestors are visited bottom-up, so tran holds the prefix path reversed.
			CTransaction tran;
			for(CFP_TreeNode *fa=next->m_pFatherNode; fa!=NULL&&!fa->m_ITItemName.empty(); fa=fa->m_pFatherNode)
			{
				tran.addItem(fa->m_ITItemName);
				pCFPTConTree->addItem(fa->m_ITItemName,count);
			}

			// Restore top-down order before inserting the path.
			CTransaction reve;
			for(vector<string>::reverse_iterator itemIter=tran.getVecItem().rbegin();itemIter!=tran.getVecItem().rend();++itemIter)
			{
				reve.addItem(*itemIter);
			}
			pCFPTConTree->insertFPTree(reve,0,count);
		}
		pCFPTConTree->sortMapItemHeaderList();

		tItemSupportSet.push_back(ItemSupport(iterMap->first,iterMap->second.m_nSupportCount));
		{
			// All mining happens inside the constructor; the object lives on
			// the stack so it is gone before the tree it refers to is freed.
			CFP_Growth conditionalMiner(pCFPTConTree,tItemSupportSet);
		}
		tItemSupportSet.pop_back();

		// Release the conditional tree: destroy() frees the nodes below the
		// root, then the root and the tree object are deleted here.
		pCFPTConTree->destroy();
		delete pCFPTConTree->m_pCFP_TreeRoot;
		pCFPTConTree->m_pCFP_TreeRoot=NULL;
		delete pCFPTConTree;
	}
}

// Mines tCFPTTree; tItemSupportSet holds the suffix items chosen so far.
//   - tree without nodes below the root: report the suffix itself;
//   - tree that is a single path: enumerate the combinations of its nodes;
//   - otherwise: build and mine a conditional tree for each item.
CFP_Growth::CFP_Growth(CFP_Tree *tCFPTTree,vector<ItemSupport> &tItemSupportSet)
	: m_pCFPTConditionTree(tCFPTTree)
{
	CFP_TreeNode *root=tCFPTTree->m_pCFP_TreeRoot;

	if(root->m_nChildSize==0)
	{
		printOneFreSet(tItemSupportSet);
		return ;
	}

	if(tCFPTTree->isSinglePath(root))
	{
		findCombine(root->m_pChildNode[0],tItemSupportSet);
		return ;
	}

	initCFP_Growth(tCFPTTree,tItemSupportSet);
}

// Enumerates every combination of the nodes on the single path that starts
// at tRoot: each node is first left out, then included, and the recursion
// continues with its first child. When the end of the path is reached
// (tRoot is NULL) the current selection is passed to printOneFreSet.
void CFP_Growth::findCombine(CFP_TreeNode *tRoot,vector<ItemSupport> &tItemSupportSet)
{
	if(tRoot!=NULL)
	{
		CFP_TreeNode *below=tRoot->m_pChildNode[0];

		// Branch 1: this node is not part of the combination.
		findCombine(below,tItemSupportSet);

		// Branch 2: this node is part of the combination.
		tItemSupportSet.push_back(ItemSupport(tRoot->m_ITItemName,tRoot->m_nSupportCount));
		findCombine(below,tItemSupportSet);
		tItemSupportSet.pop_back();
		return ;
	}

	printOneFreSet(tItemSupportSet);
}

// Prints one item set as "{ item ... count }" on its own line, where count is
// the smallest support count among the entries of tItemSupportSet. Items are
// printed in reverse order of the vector.
// Nothing is printed when the set has fewer than two items (this includes the
// empty selection produced for a top-level single-path tree) or when count is
// below the tree's minimum support count.
void CFP_Growth::printOneFreSet(vector<ItemSupport> &tItemSupportSet)
{
	if(tItemSupportSet.size()<2)
		return ;

	// The set is non-empty here, so the minimum can start from a real entry.
	int count=tItemSupportSet.front().m_nSupportCount;
	for(vector<ItemSupport>::iterator iter=tItemSupportSet.begin();iter!=tItemSupportSet.end();++iter)
	{
		if(iter->m_nSupportCount<count)
			count=iter->m_nSupportCount;
	}
	if(count<m_pCFPTConditionTree->m_nMinSupport)
	   return ;

	cout<<"{ ";
	for(vector<ItemSupport>::reverse_iterator iter=tItemSupportSet.rbegin();iter!=tItemSupportSet.rend();++iter)
	{
		cout<<iter->m_ITItemName<<" ";
	}
	cout<<count<<" }";
	cout<<endl;
}

// Prints every root-to-leaf path of the tree this object was constructed
// with. Delegates to CFP_Tree::printPath, which writes the same heading and
// performs the same traversal, so the logic lives in one place.
void CFP_Growth::printPath()
{
	this->m_pCFPTConditionTree->printPath();
}

// Prints the node-link chain of every header entry of the tree this object
// was constructed with. Delegates to CFP_Tree::printItemHeaderList, which
// writes the same heading and the same lines, so the logic lives in one place.
void CFP_Growth::printItemHeaderList()
{
	this->m_pCFPTConditionTree->printItemHeaderList();
}

FP-growth算法比Apriori算法快一个数量级,在空间复杂度方面比Apriori也有数量级级别的优化。但是对于海量数据,FP-growth的时空复杂度仍然很高,可以采用的改进方法包括数据库划分,数据采样等等。

由于时间有限,在写博文的过程中参考过一些文献,在此表示感谢;同时鉴于水平原因,难免有不足之处,欢迎斧正!

时间: 2024-10-05 21:48:07

FP_Growth算法原理及实现的相关文章

Adaboost算法原理分析和实例+代码(简明易懂)

Adaboost算法原理分析和实例+代码(简明易懂) [尊重原创,转载请注明出处] http://blog.csdn.net/guyuealian/article/details/70995333     本人最初了解AdaBoost算法着实是花了几天时间,才明白他的基本原理.也许是自己能力有限吧,很多资料也是看得懵懵懂懂.网上找了一下关于Adaboost算法原理分析,大都是你复制我,我摘抄你,反正我也搞不清谁是原创.有些资料给出的Adaboost实例,要么是没有代码,要么省略很多步骤,让初学者

FP Tree算法原理总结

在Apriori算法原理总结中,我们对Apriori算法的原理做了总结.作为一个挖掘频繁项集的算法,Apriori算法需要多次扫描数据,I/O是很大的瓶颈.为了解决这个问题,FP Tree算法(也称FP Growth算法)采用了一些技巧,无论多少数据,只需要扫描两次数据集,因此提高了算法运行的效率.下面我们就对FP Tree算法做一个总结. 1. FP Tree数据结构 为了减少I/O次数,FP Tree算法引入了一些数据结构来临时存储数据.这个数据结构包括三部分,如下图所示: 第一部分是一个项

分布式memcached学习(四)—— 一致性hash算法原理

    分布式一致性hash算法简介 当你看到"分布式一致性hash算法"这个词时,第一时间可能会问,什么是分布式,什么是一致性,hash又是什么.在分析分布式一致性hash算法原理之前,我们先来了解一下这几个概念. 分布式 分布式(distributed)是指在多台不同的服务器中部署不同的服务模块,通过远程调用协同工作,对外提供服务. 以一个航班订票系统为例,这个航班订票系统有航班预定.网上值机.旅客信息管理.订单管理.运价计算等服务模块.现在要以集中式(集群,cluster)和分布

POJ1523(求连用分量数目,tarjan算法原理理解)

SPF Time Limit: 1000MS   Memory Limit: 10000K Total Submissions: 7406   Accepted: 3363 Description Consider the two networks shown below. Assuming that data moves around these networks only between directly connected nodes on a peer-to-peer basis, a

Kmeans聚类算法原理与实现

Kmeans聚类算法 1 Kmeans聚类算法的基本原理 K-means算法是最为经典的基于划分的聚类方法,是十大经典数据挖掘算法之一.K-means算法的基本思想是:以空间中k个点为中心进行聚类,对最靠近他们的对象归类.通过迭代的方法,逐次更新各聚类中心的值,直至得到最好的聚类结果. 假设要把样本集分为k个类别,算法描述如下: (1)适当选择k个类的初始中心,最初一般为随机选取: (2)在每次迭代中,对任意一个样本,分别求其到k个中心的欧式距离,将该样本归到距离最短的中心所在的类: (3)利用

【转】两种非对称算法原理:RSA和DH

转自:http://blog.chinaunix.net/uid-7550780-id-2611984.html 两种非对称算法原理:RSA和DH 虽然对称算法的效率高,但是密钥的传输需要另外的信道.非对称算法RSA和DH可以解决密钥的传输问题(当然,它们的作用不限于此).这两个算法的名字都是来自于算法作者的缩写,希望有朝一日能够出现用中国人命名的加密算法.非对称算法的根本原理就是单向函数,f(a)=b,但是用b很难得到a. RSA算法 RSA算法是基于大数难于分解的原理.不但可以用于认证,也可

Canny边缘检测算法原理及其VC实现详解(一)

转自:http://blog.csdn.net/likezhaobin/article/details/6892176 图象的边缘是指图象局部区域亮度变化显著的部分,该区域的灰度剖面一般可以看作是一个阶跃,既从一个灰度值在很小的缓冲区域内急剧变化到另一个灰度相差较大的灰度值.图象的边缘部分集中了图象的大部分信息,图象边缘的确定与提取对于整个图象场景的识别与理解是非常重要的,同时也是图象分割所依赖的重要特征,边缘检测主要是图象的灰度变化的度量.检测和定位,自从1959提出边缘检测以来,经过五十多年

排序算法原理及实现

算法一:直接插入排序 算法实现原理:就是计算一个新元素是应该放在哪里?每次进来一个都会进行和原来顺序进行重新组合. 代码实现:Java public int[] testInsertionSort(int[] data){ // this methord is very easy. for(int i = 1;i < data.length;i++){ int temp = data[i]; int j =i; while(j>0 && data[j-1]>temp){

Bagging与随机森林算法原理小结

在集成学习原理小结中,我们讲到了集成学习有两个流派,一个是boosting派系,它的特点是各个弱学习器之间有依赖关系.另一种是bagging流派,它的特点是各个弱学习器之间没有依赖关系,可以并行拟合.本文就对集成学习中Bagging与随机森林算法做一个总结. 随机森林是集成学习中可以和梯度提升树GBDT分庭抗礼的算法,尤其是它可以很方便的并行训练,在如今大数据大样本的的时代很有诱惑力. 1.  bagging的原理 在集成学习原理小结中,我们给Bagging画了下面一张原理图. 从上图可以看出,