关联规则挖掘算法

using System;

using System.Collections.Generic;

using System.ComponentModel;

using System.Data;

using System.Drawing;

using System.Text;

using System.Windows.Forms;

using System.Collections;

namespace Apriori

{

    // A single transaction: an identifier plus the list of items it contains.

    struct trans

    {

        // Transaction ID (the sample data in this file uses values such as "T100").
        public string tID;

        // Items belonging to this transaction; the visible code stores item IDs as strings.
        public ArrayList items;

    }

    // An itemset together with its support count.

    struct itemAndSup

    {

        // The items that make up the itemset (sorted by the code that builds candidates).
        public ArrayList items;

        // Support count: the number of transactions that contain every item of the itemset.
        public int sup;

    }

    public partial class Form1 : Form

    {

        private ArrayList tData = new ArrayList();      // transaction data (trans entries)

        private int minSup = 2;                         // minimum support count threshold

        private ArrayList C0 = new ArrayList();         // candidate itemsets (superset of L)

        private ArrayList L0 = new ArrayList();         // frequent k-itemsets

        private int step;                               // number of steps completed so far

        private bool finish;                            // whether the algorithm has finished

        // Creates the form. InitializeComponent is defined outside this section
        // (presumably the designer-generated half of this partial class).
        public Form1()

        {

            InitializeComponent();

        }

        // Event handler named for the form's Load event: puts the form into its
        // initial state by calling Init().
        private void Form1_Load(object sender, EventArgs e)

        {

            Init();

        }

        //初始化程序主界面

        private void Init()

        {

            this.Text = "关联规则算法";

            tData.Clear();

            C0.Clear();

            L0.Clear();

            this.TDataView.Items.Clear();

            this.CResultView.Items.Clear();

            this.LResultView.Items.Clear();

            this.ItemList.Items.Clear();

            this.TDataView.Items.Add("TID\t商品ID的列表\n");

            this.ItemList.Items.Add("I1");

            this.ItemList.Items.Add("I2");

            this.ItemList.Items.Add("I3");

            this.ItemList.Items.Add("I4");

            this.ItemList.Items.Add("I5");

            this.confList.Items.Add("I1");

            this.confList.Items.Add("I2");

            this.confList.Items.Add("I3");

            this.confList.Items.Add("I4");

            this.confList.Items.Add("I5");

            this.ListConf.Items.Add("I1");

            this.ListConf.Items.Add("I2");

            this.ListConf.Items.Add("I3");

            this.ListConf.Items.Add("I4");

            this.ListConf.Items.Add("I5");

            this.txtbMinSup.Text = minSup.ToString();

            step = 0;

            finish = false;

        }     

        

        //删除事务

        private void DeleteItem_Click(object sender, EventArgs e)

        {

            if (this.TDataView.SelectedIndex == 0)

                return;

            tData.RemoveAt(this.TDataView.SelectedIndex - 1);

            this.TDataView.Items.RemoveAt(this.TDataView.SelectedIndex);

        }

        //示例事务

        #region

        // Click handler that loads the built-in sample transactions via example().
        private void Example_Click(object sender, EventArgs e)

        {

            example();

        }

        private void example()

        {

            trans t1 = new trans();

            t1.tID = "T100";

            t1.items = new ArrayList();

            t1.items.Add("I1");

            t1.items.Add("I2");

            t1.items.Add("I5");

            AddItemToDataView(t1);

            tData.Add(t1);

            trans t2 = new trans();

            t2.tID = "T200";

            t2.items = new ArrayList();

            t2.items.Add("I2");

            t2.items.Add("I4");

            AddItemToDataView(t2);

            tData.Add(t2);

            trans t3 = new trans();

            t3.tID = "T300";

            t3.items = new ArrayList();

            t3.items.Add("I2");

            t3.items.Add("I3");

            AddItemToDataView(t3);

            tData.Add(t3);

            trans t4 = new trans();

            t4.tID = "T400";

            t4.items = new ArrayList();

            t4.items.Add("I1");

            t4.items.Add("I2");

            t4.items.Add("I4");

            AddItemToDataView(t4);

            tData.Add(t4);

            trans t5 = new trans();

            t5.tID = "T500";

            t5.items = new ArrayList();

            t5.items.Add("I1");

            t5.items.Add("I3");

            AddItemToDataView(t5);

            tData.Add(t5);

            trans t6 = new trans();

            t6.tID = "T600";

            t6.items = new ArrayList();

            t6.items.Add("I2");

            t6.items.Add("I3");

            AddItemToDataView(t6);

            tData.Add(t6);

            trans t7 = new trans();

            t7.tID = "T700";

            t7.items = new ArrayList();

            t7.items.Add("I1");

            t7.items.Add("I3");

            AddItemToDataView(t7);

            tData.Add(t7);

            trans t8 = new trans();

            t8.tID = "T800";

            t8.items = new ArrayList();

            t8.items.Add("I1");

            t8.items.Add("I2");

            t8.items.Add("I3");

            t8.items.Add("I5");

            AddItemToDataView(t8);

            tData.Add(t8);

            trans t9 = new trans();

            t9.tID = "T900";

            t9.items = new ArrayList();

            t9.items.Add("I1");

            t9.items.Add("I2");

            t9.items.Add("I3");

            AddItemToDataView(t9);

            tData.Add(t9);

        }

        #endregion

        //最小支持度阈值变化

        private void txtbMinSup_TextChanged(object sender, EventArgs e)

        {

            try

            {

                //获得最小支持度阈值,将其赋给minSup

                minSup = int.Parse(this.txtbMinSup.Text);

            }

            catch

            {

                MessageBox.Show("非法输入!");

                this.txtbMinSup.Text = minSup.ToString();

            }

        }

        //添加事务

        #region

        // Returns the transaction ID: the current text of the TID control.

        public string GetTID()

        {

            return this.TID.Text;

        }

        //获得事务中的项

        public ArrayList GetItemList()

        {

            ArrayList items = new ArrayList();

            for (int i = 0; i < this.SelectItemList.Items.Count; i++)

            {

                string itemID = this.SelectItemList.Items[i].ToString();

                items.Add(itemID);

            }

            items.Sort();

            return items;

        }

        //添加项到备选栏

        private void Add_Click(object sender, EventArgs e)

        {

            if (this.ItemList.SelectedIndex < 0)

                return;

            for (int i = 0; i < this.SelectItemList.Items.Count; i++)

            {

                if (this.SelectItemList.Items[i] == this.ItemList.SelectedItem)

                    return;

            }

            this.SelectItemList.Items.Add(this.ItemList.SelectedItem);

        }

        //从备选栏中删除项

        private void Delete_Click(object sender, EventArgs e)

        {

            if (this.SelectItemList.SelectedIndex < 0)

                return;

            this.SelectItemList.Items.RemoveAt(this.SelectItemList.SelectedIndex);

        }

        // Click handler that confirms the composed transaction and adds it to the
        // mining input via insertform().

        private void Submit_Click(object sender, EventArgs e)

        {

            insertform();

        }

        private void insertform()

        {

            trans t = new trans();

            t.tID = GetTID();

            t.items = GetItemList();

            AddItemToDataView(t);

            tData.Add(t);

        }

        

        private void AddItemToDataView(trans t)

        {

            string transLine = "";

            //添加TID

            transLine = transLine + t.tID + "\t";

            //添加商品ID列表

            for (int i = 0; i < t.items.Count; i++)

            {

                transLine = transLine + t.items[i].ToString() + ",";

            }

            transLine = transLine + "\n";

            this.TDataView.Items.Add(transLine);

        }     

        // Click handler that cancels the transaction being composed by emptying
        // the pending selection list.

        private void Cancel_Click(object sender, EventArgs e)

        {

            this.SelectItemList.Items.Clear();

        }

        #endregion

        //计算下一项

        private void Next_Click(object sender, EventArgs e)

        {

            if (finish == true)

            {

                this.Next.Text = "计算下一步";

                Init();

                return;

            }

            

            ArrayList OldL = new ArrayList(L0);

            //增加步骤计数,用来决定计算C或者是L。

            step++;

            //计算L并显示L视图

            #region           

            //计算L

            if (step % 2 == 1)

            {

                //找出频繁1项集L1

                if (step == 1)

                {

                    //当前事务总数tData.Count

                    for (int i = 0; i < tData.Count; i++)

                    {

                        trans t = (trans)tData[i];

                        //当前一个事务中的项的总数t.items.Count

                        for (int j = 0; j < t.items.Count; j++)

                        {

                            bool flag = true;

                            //判断一下当前项是不是已经被计算过支持度,L0用于存放频繁K项集(items和sup)

                            for (int k = 0; k < L0.Count; k++)

                            {

                                string mda=((itemAndSup)L0[k]).items[0].ToString();

                                if (((itemAndSup)L0[k]).items[0] == t.items[j])

                                {

                                    flag = false;

                                    break;

                                }

                            }

                            if (flag == false)

                                continue;

                            ArrayList items = new ArrayList();

                            items.Add(t.items[j]);

                            int sup = FindItemSup(items);

                            if (sup >= minSup)

                            {

                                itemAndSup temp = new itemAndSup();

                                temp.sup = sup;

                                temp.items = items;

                                L0.Add(temp);

                            }

                        }

                    }

                }

                //通过Ck来确定Lk

                else

                {

                    L0.Clear();

                    for (int i = 0; i < C0.Count; i++)

                    {

                        itemAndSup temp = (itemAndSup)C0[i];

                        if (temp.sup >= minSup)

                            L0.Add(temp);

                    }

                }

                //更新L的视图               

                    if (L0.Count != 0)

                    {

                        this.LResultView.Items.Clear();

                        this.LResultView.Items.Add("项集\t支持度计数\n");

                        for (int i = 0; i < L0.Count; i++)

                        {

                            ArrayList items = ((itemAndSup)L0[i]).items;

                            int sup = ((itemAndSup)L0[i]).sup;

                            string LResultLine = "";

                            for (int j = 0; j < items.Count; j++)

                            {

                                LResultLine = LResultLine + items[j].ToString() + ",";

                            }

                            LResultLine = LResultLine + "\t" + sup + "\n";

                            this.LResultView.Items.Add(LResultLine);

                        }

                        this.resultBox.Items.Clear();

                        this.resultBox.Items.Add("项集\t支持度计数\n");

                        for (int i = 0; i < OldL.Count; i++)

                        {

                            ArrayList items = ((itemAndSup)OldL[i]).items;

                            int sup = ((itemAndSup)OldL[i]).sup;

                            string ResultLine = "";

                            for (int j = 0; j < items.Count; j++)

                            {

                                ResultLine = ResultLine + items[j].ToString() + ",";

                            }

                            ResultLine = ResultLine + "\t" + sup + "\n";

                            this.resultBox.Items.Add(ResultLine);

                        }

                    }

                    else

                    {                       

                        this.resultBox.Items.Clear();

                        this.resultBox.Items.Add("项集\t支持度计数\n");

                        for (int i = 0; i < OldL.Count; i++)

                        {

                            ArrayList items = ((itemAndSup)OldL[i]).items;

                            int sup = ((itemAndSup)OldL[i]).sup;

                            string ResultLine = "";

                            for (int j = 0; j < items.Count; j++)

                            {

                                ResultLine = ResultLine + items[j].ToString() + ",";

                            }

                            ResultLine = ResultLine + "\t" + sup + "\n";

                            this.resultBox.Items.Add(ResultLine);

                        }

                        OldL.Clear();

                        this.LResultView.Items.Clear();

                        this.LResultView.Items.Add("项集\t支持度计数\n");

                        for (int i = 0; i < OldL.Count; i++)

                        {

                            ArrayList items = ((itemAndSup)OldL[i]).items;

                            int sup = ((itemAndSup)OldL[i]).sup;

                            string LResultLine = "";

                            for (int j = 0; j < items.Count; j++)

                            {

                                LResultLine = LResultLine + items[j].ToString() + ",";

                            }

                            LResultLine = LResultLine + "\t" + sup + "\n";

                            this.LResultView.Items.Add(LResultLine);

                        }

                    }

                //更新L说明

                    if (L0.Count != 0)

                        this.Msg.Text = "比较候选支持度计数与最小支持度计数";

                    else

                    {

                        this.Msg.Text = "由于L为空,算法终止";

                        this.Next.Text = "完成(重新开始)";

                        finish = true;

                    }

            }

            #endregion

            //计算C并显示C视图

            #region          

            //计算C

            else

            {

                //通过将Lk-1与Lk-1自身连接产生Ck,Lk-1中的items项的顺序已经排好为由小到大

                C0.Clear();

                for (int i = 0; i < L0.Count; i++)

                {

                    //items0(Lk)与Lk合并

                    ArrayList items0 = ((itemAndSup)L0[i]).items;

                    //将可以合并到items0的值items[k]添加到addItem中,以防止后面重复添加

                    ArrayList addItem = new ArrayList();

                    for (int j = 0; j < L0.Count; j++)

                    {

                        //当自身与自身组合时,跳过这一步

                        if (j == i)

                            continue;

                        //Lk本身,将被合并到items0

                        ArrayList items1 = ((itemAndSup)L0[j]).items;

                        for (int k = 0; k < items1.Count; k++)

                        {

                            //当前items1[k]是否比items0中最后一个数值小,如果小的话则进行下一次循环

                            if (((string)items1[k]).CompareTo((string)items0[items0.Count - 1]) <= 0)

                                continue;

                            //如果items[1]已经合并到items0的话,则进行下一次循环

                            if (addItem.Contains(items1[k]))

                                continue;

                            //对items0+items1[k]进行Ck与Lk-1测试,判断Ck是否是Lk-1的超集,

                            //如果不是超集 则合并后的项集的支持度肯定小于最小支持度阈值

                            bool mmm = ItemTest(items0, items1[k]);

                            if (ItemTest(items0, items1[k]))//测试通过

                            {

                                ArrayList items = new ArrayList(items0);

                                items.Add(items1[k]);

                                items.Sort();

                                int sup = FindItemSup(items);

                                itemAndSup temp = new itemAndSup();

                                temp.items = items;

                                temp.sup = sup;

                                C0.Add(temp);

                                addItem.Add(items1[k]);

                            }

                        }

                    }

                }

                //更新C视图

                    this.CResultView.Items.Clear();

                    this.CResultView.Items.Add("项集\t支持度计数\n");

                    for (int i = 0; i < C0.Count; i++)

                    {

                        ArrayList items = ((itemAndSup)C0[i]).items;

                        int sup = ((itemAndSup)C0[i]).sup;

                        string CResultLine = "";

                        for (int j = 0; j < items.Count; j++)

                        {

                            CResultLine = CResultLine + items[j].ToString() + ",";

                        }

                        CResultLine = CResultLine + "\t" + sup + "\n";

                        this.CResultView.Items.Add(CResultLine);

                    }              

                //更新C视图说明

                    if (C0.Count != 0)

                        this.Msg.Text = "由L产生C,并扫描D,对每个候选计数";

                    else

                    {

                        this.Msg.Text = "由于C为空,算法终止";

                        this.Next.Text = "完成(重新开始)";

                        finish = true;

                    }

            }

            #endregion

        }

        //计算项集的支持度Sup

        private int FindItemSup(ArrayList item)

        {

            //初始化支持度为0

            int count = 0;

            //对每一个事务进行查询

            for (int i = 0; i < tData.Count; i++)

            {

                trans t = (trans)tData[i];

                bool flag = true;

                //将传递过来的项集,将项集中的每一个项与事务进行对比,查看是否存在于事务中               

                for (int j = 0; j < item.Count; j++)

                {

                    //只要有一个项不存在于事务中,flag=0,则此项集不存于事务中

                    if (!(t.items.Contains(item[j])))

                    {

                        flag = false;

                        break;

                    }

                }

                //如果项集存在于事务中,则支持度加1

                if (flag == true)

                    count++;

            }

            //返回支持度计数

            return count;

        }

        //对items0+items1[k]进行Ck与Lk-1测试,判断Ck是否是Lk-1的超集,如果是超集,则返回true,如果不是则返回false

        private bool ItemTest(ArrayList items,object addItem)

        {           

            for (int i = 0; i < items.Count;i++ )

            {

                ArrayList newItems = new ArrayList(items);

                newItems.RemoveAt(i);

                newItems.Add(addItem);

                newItems.Sort();

                for (int j = 0; j < L0.Count; j++)

                {

                    bool flag2=true;

                    ArrayList tempItems = ((itemAndSup)L0[j]).items;

                    for (int k = 0; k < tempItems.Count;k++ )

                    {

                        if (newItems[k]!=tempItems[k])

                        {

                            flag2 = false;                       

                            break;

                        }

                    }

                    //只要有一个存在于Lk-1中即可返回true,结束本测试

                    if (flag2==true)

                    {

                        return true;

                    }

                }               

            }

            //如果所有对比均进行完毕,则返回false

            return false;          

        }

        //推导项里添加项集

        private void btnAddConf_Click(object sender, EventArgs e)

        {

            if (this.confList.SelectedIndex < 0)

                return;

            for (int i = 0; i < this.confEnd.Items.Count; i++)

            {

                if (this.confEnd.Items[i] == this.confList.SelectedItem)

                    return;

            }

            this.confEnd.Items.Add(this.confList.SelectedItem);

        }

        //推导项里删除项

        private void btnCancelConf_Click(object sender, EventArgs e)

        {

            if (this.confEnd.SelectedIndex < 0)

                return;

            this.confEnd.Items.RemoveAt(this.confEnd.SelectedIndex);

        }

        //关联项添加项集

        private void btnConf_Click(object sender, EventArgs e)

        {

            if (this.ListConf.SelectedIndex < 0)

                return;

            for (int i = 0; i < this.EndConf.Items.Count; i++)

            {

                if (this.EndConf.Items[i] == this.ListConf.SelectedItem)

                    return;

            }

            this.EndConf.Items.Add(this.ListConf.SelectedItem);

        }

        //关联项删除项

        private void BtnConfCancel_Click(object sender, EventArgs e)

        {

            if (this.EndConf.SelectedIndex < 0)

                return;

            this.EndConf.Items.RemoveAt(this.EndConf.SelectedIndex);

        }

        //计算置信度

        private void confBtn_Click(object sender, EventArgs e)

        {

            float sumconf = FindItemSup(insertSumConf());

            float refconf = FindItemSup(insertConf());

            float result = (sumconf / refconf) * tData.Count;

            txtConfidence.Text = (result).ToString();

        }

        //获取推导项集

        private ArrayList insertConf()

        {

            ArrayList items = new ArrayList();

            for (int i = 0; i < this.confEnd.Items.Count; i++)

            {

                string itemID = this.confEnd.Items[i].ToString();

                items.Add(itemID);

            }

            items.Sort();

            return items;

        }

        //获取关联项集

        private ArrayList insertSumConf()

        {

            ArrayList items = new ArrayList();

            for (int i = 0; i < this.EndConf.Items.Count; i++)

            {

                string itemID = this.EndConf.Items[i].ToString();

                items.Add(itemID);

            }

            items.Sort();

            return items;

        }

    }

}

时间: 2024-11-05 18:55:22

关联规则挖掘算法的相关文章

关联规则挖掘算法综述

摘  要  本文介绍了关联规则的基本概念和分类方法,列举了一些关联规则挖掘算法并简要分析了典型算法,展望了关联规则挖掘的未来研究方向. 关键词  数据挖掘,关联规则,频集,Apriori算法,FP-树 1 引言 关联规则挖掘发现大量数据中项集之间有趣的关联或相关联系.它在数据挖掘中是一个重要的课题,最近几年已被业界所广泛研究. 关联规则挖掘的一个典型例子是购物篮分析.关联规则研究有助于发现交易数据库中不同商品(项)之间的联系,找出顾客购买行为模式,如购买了某一商品对购买其他商品的影响.分析结果可

FP-Tree -关联规则挖掘算法

在关联规则挖掘领域最经典的算法是Apriori,其致命的缺点是需要多次扫描事务数据库。于是人们提出了各种裁剪(prune)数据集的方法以减少I/O开支。本文参考地址:http://www.cnblogs.com/zhangchaoyang/articles/2198946.html

关联规则挖掘算法AFPIM

(参考文献来自An Efficient Approach for Maintaining Association Rules  based on Adjusting FP-tree Structure Jia-Ling Koh and Shui-Feng Shieh  Department of Information and Computer Education 其中有大量的删减,如果想直奔主题,看干货,可直接从3.调整FP_tree的策略 开始看起@OUYM) 1.Introduction

不产生候选集的关联规则挖掘算法FP-Tree

上篇博客讲述了Apriori算法的思想和java实现,http://blog.csdn.net/u010498696/article/details/45641719 Apriori算法是经典的关联规则算法,但是如上篇博客所述,它也有两个致命的性能瓶颈,一个是频繁集自连接产生候选集这一步骤中可能产生大量的候选集:另一个是从候选集得到频繁项集需要重复扫描数据库. 2000年,Han等提出了一个称为FP-tree的算法,有效解决了以上两个问题,它只需要扫描数据库2次,并不使用候选集,通过构造一棵频繁

Apriori算法--关联规则挖掘

我的数据挖掘算法代码:https://github.com/linyiqun/DataMiningAlgorithm 介绍 Apriori算法是一个经典的数据挖掘算法,Apriori的单词的意思是"先验的",说明这个算法是具有先验性质的,就是说要通过上一次的结果推导出下一次的结果,这个如何体现将会在下面的分析中会慢慢的体现出来.Apriori算法的用处是挖掘频繁项集的,频繁项集粗俗的理解就是找出经常出现的组合,然后根据这些组合最终推出我们的关联规则. Apriori算法原理 Aprio

数据挖掘算法之关联规则挖掘(一)---apriori算法

关联规则挖掘算法在生活中的应用处处可见,几乎在各个电子商务网站上都可以看到其应用 举个简单的例子 如当当网,在你浏览一本书的时候,可以在页面中看到一些套餐推荐,本书+有关系的书1+有关系的书2+...+其他物品=多少¥ 而这些套餐就很有可能符合你的胃口,原本只想买一本书的你可能会因为这个推荐而买了整个套餐 这与userCF和itemCF不同的是,前两种是推荐类似的,或者你可能喜欢的商品列表 而关联规则挖掘的是n个商品是不是经常一起被购买,如果是,那个n个商品之中,有一个商品正在被浏览(有被购买的

增量关联规则挖掘—FUP算法

一.背景介绍 关联规则( Association rule)概念最初由Agrawal提出,是数据挖掘的一个重要研究领域, 其目的是发现数据集中有用的频繁模式. 静态关联规则挖掘,是在固定数据集和支持度下,发现数据集中的频繁项集,如 Apriori.FP-Growth.Ecalt等.现实问题中,多数时候,支持度和数据集是会发生变化的,Cheung提出了FUP (Fast UPdate)算法,主要针对数据集增大的情况,FUP算法是第一个增量关联规则挖掘算法. 二.相关定义 数据集DB = {T1,T

浅谈数据挖掘中的关联规则挖掘

数据挖掘是指以某种方式分析数据源,从中发现一些潜在的有用的信息,所以数据挖掘又称作知识发现,而关联规则挖掘则是数据挖掘中的一个很重要的 课题,顾名思义,它是从数据背后发现事物之间可能存在的关联或者联系.举个最简单的例子,比如通过调查商场里顾客买的东西发现,30%的顾客会同时购买床 单和枕套,而购买床单的人中有80%购买了枕套,这里面就隐藏了一条关联:床单—>枕套,也就是说很大一部分顾客会同时购买床单和枕套,那么对于商 场来说,可以把床单和枕套放在同一个购物区,那样就方便顾客进行购物了.下面来讨论

分布式并行关联规则挖掘

经典的关联规则挖掘算法Apriori和FP-growth,在大数据或者海量数据面前,由于候选集和生成的FP树大而无法存储到内存,同时也由于算法本身单机的特点,决定了它串行处理数据的方式,这在效率上很难满足大数据处理的要求;数据迁移到平台需要传输和转储,在大数据面前,也是一大难题。一般而言,关联规则的挖掘过程分为两步:1 找出所有的频繁项集,根据定义,这些项集的每一个频繁出现次数至少与预定义的最小支持度计数一样;2 由频繁项集产生强关联规则,这些规则必须满足最小支持度和最小置信度。由于第二步的开销远