全文检索 使用最新lucene3.0.3+最新盘古分词 pangu2.4 .net 实例

开发环境 vs2015 winform 程序

1 首先需要下载对应的DLL 文章后面统一提供程序下载地址 里面都有

2 配置pangu的参数 也可以不配置 采用默认的即可

3 创建索引,将索引存放到本地

4 根据关键字查询本地索引

5 取得查询结果并展示

以上是主要的步骤,下面贴上主要代码,拿来即可用

Form1.Designer.cs

namespace lucuneTest
{
    partial class Form1
    {
        /// <summary>
        /// 必需的设计器变量。
        /// </summary>
        private System.ComponentModel.IContainer components = null;

        /// <summary>
        /// 清理所有正在使用的资源。
        /// </summary>
        /// <param name="disposing">如果应释放托管资源,为 true;否则为 false。</param>
        protected override void Dispose(bool disposing)
        {
            if (disposing && (components != null))
            {
                components.Dispose();
            }
            base.Dispose(disposing);
        }

        #region Windows 窗体设计器生成的代码

        /// <summary>
        /// 设计器支持所需的方法 - 不要修改
        /// 使用代码编辑器修改此方法的内容。
        /// </summary>
        private void InitializeComponent()
        {
            this.btnSearch = new System.Windows.Forms.Button();
            this.panel1 = new System.Windows.Forms.Panel();
            this.textBox2 = new System.Windows.Forms.TextBox();
            this.panel2 = new System.Windows.Forms.Panel();
            this.txtWords = new System.Windows.Forms.TextBox();
            this.btnCutWords = new System.Windows.Forms.Button();
            this.txtWords2 = new System.Windows.Forms.TextBox();
            this.txtResult = new System.Windows.Forms.TextBox();
            this.label3 = new System.Windows.Forms.Label();
            this.label4 = new System.Windows.Forms.Label();
            this.panel1.SuspendLayout();
            this.panel2.SuspendLayout();
            this.SuspendLayout();
            //
            // btnSearch
            //
            this.btnSearch.Location = new System.Drawing.Point(413, 48);
            this.btnSearch.Name = "btnSearch";
            this.btnSearch.Size = new System.Drawing.Size(75, 23);
            this.btnSearch.TabIndex = 3;
            this.btnSearch.Text = "查询";
            this.btnSearch.UseVisualStyleBackColor = true;
            this.btnSearch.Click += new System.EventHandler(this.btnSearch_Click);
            //
            // panel1
            //
            this.panel1.Controls.Add(this.label3);
            this.panel1.Controls.Add(this.label4);
            this.panel1.Controls.Add(this.txtWords2);
            this.panel1.Controls.Add(this.btnCutWords);
            this.panel1.Controls.Add(this.txtWords);
            this.panel1.Controls.Add(this.textBox2);
            this.panel1.Controls.Add(this.btnSearch);
            this.panel1.Dock = System.Windows.Forms.DockStyle.Top;
            this.panel1.Location = new System.Drawing.Point(0, 0);
            this.panel1.Name = "panel1";
            this.panel1.Size = new System.Drawing.Size(884, 92);
            this.panel1.TabIndex = 5;
            //
            // textBox2
            //
            this.textBox2.Location = new System.Drawing.Point(36, 29);
            this.textBox2.Name = "textBox2";
            this.textBox2.Size = new System.Drawing.Size(328, 21);
            this.textBox2.TabIndex = 5;
            this.textBox2.Text = "天龙八部";
            //
            // panel2
            //
            this.panel2.Controls.Add(this.txtResult);
            this.panel2.Dock = System.Windows.Forms.DockStyle.Fill;
            this.panel2.Location = new System.Drawing.Point(0, 92);
            this.panel2.Name = "panel2";
            this.panel2.Size = new System.Drawing.Size(884, 378);
            this.panel2.TabIndex = 6;
            //
            // txtWords
            //
            this.txtWords.Location = new System.Drawing.Point(548, 12);
            this.txtWords.Multiline = true;
            this.txtWords.Name = "txtWords";
            this.txtWords.ScrollBars = System.Windows.Forms.ScrollBars.Both;
            this.txtWords.Size = new System.Drawing.Size(324, 38);
            this.txtWords.TabIndex = 7;
            //
            // btnCutWords
            //
            this.btnCutWords.Location = new System.Drawing.Point(413, 19);
            this.btnCutWords.Name = "btnCutWords";
            this.btnCutWords.Size = new System.Drawing.Size(75, 23);
            this.btnCutWords.TabIndex = 8;
            this.btnCutWords.Text = "分词--》";
            this.btnCutWords.UseVisualStyleBackColor = true;
            this.btnCutWords.Click += new System.EventHandler(this.btnCutWords_Click);
            //
            // txtWords2
            //
            this.txtWords2.Location = new System.Drawing.Point(548, 51);
            this.txtWords2.Multiline = true;
            this.txtWords2.Name = "txtWords2";
            this.txtWords2.ScrollBars = System.Windows.Forms.ScrollBars.Both;
            this.txtWords2.Size = new System.Drawing.Size(324, 38);
            this.txtWords2.TabIndex = 9;
            //
            // txtResult
            //
            this.txtResult.Dock = System.Windows.Forms.DockStyle.Fill;
            this.txtResult.Location = new System.Drawing.Point(0, 0);
            this.txtResult.Multiline = true;
            this.txtResult.Name = "txtResult";
            this.txtResult.ScrollBars = System.Windows.Forms.ScrollBars.Both;
            this.txtResult.Size = new System.Drawing.Size(884, 378);
            this.txtResult.TabIndex = 8;
            //
            // label3
            //
            this.label3.AutoSize = true;
            this.label3.Location = new System.Drawing.Point(513, 24);
            this.label3.Name = "label3";
            this.label3.Size = new System.Drawing.Size(29, 12);
            this.label3.TabIndex = 11;
            this.label3.Text = "盘古";
            //
            // label4
            //
            this.label4.AutoSize = true;
            this.label4.Location = new System.Drawing.Point(513, 51);
            this.label4.Name = "label4";
            this.label4.Size = new System.Drawing.Size(29, 12);
            this.label4.TabIndex = 10;
            this.label4.Text = "标准";
            //
            // Form1
            //
            this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 12F);
            this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
            this.ClientSize = new System.Drawing.Size(884, 470);
            this.Controls.Add(this.panel2);
            this.Controls.Add(this.panel1);
            this.Name = "Form1";
            this.Text = "Form1";
            this.panel1.ResumeLayout(false);
            this.panel1.PerformLayout();
            this.panel2.ResumeLayout(false);
            this.panel2.PerformLayout();
            this.ResumeLayout(false);

        }

        #endregion
        private System.Windows.Forms.Button btnSearch;
        private System.Windows.Forms.Panel panel1;
        private System.Windows.Forms.Panel panel2;
        private System.Windows.Forms.TextBox textBox2;
        private System.Windows.Forms.Button btnCutWords;
        private System.Windows.Forms.TextBox txtWords;
        private System.Windows.Forms.TextBox txtWords2;
        private System.Windows.Forms.TextBox txtResult;
        private System.Windows.Forms.Label label3;
        private System.Windows.Forms.Label label4;
    }
}

Form1.cs

using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Store;
using LN = Lucene.Net;
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Windows.Forms;
using Lucene.Net.Index;
using Lucene.Net.Documents;
using Lucene.Net.Analysis.PanGu;
using Lucene.Net.Search;
using PanGu;
using Lucene.Net.QueryParsers;
using PanGu.HighLight;
using System.Diagnostics;

namespace lucuneTest
{
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
            //定义盘古分词的xml引用路径
            PanGu.Segment.Init(PanGuXmlPath);
            //创建索引
            createIndex();
        }

        /// <summary>
        /// 创建索引
        /// </summary>
        void createIndex()
        {

            //IndexWriter第三个参数:true指重新创建索引,false指从当前索引追加....此处为新建索引所以为true,后续应该建立的索引应采用追加
            IndexWriter writer = new IndexWriter(direcotry, PanGuAnalyzer, true, IndexWriter.MaxFieldLength.LIMITED);

            Stopwatch sw = new Stopwatch();
            sw.Start();
            for (int i = 1; i < 101; i++)
            {
                AddIndex(writer, "我的标题" + i, i + "这是我的标题啦" + i, DateTime.Now.AddDays(i).ToString("yyyy-MM-dd"));
                AddIndex(writer, "射雕英雄传作者金庸" + i, i + "我是欧阳锋" + i, DateTime.Now.AddDays(i).ToString("yyyy-MM-dd"));
                AddIndex(writer, "天龙八部12" + i, i + "慕容废墟,上官静儿,打撒飞艾丝凡爱上,虚竹" + i, DateTime.Now.AddDays(i).ToString("yyyy-MM-dd"));
                AddIndex(writer, "倚天屠龙记12" + i, i + "张无忌机" + i, DateTime.Now.AddDays(i).ToString("yyyy-MM-dd"));
                AddIndex(writer, "三国演义" + i, i + "刘备,张飞,关羽还有谁来着 忘记啦" + i, DateTime.Now.AddDays(i).ToString("yyyy-MM-dd"));
            }
            //释放资源
            writer.Optimize();
            writer.Dispose();
            string time = ((double)sw.ElapsedMilliseconds / 1000).ToString();
            sw.Stop();
            Console.WriteLine("创建100条记录需要时长:" + time + "秒");
        }

        /// <summary>
        /// 创建索引
        /// </summary>
        /// <param name="analyzer"></param>
        /// <param name="title"></param>
        /// <param name="content"></param>
        private void AddIndex(IndexWriter writer, string title, string content, string date)
        {
            try
            {
                Document doc = new Document();
                doc.Add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED));//存储且索引
                doc.Add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED));//存储且索引
                doc.Add(new Field("addtime", date, Field.Store.YES, Field.Index.NOT_ANALYZED));//不分词存储
                writer.AddDocument(doc);
            }
            catch (FileNotFoundException fnfe)
            {
                throw fnfe;
            }
            catch (Exception ex)
            {
                throw ex;
            }
        }

        /// <summary>
        /// 分词测试
        /// </summary>
        /// <param name="sender"></param>
        /// <param name="e"></param>
        private void btnCutWords_Click(object sender, EventArgs e)
        {
            this.txtWords.Text = "";
            Lucene.Net.Analysis.Standard.StandardAnalyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
            txtWords.Text = cutWords(this.textBox2.Text, PanGuAnalyzer);//盘古分词
            txtWords2.Text = cutWords(this.textBox2.Text, analyzer);    //自带标准分词
        }

        /// <summary>
        /// 分词方法
        /// </summary>
        /// <param name="words">待分词内容</param>
        /// <param name="analyzer"></param>
        /// <returns></returns>
        private string cutWords(string words, Analyzer analyzer)
        {
            string resultStr = "";
            System.IO.StringReader reader = new System.IO.StringReader(words);
            Lucene.Net.Analysis.TokenStream ts = analyzer.TokenStream(words, reader);
            bool hasNext = ts.IncrementToken();
            Lucene.Net.Analysis.Tokenattributes.ITermAttribute ita;
            while (hasNext)
            {
                ita = ts.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
                resultStr += ita.Term + "|";
                hasNext = ts.IncrementToken();
            }
            ts.CloneAttributes();
            reader.Close();
            analyzer.Close();
            return resultStr;
        }

        protected IList<Article> list = new List<Article>();

        /// <summary>
        /// 查询多个字段
        /// </summary>
        private void SearchIndex(string searchKey)
        {
            Dictionary<string, string> dic = new Dictionary<string, string>();
            BooleanQuery bQuery = new BooleanQuery();

            #region 一个字段查询
            //if (!string.IsNullOrEmpty(title))
            //{
            //    title = GetKeyWordsSplitBySpace(title);
            //    QueryParser parse = new QueryParser(LN.Util.Version.LUCENE_30, "title", PanGuAnalyzer);//一个字段查询
            //    Query query = parse.Parse(title);
            //    parse.DefaultOperator = QueryParser.Operator.OR;
            //    bQuery.Add(query, new Occur());
            //    dic.Add("title", title);
            //}

            #endregion

            string[] fileds = { "title", "content" };//查询字段
            searchKey = GetKeyWordsSplitBySpace(searchKey);
            QueryParser parse = new MultiFieldQueryParser(LN.Util.Version.LUCENE_30, fileds, PanGuAnalyzer);//多个字段查询
            Query query = parse.Parse(searchKey);
            bQuery.Add(query, new Occur());

            dic.Add("title", searchKey);
            dic.Add("content", searchKey);

            if (bQuery != null && bQuery.GetClauses().Length > 0)
            {
                GetSearchResult(bQuery, dic);
            }
        }

        /// <summary>
        /// 获取
        /// </summary>
        /// <param name="bQuery"></param>
        private void GetSearchResult(BooleanQuery bQuery, Dictionary<string, string> dicKeywords)
        {
            IndexSearcher search = new IndexSearcher(direcotry, true);
            // Stopwatch stopwatch = Stopwatch.StartNew();
            //SortField构造函数第三个字段true为降序,false为升序
            Sort sort = new Sort(new SortField("addtime", SortField.DOC, true));
            int maxNum = 100;//查询条数
            TopDocs docs = search.Search(bQuery, (Filter)null, maxNum, sort);

            if (docs != null)
            {

                for (int i = 0; i < docs.TotalHits && i < maxNum; i++)
                {
                    Document doc = search.Doc(docs.ScoreDocs[i].Doc);
                    Article model = new Article()
                    {
                        Title = doc.Get("title").ToString(),
                        Content = doc.Get("content").ToString(),
                        AddTime = doc.Get("addtime").ToString()
                    };
                    list.Add(SetHighlighter(dicKeywords, model));

                }
            }

        }

        /// <summary>
        /// 索引存放目录
        /// </summary>
        protected string IndexDic
        {
            get
            {
                return Application.StartupPath + "/IndexDic";
            }
        }

        public LN.Store.Directory direcotry
        {
            get
            { //创建索引目录
                if (!System.IO.Directory.Exists(IndexDic))
                {
                    System.IO.Directory.CreateDirectory(IndexDic);
                }

                LN.Store.Directory direcotry = FSDirectory.Open(IndexDic);
                return direcotry;
            }

        }
        /// <summary>
        /// 盘古分词的配置文件
        /// </summary>
        protected string PanGuXmlPath
        {
            get
            {
                return Application.StartupPath + "/PanGu/PanGu.xml";
            }
        }

        /// <summary>
        /// 盘古分词器
        /// </summary>
        protected Analyzer PanGuAnalyzer
        {
            get { return new PanGuAnalyzer(); }

        }

        /// <summary>
        /// 处理关键字为索引格式
        /// </summary>
        /// <param name="keywords"></param>
        /// <returns></returns>
        private string GetKeyWordsSplitBySpace(string keywords)
        {
            PanGuTokenizer ktTokenizer = new PanGuTokenizer();
            StringBuilder result = new StringBuilder();
            ICollection<WordInfo> words = ktTokenizer.SegmentToWordInfos(keywords);

            foreach (WordInfo word in words)
            {
                if (word == null)
                {
                    continue;
                }
                result.AppendFormat("{0}^{1}.0 ", word.Word, (int)Math.Pow(3, word.Rank));
            }
            return result.ToString().Trim();
        }

        /// <summary>
        /// 设置关键字高亮
        /// </summary>
        /// <param name="dicKeywords">关键字列表</param>
        /// <param name="model">返回的数据模型</param>
        /// <returns></returns>
        private Article SetHighlighter(Dictionary<string, string> dicKeywords, Article model)
        {
            SimpleHTMLFormatter simpleHTMLFormatter = new PanGu.HighLight.SimpleHTMLFormatter("<font color=\"green\">", "</font>");
            Highlighter highlighter = new PanGu.HighLight.Highlighter(simpleHTMLFormatter, new Segment());
            highlighter.FragmentSize = 50;
            string strTitle = string.Empty;
            string strContent = string.Empty;
            dicKeywords.TryGetValue("title", out strTitle);
            dicKeywords.TryGetValue("content", out strContent);
            if (!string.IsNullOrEmpty(strTitle))
            {
                var transStr = highlighter.GetBestFragment(strTitle, model.Title);
                model.Title = string.IsNullOrEmpty(transStr) ? model.Title : transStr;
            }
            if (!string.IsNullOrEmpty(strContent))
            {
                var transStr = highlighter.GetBestFragment(strContent, model.Content);
                model.Content = string.IsNullOrEmpty(transStr) ? model.Content : transStr;
            }
            return model;
        }

        /// <summary>
        /// 查询方法
        /// </summary>
        /// <param name="sender"></param>
        /// <param name="e"></param>
        private void btnSearch_Click(object sender, EventArgs e)
        {
            list.Clear();
            this.txtResult.Text = "";
            SearchIndex(this.textBox2.Text);
            if (list.Count == 0)
            {
                this.txtResult.Text = "没有查询到结果";
                return;
            }
            for (int i = 0; i < list.Count; i++)
            {
                this.txtResult.Text += "标题:" + list[i].Title + " 内容:" + list[i].Content + " 时间:" + list[i].AddTime + "\r\n";
            }
        }

        #region 删除索引数据(根据id)
        /// <summary>
        /// 删除索引数据(根据id)
        /// </summary>
        /// <param name="id"></param>
        /// <returns></returns>
        public bool Delete(string id)
        {
            bool IsSuccess = false;
            Term term = new Term("id", id);
            //Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
            //Version version = new Version();
            //MultiFieldQueryParser parser = new MultiFieldQueryParser(version, new string[] { "name", "job" }, analyzer);//多个字段查询
            //Query query = parser.Parse("小王");  

            //IndexReader reader = IndexReader.Open(directory_luce, false);
            //reader.DeleteDocuments(term);
            //Response.Write("删除记录结果: " + reader.HasDeletions + "<br/>");
            //reader.Dispose();  

            IndexWriter writer = new IndexWriter(direcotry, PanGuAnalyzer, false, IndexWriter.MaxFieldLength.LIMITED);
            writer.DeleteDocuments(term); // writer.DeleteDocuments(term)或者writer.DeleteDocuments(query);
            ////writer.DeleteAll();
            writer.Commit();
            //writer.Optimize();//
            IsSuccess = writer.HasDeletions();
            writer.Dispose();
            return IsSuccess;
        }
        #endregion

        #region 删除全部索引数据
        /// <summary>
        /// 删除全部索引数据
        /// </summary>
        /// <returns></returns>
        public bool DeleteAll()
        {
            bool IsSuccess = true;
            try
            {

                IndexWriter writer = new IndexWriter(direcotry, PanGuAnalyzer, false, IndexWriter.MaxFieldLength.LIMITED);
                writer.DeleteAll();
                writer.Commit();
                //writer.Optimize();//
                IsSuccess = writer.HasDeletions();
                writer.Dispose();
            }
            catch
            {
                IsSuccess = false;
            }
            return IsSuccess;
        }
        #endregion

    }

    public class Article
    {
        public string Id
        {
            set;
            get;
        }

        public string Title
        {
            set;
            get;
        }

        public string Content
        {
            set;
            get;
        }

        public string AddTime
        {
            set;
            get;
        }
    }
}

实例下载地址:lucuneTest.zip

时间: 2025-01-16 01:30:41

全文检索 使用最新lucene3.0.3+最新盘古分词 pangu2.4 .net 实例的相关文章

让盘古分词支持最新的Lucene.Net 3.0.3

好多年没升级过的Lucene.Net最近居然升级了,到了3.0.3后接口发生了很大变化,原来好多分词库都不能用了,所以上次我把MMSeg给修改了一下支持了Lucene.Net 3.0.3(参考<基于MMSeg算法的中文分词类库>). 然后我知道了.Net下还有一个盘古分词(http://pangusegment.codeplex.com/),但也不支持Lucene.Net 3.0.3,网上也不少人在问,于是就下载了它的最新代码,基于Lucene.Net 3.0.3更新了,顺便把它的词库给放到d

spring4.0.6最新稳定版新特性学习,简单学习教程(一)

Spring Framework 4.0 学习整理. Spring框架的核心部分就是Ioc容器,而Ioc控制的就是各种Bean,一个Spring项目的水平往往从其XML配置文件内容就能略知一二,很多项目,往往是外包公司的项目,配置文件往往是乱七八糟,抱着能跑就行,不报错就行的态度去写,然后在项目中后期发现各种缺失又去一通乱补,其结果就是,整个文档可读性极差,毫无章法.这也不能怪写这个XML的人,拿着苦逼程序员的工资干着架构师的工作必然是这个结果.为了程序员的幸福,我认为有必要来一套简单快速的官方

Android应用之——百度地图最新SDK3.0应用,实现最常用的标注覆盖物以及弹出窗覆盖物

一.概述 最新版的百度地图SDK3.0,修改了很多方法,之前的很多方法被简化了,正好在做地图这一块,顺便就使用了最新版的sdk. 下载官方给的demo,发现变化还是挺大的,之前的一些方法都换了,地图的初始化也进行了调整.多了好几个类,具体用法参考下面的例子,详细的说明可参照官方的说明文档. 二.效果图 标注覆盖物效果图: 弹出窗覆盖物: 三.实现过程 大部分是根据官方给的demo来的. A.配置文件: 第一步:在工程里新建libs文件夹,将开发包里的baidumapapi_vX_X_X.jar拷

2015最新WebQQ3.0协议解析与实现

2015最新WebQQ3.0协议解析与实现(一) 2015最新webqq密码加密分析 2015最新WebQQ3.0协议解析与实现(三) 2015最新WebQQ3.0协议解析与实现(四) webqq协议综合篇,这个不是最新的,但是内容很全

toddyang3.0版本 最新EASYUI后台框架兼容所有浏览器

        toddyang3.0版本 最新EASYUI后台框架兼容所有浏览器

spring4.0.6最新稳定版新特性学习,注解自动扫描bean,自动注入bean(二)

Spring4.0的新特性我们在上一章已经介绍过了.包括它对jdk8的支持,Groovy Bean Definition DSL的支持,核心容器功能的改进,Web开发改进,测试框架改进等等.这张我们主要介绍spring4.0的自动扫描功能,以及对bean的过滤等特性进行学习. 好吧,废话少说,我们来看看代码吧. package com.herman.ss.test; import org.springframework.context.ApplicationContext; import org

lucene.net 3.0.3、结合盘古分词进行搜索的小例子(转)

lucene.net 3.0.3.结合盘古分词进行搜索的小例子(分页功能) 添加:2013-12-25 更新:2013-12-26 新增分页功能. 更新:2013-12-27 新增按分类查询功能,调整索引行新增记录的图片字段. //封装类 [csharp] view plaincopyprint? using System; using System.Collections.Generic; using System.Linq; using System.Web; using Lucene.Ne

Lucene.Net3.0.3+盘古分词器学习使用

一.Lucene.Net介绍 Lucene.net是Lucene的.net移植版本,是一个开源的全文检索引擎开发包,即它不是一个完整的全文检索引擎,而是一个全文检索引擎的架构,提供了完整的查询引擎和索引引擎.开发人员可以基于Lucene.net实现全文检索的功能. Lucene.net是Apache软件基金会赞助的开源项目,基于Apache License协议. Lucene.net并不是一个爬行搜索引擎,也不会自动地索引内容.我们得先将要索引的文档中的文本抽取出来,然后再将其加到Lucene.

lucene.net 3.0.3、结合盘古分词进行搜索的小例子(分页功能)

转自:http://blog.csdn.net/pukuimin1226/article/details/17558247 添加:2013-12-25 更新:2013-12-26 新增分页功能. 更新:2013-12-27 新增按分类查询功能,调整索引行新增记录的图片字段. 最新盘古分词dll和词典管理工具下载:http://pangusegment.codeplex.com/ 词典下载:http://pangusegment.codeplex.com/releases/view/47411 L