下面是基于KWIC 的关键字匹配算法(管道+过滤器模式下实现)
下面给出该算法在管道+过滤器软件体系结构下的关键部分实现。很多关键字搜索平台都使用了这种“循环移位 + 排序输出”的关键字匹配算法:
具体需求如下:
1、使用管道-过滤器风格:
每个过滤器处理数据,然后将结果送至下一个过滤器。只要有数据传入,过滤器即开始工作。过滤器之间的数据共享被严格限制在管道传输。
四个过滤器:
输入(Input filter):
从数据源读取输入文件,解析格式,将行写入输出管道
移位(CircularShifter filter):循环移位
排序(Alphabetizer filter):
输出(Output filter)
管道:
in_cs pipe
cs_al pipe
al_ou pipe
例如:
代码如下:
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;

namespace KWIC
{
    /// <summary>
    /// A pipe: holds the list of lines most recently written by the upstream
    /// filter so that the downstream filter can read it.
    /// </summary>
    public class Pipe
    {
        List<string> word;

        /// <summary>
        /// Returns the list most recently passed to <see cref="write"/>,
        /// or <c>null</c> when nothing has been written yet.
        /// </summary>
        public List<string> read()
        {
            return word;
        }

        /// <summary>
        /// Stores <paramref name="word"/> as the current content of the pipe.
        /// </summary>
        public void write(List<string> word)
        {
            this.word = word;
        }
    }

    /// <summary>
    /// Base type of every filter. A filter performs its work in
    /// <see cref="Transform"/>; the base implementation does nothing.
    /// </summary>
    public abstract class Filter
    {
        public virtual void Transform()
        {
        }
    }

    /// <summary>
    /// Input filter: pushes the lines it was constructed with into its output pipe.
    /// </summary>
    public class InputFilter : Filter
    {
        public Pipe outPipe;
        public List<string> word;

        public InputFilter(List<string> word, Pipe outPipe)
        {
            this.word = word;
            this.outPipe = outPipe;
        }

        /// <summary>
        /// Writes the source lines to the output pipe.
        /// </summary>
        // Declared as an override so that calls made through a Filter
        // reference reach this method instead of the empty base method.
        public override void Transform()
        {
            outPipe.write(word);
        }
    }

    /// <summary>
    /// Circular-shift filter: for every input line it emits the line itself
    /// plus every rotation of its words.
    /// </summary>
    public class CircleShiftFilter : Filter
    {
        public Pipe inputPipe;
        public Pipe outPipe;

        public CircleShiftFilter(Pipe inputPipe, Pipe outPipe)
        {
            this.inputPipe = inputPipe;
            this.outPipe = outPipe;
        }

        /// <summary>
        /// Reads the lines from the input pipe and writes a new list to the
        /// output pipe: first the input lines in their original order, then,
        /// line by line, the rotations of each line.
        /// </summary>
        public override void Transform()
        {
            // Treat a pipe that was never written as "no lines".
            List<string> source = inputPipe.read() ?? new List<string>();

            // Build the result in a fresh list. Appending to the list read from
            // the pipe would modify the upstream filter's data, and running the
            // pipeline a second time would then shift already shifted lines.
            List<string> result = new List<string>(source);
            foreach (string line in source)
            {
                result.AddRange(RotationsOf(line));
            }

            outPipe.write(result);
        }

        /// <summary>
        /// Returns the rotations of <paramref name="line"/>, excluding the line
        /// itself. Each step moves one more trailing word to the front, e.g.
        /// "I love you" yields "you I love" and then "love you I".
        /// </summary>
        private static List<string> RotationsOf(string line)
        {
            List<string> rotations = new List<string>();
            if (string.IsNullOrEmpty(line))
            {
                return rotations;
            }

            // Empty entries are dropped so that repeated spaces do not create
            // empty "words".
            string[] words = line.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            for (int shift = 1; shift < words.Length; shift++)
            {
                int pivot = words.Length - shift;
                string tail = string.Join(" ", words, pivot, shift);
                string head = string.Join(" ", words, 0, pivot);
                rotations.Add(tail + " " + head);
            }

            return rotations;
        }
    }

    /// <summary>
    /// Alphabetizer filter: sorts the lines it receives.
    /// </summary>
    public class AlphaFilter : Filter
    {
        public Pipe inputPipe;
        public Pipe outPipe;

        public AlphaFilter(Pipe inputPipe, Pipe outPipe)
        {
            this.inputPipe = inputPipe;
            this.outPipe = outPipe;
        }

        /// <summary>
        /// Writes a sorted copy of the input lines to the output pipe. The
        /// ordering is the default string ordering used by
        /// <see cref="List{T}.Sort()"/>.
        /// </summary>
        public override void Transform()
        {
            List<string> source = inputPipe.read() ?? new List<string>();

            // Sort a copy so the list owned by the upstream filter is untouched.
            List<string> sorted = new List<string>(source);
            sorted.Sort();

            outPipe.write(sorted);
        }
    }

    /// <summary>
    /// Output filter: forwards the lines from its input pipe to its output pipe.
    /// </summary>
    public class OutputFilter : Filter
    {
        public Pipe inputPipe;
        public Pipe outPipe;

        public OutputFilter(Pipe inputPipe, Pipe outPipe)
        {
            this.inputPipe = inputPipe;
            this.outPipe = outPipe;
        }

        /// <summary>
        /// Passes the input lines through unchanged; a pipe that was never
        /// written is forwarded as an empty list.
        /// </summary>
        public override void Transform()
        {
            List<string> lines = inputPipe.read() ?? new List<string>();
            outPipe.write(lines);
        }
    }

    /// <summary>
    /// Wires the four filters together with four pipes and runs them in order.
    /// </summary>
    public class KWIC_System
    {
        readonly Pipe in_cs;   // input filter   -> circular shifter
        readonly Pipe cs_al;   // circular shift -> alphabetizer
        readonly Pipe al_ou;   // alphabetizer   -> output filter
        readonly Pipe ou_ui;   // output filter  -> caller of GetResult

        readonly InputFilter inputFilter;
        readonly CircleShiftFilter shifter;
        readonly AlphaFilter alpha;
        readonly OutputFilter output;

        /// <summary>
        /// Builds the pipeline over three hard-coded sample sentences.
        /// </summary>
        public KWIC_System()
        {
            in_cs = new Pipe();
            cs_al = new Pipe();
            al_ou = new Pipe();
            ou_ui = new Pipe();

            List<string> word = new List<string>();
            word.Add(Normalize("I love you"));
            word.Add(Normalize("me too"));
            word.Add(Normalize("do you know"));

            inputFilter = new InputFilter(word, in_cs);
            shifter = new CircleShiftFilter(in_cs, cs_al);
            alpha = new AlphaFilter(cs_al, al_ou);
            output = new OutputFilter(al_ou, ou_ui);
        }

        /// <summary>
        /// Runs input, shift, sort and output in sequence and returns the lines
        /// that reached the last pipe.
        /// </summary>
        public List<string> GetResult()
        {
            inputFilter.Transform();
            shifter.Transform();
            alpha.Transform();
            output.Transform();
            return ou_ui.read();
        }

        /// <summary>
        /// Trims <paramref name="line"/> and collapses every run of whitespace
        /// (spaces, tabs, new lines, ...) into a single space.
        /// </summary>
        private static string Normalize(string line)
        {
            return Regex.Replace(line.Trim(), @"\s+", " ");
        }
    }
}
(备注:如果想换行输出,需要在输出的每一行结尾加上 '\r\n')
在广泛的搜索技术中,其实这个关键字匹配算法应用范围很广,比如我们常见的Baidu和Google的搜索关键字 提示功能。
基于KWIC 的关键字匹配算法(管道+过滤器模式下实现)
时间: 2024-12-25 01:39:21