Trie(C#)

TrieSearch.cs


//#define WEB

using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace TrieCS
{
/// <summary>
/// Equality comparer that treats two <see cref="MatchInfo"/> entries as equal
/// when their <c>Index</c> values match; <c>Position</c> is ignored.
/// </summary>
class MatchInfoComparer : IEqualityComparer<MatchInfo>
{
    /// <summary>Returns true when both entries carry the same Index.</summary>
    public bool Equals(MatchInfo x, MatchInfo y)
    {
        int left = x.Index;
        int right = y.Index;
        return left == right;
    }

    /// <summary>Uses the entry's Index as its hash code.</summary>
    public int GetHashCode(MatchInfo obj)
    {
        int hash = obj.Index;
        return hash;
    }
}

/// <summary>
/// Timing figures, in milliseconds, collected while searching.
/// </summary>
public class PerfInfo
{
    private double _filterAddMs;
    private double _totalMs;

    /// <summary>
    /// Accumulated time spent in the filter-and-add step of the search.
    /// </summary>
    public double FilterAddMs
    {
        get { return _filterAddMs; }
        internal set { _filterAddMs = value; }
    }

    /// <summary>
    /// Total elapsed time. NOTE(review): nothing in the visible code assigns this.
    /// </summary>
    public double TotalMs
    {
        get { return _totalMs; }
        internal set { _totalMs = value; }
    }
}

/// <summary>
/// One row of search output: a keyword, the character positions that matched
/// and the priority used for ranking.
/// </summary>
class SearchResult
{
    // Created eagerly so a new instance always exposes a usable, empty list.
    private List<int> _positions = new List<int>();

    /// <summary>Index of the keyword this result refers to.</summary>
    public int Index { get; set; }

    /// <summary>Matched character positions inside the keyword.</summary>
    public List<int> Positions
    {
        get { return _positions; }
        set { _positions = value; }
    }

    /// <summary>Ranking weight; a larger value means a better match.</summary>
    public int Priority { get; set; }

    /// <summary>Text of the keyword.</summary>
    public string Content { get; set; }
}

/// <summary>
/// Describes one character of one keyword: which keyword it belongs to
/// (<see cref="Index"/>), where it sits in that keyword (<see cref="Position"/>),
/// its ranking weight and the original character.
/// </summary>
/// <remarks>
/// Equality is defined by <see cref="Index"/> and <see cref="Position"/>;
/// ordering is defined by <see cref="Index"/> only.
/// </remarks>
public class MatchInfo : IEquatable<MatchInfo>, IComparable<MatchInfo>
{
    /// <summary>Index of the keyword this entry belongs to.</summary>
    public int Index { get; set; }

    /// <summary>Zero-based position of the character inside the keyword.</summary>
    public int Position { get; set; }

    /// <summary>Ranking weight of this character.</summary>
    public int Priority { get; set; }

    /// <summary>The character as it appears in the keyword.</summary>
    public char Data { get; set; }

    /// <summary>Formats the entry as <c>[Index.Position]</c>.</summary>
    public override string ToString()
    {
        return String.Format("[{0}.{1}]", Index, Position);
    }

    /// <summary>
    /// Returns true when <paramref name="other"/> is non-null and has the same
    /// Index and Position.
    /// </summary>
    public bool Equals(MatchInfo other)
    {
        // A null argument is simply "not equal" rather than a NullReferenceException.
        if (ReferenceEquals(other, null))
            return false;
        return Index == other.Index && Position == other.Position;
    }

    /// <summary>
    /// Object-based equality, kept consistent with <see cref="Equals(MatchInfo)"/>
    /// so non-generic collections and APIs agree with the typed comparison.
    /// </summary>
    public override bool Equals(object obj)
    {
        return Equals(obj as MatchInfo);
    }

    /// <summary>
    /// Hash code built from the same members that <see cref="Equals(MatchInfo)"/> compares.
    /// </summary>
    public override int GetHashCode()
    {
        unchecked
        {
            return (Index * 397) ^ Position;
        }
    }

    /// <summary>
    /// Orders entries by Index. A null <paramref name="other"/> sorts before
    /// any instance, following the IComparable convention.
    /// </summary>
    public int CompareTo(MatchInfo other)
    {
        if (ReferenceEquals(other, null))
            return 1;
        return Index.CompareTo(other.Index);
    }
}

/// <summary>
/// A trie node: the character it represents, the keyword entries recorded on
/// it, and its child nodes.
/// </summary>
class Node
{
    // 26 is only an initial capacity for the child table, not a limit.
    private Hashtable _next = new Hashtable(26);
    private List<MatchInfo> _infos = new List<MatchInfo>();

    /// <summary>Character represented by this node.</summary>
    public char Data { get; set; }

    /// <summary>Keyword entries recorded on this node.</summary>
    public List<MatchInfo> Infos
    {
        get { return _infos; }
        set { _infos = value; }
    }

    /// <summary>Child table; values are read back as <see cref="Node"/>.</summary>
    public Hashtable Next
    {
        get { return _next; }
        set { _next = value; }
    }

    /// <summary>Returns the node's character as a string.</summary>
    public override string ToString()
    {
        return Data.ToString();
    }
}

class TrieSearch
{
// Letters tried as alternative branches when a query character has no direct
// child in the trie. The original literal read "...rstyvwxyz": 'u' was missing
// and 'y' appeared twice, so 'u' branches were never explored.
const string ALPHABET = "abcdefghijklmnopqrstuvwxyz";
// Compares match entries by keyword index only; used when filtering results.
MatchInfoComparer _comparer = new MatchInfoComparer();
// Measures the time spent in the filter step of a search.
HiPerfTimer _timer = new HiPerfTimer();

/// <summary>
/// Timing information for the most recent Search call; null until Search has run.
/// </summary>
public PerfInfo LastPerfInfo { get; private set; }

/// <summary>
/// Root of the trie; null until <see cref="Keywords"/> has been assigned.
/// </summary>
public Node Root { get; private set; }

Hashtable _nodes = new Hashtable();

/// <summary>
/// Table of shared nodes keyed by character. It is replaced whenever
/// <see cref="Keywords"/> is assigned and is only filled by BuildTree when
/// the WEB symbol is defined.
/// </summary>
public Hashtable Nodes
{
    get
    {
        return _nodes;
    }
}

// Starts as an empty array so callers can read Keywords.Length before any
// keywords have been assigned.
private string[] _keywords = new string[0];

/// <summary>
/// The indexed keywords. Assigning rebuilds the whole trie; a null array is
/// treated as empty and null entries are skipped while keeping their index.
/// </summary>
public string[] Keywords
{
    get { return _keywords; }
    set
    {
        _keywords = value ?? new string[0];
        Root = new Node();
        _nodes = new Hashtable();
        for (int i = 0; i < _keywords.Length; i++)
        {
            // A null entry has no characters to index; skipping it leaves the
            // indices of the remaining keywords unchanged.
            if (_keywords[i] == null)
                continue;
            BuildTree(Root, _keywords[i], i);
        }
    }
}

/// <summary>
/// Inserts one keyword into the trie, recording a <see cref="MatchInfo"/> on
/// the node of every character.
/// </summary>
/// <param name="root">Node to start inserting from.</param>
/// <param name="keyword">Keyword to insert; nodes are keyed by its lower-cased characters.</param>
/// <param name="index">Index stored on every entry created for this keyword.</param>
private void BuildTree(Node root, string keyword, int index)
{
    var parents = new Stack<Node>();
    Node current = root;
    int position = 0;

    foreach (char original in keyword)
    {
        parents.Push(current);

        // The first character weighs 3, every other character 1;
        // an upper-case character earns one extra point.
        int priority = position == 0 ? 3 : 1;
        if (Char.IsUpper(original))
            priority++;

        var info = new MatchInfo
        {
            Index = index,
            Position = position,
            Priority = priority,
            Data = original,
        };

        // Children are keyed by the lower-cased character.
        char key = Char.ToLower(original);

        Node child = current.Next[key] as Node;
        if (child == null)
        {
#if WEB
            if (_nodes[key] == null)
                _nodes[key] = new Node();
            child = _nodes[key] as Node;
#else
            child = new Node();
#endif
            child.Data = key;
            current.Next[key] = child;
        }
#if WEB
        foreach (var ancestor in parents)
        {
            if (ancestor == child) continue;
            ancestor.Next[key] = child;
        }
#endif
        child.Infos.Add(info);

        current = child;
        position++;
    }
}

/// <summary>
/// Searches the trie for <paramref name="data"/> and returns the matching entries.
/// </summary>
/// <param name="data">Query text. A null or empty query yields an empty list.</param>
/// <returns>A new list of matching entries; never null.</returns>
/// <remarks>
/// Resets <see cref="LastPerfInfo"/> on every call. The query is lower-cased
/// character by character because BuildTree keys trie nodes by lower-cased
/// characters; without this an upper-case query character can never match.
/// </remarks>
public List<MatchInfo> Search(string data)
{
    LastPerfInfo = new PerfInfo();
    var infoList = new List<MatchInfo>();
    if (String.IsNullOrEmpty(data))
        return infoList;

    // Same per-character lowering that BuildTree applies to keywords.
    char[] chars = data.ToCharArray();
    for (int i = 0; i < chars.Length; i++)
        chars[i] = Char.ToLower(chars[i]);
    string query = new string(chars);

#if WEB
    // The WEB build declares the private overload without ref parameters.
    Search(query, 0, Root, infoList);
#else
    int pos = 0;
    bool first = true;
    Search(query, ref pos, Root, infoList, ref first);
#endif
    return infoList;
}

/// <summary>
/// Collapses per-character match entries into one <see cref="SearchResult"/>
/// per keyword.
/// </summary>
/// <param name="matchInfos">Entries produced by Search.</param>
/// <returns>
/// One result per distinct keyword index, carrying the keyword text, its
/// sorted matched positions and a priority. The priority is the highest entry
/// priority seen for the keyword, plus 2 when the matched positions form an
/// unbroken run.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="matchInfos"/> is null.</exception>
public SearchResult[] Convert(List<MatchInfo> matchInfos)
{
    if (matchInfos == null)
        throw new ArgumentNullException("matchInfos");

    var results = new Dictionary<int, SearchResult>();
    // Entries are processed in list order; the former grouping by character
    // had no effect on the aggregated values.
    foreach (var info in matchInfos)
    {
        SearchResult result;
        if (!results.TryGetValue(info.Index, out result))
        {
            result = new SearchResult();
            // Index was previously left at 0, so every row displayed index 0.
            result.Index = info.Index;
            result.Content = Keywords[info.Index];
            results[info.Index] = result;
        }
        if (!result.Positions.Contains(info.Position))
        {
            result.Positions.Add(info.Position);
            if (result.Priority < info.Priority)
                result.Priority = info.Priority;
        }
    }

    foreach (var item in results.Values)
    {
        item.Positions.Sort();
        if (IsConsecutive(item.Positions))
            item.Priority += 2;
    }
    return results.Values.ToArray();
}

/// <summary>
/// Returns true when every element of the sorted list is exactly one greater
/// than its predecessor (lists with fewer than two elements qualify).
/// </summary>
private static bool IsConsecutive(List<int> positions)
{
    for (int i = 1; i < positions.Count; i++)
    {
        if (positions[i] != positions[i - 1] + 1)
            return false;
    }
    return true;
}
#if WEB
/// <summary>
/// WEB build: walks the trie along <paramref name="data"/> starting at
/// <paramref name="pos"/>, narrowing <paramref name="infoList"/> to the
/// keyword indices seen on every visited node.
/// </summary>
/// <param name="data">Query text.</param>
/// <param name="pos">Index of the first query character to consume.</param>
/// <param name="node">Node to start from; nothing happens when it is null.</param>
/// <param name="infoList">Receives the matches; cleared when a character has no child node.</param>
private void Search(string data, int pos, Node node, List<MatchInfo> infoList)
{
    if (node == null) return;

    bool isFirstMatch = true;
    int i = pos;
    while (i < data.Length)
    {
        node = node.Next[data[i]] as Node;
        if (node == null)
        {
            // No child for this character: the query matches nothing.
            infoList.Clear();
            return;
        }

        if (isFirstMatch)
        {
            isFirstMatch = false;
            infoList.AddRange(node.Infos);
        }
        else
        {
            _timer.Start();

            // Keyword indices present both in the matches so far and on this node.
            var shared = new HashSet<int>(
                infoList.Intersect(node.Infos, _comparer).Select(m => m.Index));
            infoList.AddRange(node.Infos);
            // Keep only entries whose keyword is still a candidate.
            infoList.RemoveAll(m => !shared.Contains(m.Index));

            _timer.Stop();
            LastPerfInfo.FilterAddMs += _timer.Duration * 1000.0;
        }
        i++;
    }
}
#else
/// <summary>
/// Walks the trie along <paramref name="data"/>, narrowing
/// <paramref name="infoList"/> to the keyword indices seen on every visited node.
/// </summary>
/// <param name="data">Query text.</param>
/// <param name="pos">
/// Current query position. It is passed by reference, so progress made inside
/// a recursive call is visible to the caller and to later sibling branches.
/// </param>
/// <param name="node">Node to start from; nothing happens when it is null.</param>
/// <param name="infoList">Accumulates the matching entries.</param>
/// <param name="flag_first">
/// True until the first node has been matched; shared by reference across the recursion.
/// </param>
private void Search(string data, ref int pos, Node node, List<MatchInfo> infoList, ref bool flag_first)
{
    if (node == null) return;

    while (pos < data.Length)
    {
        char ch = data[pos];
        Node parent = node;
        node = parent.Next[ch] as Node;

        if (node == null)
        {
            // No direct child for this character: retry the same query position
            // below every other lettered child of the parent, then stop.
            foreach (char candidate in ALPHABET)
            {
                if (candidate == ch) continue;
                Node branch = parent.Next[candidate] as Node;
                if (branch == null) continue;
                Search(data, ref pos, branch, infoList, ref flag_first);
            }
            return;
        }

        if (flag_first)
        {
            flag_first = false;
            infoList.AddRange(node.Infos);
        }
        else
        {
            _timer.Start();

            // Keyword indices present both in the matches so far and on this node.
            var shared = new HashSet<int>(
                infoList.Intersect(node.Infos, _comparer).Select(m => m.Index));
            infoList.AddRange(node.Infos);
            // Keep only entries whose keyword is still a candidate.
            infoList.RemoveAll(m => !shared.Contains(m.Index));

            _timer.Stop();
            LastPerfInfo.FilterAddMs += _timer.Duration * 1000.0;
        }

        pos++;
    }
}
#endif
}
}

MainWindow.cs


using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows;
using System.Windows.Controls;
using System.Windows.Data;
using System.Windows.Documents;
using System.Windows.Input;
using System.Windows.Media;
using System.Windows.Media.Imaging;
using System.Windows.Navigation;
using System.Windows.Shapes;

namespace TrieCS
{
/// <summary>
/// Interaction logic for MainWindow.xaml: rebuilds the trie from the content
/// box and shows ranked search results for the keyword box.
/// </summary>
public partial class MainWindow : Window
{
    TrieSearch _trie = new TrieSearch();

    HiPerfTimer _timer = new HiPerfTimer();

    public MainWindow()
    {
        InitializeComponent();

        Initiate();
    }

    /// <summary>Startup hook; currently does nothing.</summary>
    public void Initiate()
    {
    }

    /// <summary>
    /// Re-indexes the content text (line breaks become spaces, then the text is
    /// split on spaces) and refreshes the node list and the results.
    /// </summary>
    private void content_TextChanged_1(object sender, TextChangedEventArgs e)
    {
        // The separator must be a plain ASCII char literal; the typographic
        // quotes found here before are not valid C#.
        _trie.Keywords = content.Text.Replace("\r\n", " ").Replace("\r", " ").Replace("\n", " ").Split(' ');
        NodesListBox.ItemsSource = _trie.Nodes.OfType<DictionaryEntry>().Select(_=>_.Value).ToList();
        DisplayResults();
    }

    /// <summary>Re-runs the search whenever the query text changes.</summary>
    private void keyword_TextChanged_1(object sender, TextChangedEventArgs e)
    {
        DisplayResults();
    }

    /// <summary>
    /// Runs the search, sorts the results by descending priority and shows
    /// them together with the timing figures.
    /// </summary>
    private void DisplayResults()
    {
        _timer.Start();
        var results = _trie.Convert(_trie.Search(keyword.Text));
        Array.Sort(results, new Comparison<SearchResult>((a, b) =>
        {
            return b.Priority.CompareTo(a.Priority);
        }));
        _timer.Stop();

        // Keywords can still be null when the query box is edited before any
        // content has been entered.
        var keywords = _trie.Keywords;
        int totalWords = keywords == null ? 0 : keywords.Length;

        time.Text = String.Format("Total:{0:0.00000}ms, FilterAdd:{1:0.00000}ms, Total words:{2}",
            _timer.Duration * 1000.0,
            _trie.LastPerfInfo.FilterAddMs,
            totalWords);
        result.ItemsSource = results;
    }
}
}

MainWindow.xaml


<Window x:Class="TrieCS.MainWindow"
xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
Title="MainWindow" Height="350" Width="525">
<!-- Layout: search UI in the left column, a fixed-width trie inspector in the right column. -->
<Grid>
<Grid.ColumnDefinitions>
<ColumnDefinition Width="*"/>
<ColumnDefinition Width="170"/>
</Grid.ColumnDefinitions>
<Grid.RowDefinitions>
<RowDefinition Height="*"/>
<RowDefinition Height="Auto"/>
<RowDefinition Height="*"/>
<RowDefinition Height="Auto"/>
</Grid.RowDefinitions>
<!-- Source text; every change is handled by content_TextChanged_1. -->
<TextBox x:Name="content" Grid.Row="0" TextChanged="content_TextChanged_1" AcceptsReturn="True"/>
<!-- Query text; every change is handled by keyword_TextChanged_1. -->
<TextBox x:Name="keyword" Grid.Row="1" TextChanged="keyword_TextChanged_1" Height="24"/>
<!-- Result list; columns bind to the Priority, Index and Content properties of each item. -->
<ListView x:Name="result" Grid.Row="2">
<ListView.View>
<GridView>
<GridViewColumn Width="50" DisplayMemberBinding="{Binding Priority}" Header="匹配度" />
<GridViewColumn Width="50" DisplayMemberBinding="{Binding Index}" Header="序号" />
<!--<GridViewColumn Width="50" DisplayMemberBinding="{Binding Position}" Header="位置" />-->
<GridViewColumn Width="250" DisplayMemberBinding="{Binding Content}" Header="内容" />
</GridView>
</ListView.View>
</ListView>
<!-- Status line for timing and word-count text. -->
<TextBlock x:Name="time" Grid.Row="3" Height="24"/>

<!-- Inspector: node list on top; the two lists below bind to the node selected in NodesListBox. -->
<Grid Grid.Column="1" Grid.RowSpan="3">
<Grid.RowDefinitions>
<RowDefinition Height="*"/>
<RowDefinition Height="*"/>
<RowDefinition Height="*"/>
</Grid.RowDefinitions>
<!-- Nodes, shown by their Data property. -->
<ListBox
x:Name="NodesListBox"
DisplayMemberPath="Data"></ListBox>
<!-- Infos of the selected node. -->
<ListBox
x:Name="MatchInfoListBox"
DataContext="{Binding Path=SelectedItem, ElementName=NodesListBox}"
ItemsSource="{Binding Infos}"
Grid.Row="1"/>
<!-- Next entries of the selected node, shown by each entry's Value. -->
<ListBox
x:Name="NextNodesListBox"
DataContext="{Binding Path=SelectedItem, ElementName=NodesListBox}"
ItemsSource="{Binding Next}"
DisplayMemberPath="Value"
Grid.Row="2"/>
</Grid>
</Grid>
</Window>

HiPerfTimer.cs


/* High precision timer class
* - http://www.eggheadcafe.com/articles/20021111.asp
*
* (Thanks to the author!)
*/
using System;
using System.Runtime.InteropServices;
using System.ComponentModel;
using System.Threading;

namespace TrieCS
{
/// <summary>
/// High-resolution interval timer.
/// </summary>
/// <remarks>
/// Backed by <see cref="System.Diagnostics.Stopwatch"/> instead of direct
/// Kernel32 P/Invoke calls, so it no longer requires Kernel32.dll and no
/// longer ignores native return values. The public surface is unchanged.
/// </remarks>
public class HiPerfTimer
{
    private long startTime;
    private long stopTime;
    private long freq;

    /// <summary>
    /// Creates a timer with both tick marks at zero.
    /// </summary>
    public HiPerfTimer()
    {
        startTime = 0;
        stopTime = 0;
        // Ticks per second of the underlying timer mechanism.
        freq = System.Diagnostics.Stopwatch.Frequency;
    }

    /// <summary>
    /// Start the timer
    /// </summary>
    /// <returns>long - tick count</returns>
    public long Start()
    {
        startTime = System.Diagnostics.Stopwatch.GetTimestamp();
        return startTime;
    }

    /// <summary>
    /// Stop timer
    /// </summary>
    /// <returns>long - tick count</returns>
    public long Stop()
    {
        stopTime = System.Diagnostics.Stopwatch.GetTimestamp();
        return stopTime;
    }

    /// <summary>
    /// Return the duration between the last Start and the last Stop (in seconds)
    /// </summary>
    /// <returns>double - duration</returns>
    public double Duration
    {
        get
        {
            return (double)(stopTime - startTime) / (double)freq;
        }
    }

    /// <summary>
    /// Frequency of timer (number of counts in one second on this machine)
    /// </summary>
    ///<returns>long - Frequency</returns>
    public long Frequency
    {
        get
        {
            return freq;
        }
    }
}
}

Trie(C#)

时间: 2024-11-06 10:23:33

Trie(C#)的相关文章

HDU 1075 What Are You Talking About (Trie树)

题目链接:http://acm.hdu.edu.cn/showproblem.php?pid=1075 map可以过...我上的字典树,小bug有点尴尬,题目没有明确给出数据范围也是无奈. 贡献了几次RE 一次WA.尴尬.discuss里面有个说注意前缀的到是给了点tip.总体来说不错 代码: 1 #define _CRT_SECURE_NO_WARNINGS 2 #include <functional> 3 #include <algorithm> 4 #include <

POJ2778 DNA Sequence Trie+矩阵乘法

题意:给定N个有A C G T组成的字符串,求长度为L的仅由A C G T组成的字符串中有多少个是不含给定的N个字符串的题解: 首先我们把所有的模式串(给定的DNA序列)建Trie,假定我们有一个匹配串,并且在匹配过程到S[i]这个字符时匹配到了Trie上的某个节点t,那么有两种可能: 匹配失败:t->child[S[i]]为空,跳转到t->fail,因此t->fail一定不能是某个模式串的结尾: 匹配成功:跳转到t->child[S[i+1]],因此t->child[S[i

poj3630 Phone List (trie树模板题)

Phone List Time Limit: 1000MS   Memory Limit: 65536K Total Submissions: 26328   Accepted: 7938 Description Given a list of phone numbers, determine if it is consistent in the sense that no number is the prefix of another. Let's say the phone catalogu

从Trie谈到AC自动机

ZJOI的SAM让我深受打击,WJZ大神怒D陈老师之T3是SAM裸题orz...我还怎么混?暂且写篇`从Trie谈到AC自动机`骗骗经验. Trie Trie是一种好玩的数据结构.它的每个结点存的是字母,因此得名`字母树`. 出一张图让大家感受下. (image powered by SaiBu NaoCu) 上面那是一棵插入了 ape,app,applicant,application,bake,ban,banana 等词的Trie.红色结点表示接受态. 显然,查找时只需顺着链找下来,插入只需

【BZOJ2741】【块状链表+可持久化trie】FOTILE模拟赛L

Description FOTILE得到了一个长为N的序列A,为了拯救地球,他希望知道某些区间内的最大的连续XOR和. 即对于一个询问,你需要求出max(Ai xor Ai+1 xor Ai+2 ... xor Aj),其中l<=i<=j<=r. 为了体现在线操作,对于一个询问(x,y): l = min ( ((x+lastans) mod N)+1 , ((y+lastans) mod N)+1 ).r = max ( ((x+lastans) mod N)+1 , ((y+last

[算法系列之二十]字典树(Trie)

一 概述 又称单词查找树,Trie树,是一种树形结构,是一种哈希树的变种.典型应用是用于统计,排序和保存大量的字符串(但不仅限于字符串),所以经常被搜索引擎系统用于文本词频统计. 二 优点 利用字符串的公共前缀来减少查询时间,最大限度地减少无谓的字符串比较,查询效率比哈希表高. 三 性质 (1)根节点不包含字符,除根节点外每一个节点都只包含一个字符: (2)从根节点到某一节点,路径上经过的字符连接起来,为该节点对应的字符串: (3)每个节点的所有子节点包含的字符都不相同. 单词列表为"apps&

跳跃表,字典树(单词查找树,Trie树),后缀树,KMP算法,AC 自动机相关算法原理详细汇总

第一部分:跳跃表 本文将总结一种数据结构:跳跃表.前半部分跳跃表性质和操作的介绍直接摘自《让算法的效率跳起来——浅谈"跳跃表"的相关操作及其应用》上海市华东师范大学第二附属中学 魏冉.之后将附上跳跃表的源代码,以及本人对其的了解.难免有错误之处,希望指正,共同进步.谢谢. 跳跃表(Skip List)是1987年才诞生的一种崭新的数据结构,它在进行查找、插入、删除等操作时的期望时间复杂度均为O(logn),有着近乎替代平衡树的本领.而且最重要的一点,就是它的编程复杂度较同类

Trie树学习2

数组实现的Trie树 字符容量有限,可以使用链表实现更为大容量的Trie #include <iostream> #include <cstdio> #include <string> #include <cstring> #include <vector> #include <map> #include <set> #include <algorithm> #include <cstdlib> #

trie树(字典树)

1. trie树,又名字典树,顾名思义,它是可以用来作字符串查找的数据结构,它的查找效率比散列表还要高. trie树的建树: 比如有字符串"ab","adb","adc" 可以建立字典树如图: 树的根节点head不存储信息,它有26个next指针,分别对应着字符a,b,c等.插入字符串ab时,next['a'-'a']即next[0]为空,这时申请一个结点放在next[0]的位置,插入字符串db时,next['d'-'a']即next[3]

Trie 字典树

1.UVa 1401 Remember the Word 题意:给出n个字符串集合,问其有多少种组合方式形成目标字符串. 思路:对n个字符串集合建立Trie树,保存每个结点的字符串的顺序编号.然后对这棵树查找目标字符串每一个后缀的前缀字符串,累加. 1 #include<cstdio> 2 #include<cstring> 3 #include<algorithm> 4 #include<iostream> 5 #include<vector>