TrieFilter类
using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
namespace SaaS.Web.Base
{
    /// <summary>
    /// One node of the keyword trie. Each node maps the next character of a
    /// keyword to the child node reached by that character.
    /// </summary>
    public class TrieNode
    {
        /// <summary>
        /// True when the characters on the path from the root to this node
        /// spell a complete keyword.
        /// </summary>
        public bool m_end;

        /// <summary>
        /// Child nodes keyed by the next character.
        /// </summary>
        public Dictionary<Char, TrieNode> m_values;

        /// <summary>
        /// Creates a node with no children.
        /// </summary>
        public TrieNode()
        {
            m_values = new Dictionary<Char, TrieNode>();
        }
    }

    /// <summary>
    /// Keyword filter backed by a trie. The filter itself is the root node;
    /// keywords are registered with <see cref="AddKey"/> and then looked up
    /// in arbitrary text with the Has/Find/Replace methods.
    /// </summary>
    public class TrieFilter : TrieNode
    {
        /// <summary>
        /// Registers a keyword. Null or empty keywords are ignored.
        /// </summary>
        /// <param name="key">The keyword to add.</param>
        public void AddKey(string key)
        {
            if (string.IsNullOrEmpty(key))
            {
                return;
            }

            TrieNode node = this;
            for (int i = 0; i < key.Length; i++)
            {
                char c = key[i];
                TrieNode subnode;
                if (!node.m_values.TryGetValue(c, out subnode))
                {
                    subnode = new TrieNode();
                    node.m_values.Add(c, subnode);
                }
                node = subnode;
            }
            node.m_end = true;
        }

        /// <summary>
        /// Checks whether the text contains at least one registered keyword.
        /// </summary>
        /// <param name="text">The text to scan. Null or empty text contains no keyword.</param>
        /// <returns>True if any keyword occurs in <paramref name="text"/>; otherwise false.</returns>
        public bool HasBadWord(string text)
        {
            if (string.IsNullOrEmpty(text))
            {
                return false;
            }

            for (int i = 0; i < text.Length; i++)
            {
                // Walk the trie from the root starting at position i. Starting the
                // walk at j == i (rather than i + 1) lets one-character keywords match.
                TrieNode node = this;
                for (int j = i; j < text.Length; j++)
                {
                    if (!node.m_values.TryGetValue(text[j], out node))
                    {
                        break;
                    }
                    if (node.m_end)
                    {
                        return true;
                    }
                }
            }
            return false;
        }

        /// <summary>
        /// Finds the first keyword occurrence in the text: the earliest start
        /// position, and the shortest keyword at that position.
        /// </summary>
        /// <param name="text">The text to scan.</param>
        /// <returns>The matched keyword, or string.Empty when there is none (including null or empty text).</returns>
        public string FindOne(string text)
        {
            if (string.IsNullOrEmpty(text))
            {
                return string.Empty;
            }

            for (int i = 0; i < text.Length; i++)
            {
                TrieNode node = this;
                for (int j = i; j < text.Length; j++)
                {
                    if (!node.m_values.TryGetValue(text[j], out node))
                    {
                        break;
                    }
                    if (node.m_end)
                    {
                        return text.Substring(i, j + 1 - i);
                    }
                }
            }
            return string.Empty;
        }

        /// <summary>
        /// Lazily enumerates every keyword occurrence in the text. For each start
        /// position all keywords beginning there are returned, shortest first, so
        /// overlapping matches are all reported.
        /// </summary>
        /// <param name="text">The text to scan. Null or empty text yields nothing.</param>
        /// <returns>The matched keywords in order of start position.</returns>
        public IEnumerable<string> FindAll(string text)
        {
            if (string.IsNullOrEmpty(text))
            {
                yield break;
            }

            for (int i = 0; i < text.Length; i++)
            {
                TrieNode node = this;
                for (int j = i; j < text.Length; j++)
                {
                    if (!node.m_values.TryGetValue(text[j], out node))
                    {
                        break;
                    }
                    if (node.m_end)
                    {
                        yield return text.Substring(i, j + 1 - i);
                    }
                }
            }
        }

        /// <summary>
        /// Masks every keyword occurrence in the text with a replacement character.
        /// At each start position the longest keyword is masked, and scanning
        /// resumes after the masked span.
        /// </summary>
        /// <param name="text">The text to filter.</param>
        /// <param name="c">The character written over each character of a matched keyword.</param>
        /// <returns>
        /// The masked text, or <paramref name="text"/> itself (possibly null) when nothing matched.
        /// </returns>
        public string Replace(string text, char c)
        {
            if (string.IsNullOrEmpty(text))
            {
                return text;
            }

            // Allocated on the first match only, so clean text is returned without copying.
            char[] chars = null;
            for (int i = 0; i < text.Length; i++)
            {
                // Index of the last character of the longest keyword starting at i, or -1.
                int matchEnd = -1;
                TrieNode node = this;
                for (int j = i; j < text.Length; j++)
                {
                    if (!node.m_values.TryGetValue(text[j], out node))
                    {
                        break;
                    }
                    if (node.m_end)
                    {
                        matchEnd = j;
                    }
                }

                if (matchEnd >= 0)
                {
                    if (chars == null)
                    {
                        chars = text.ToCharArray();
                    }
                    for (int t = i; t <= matchEnd; t++)
                    {
                        chars[t] = c;
                    }
                    // Skip the masked span; the loop increment moves past matchEnd.
                    i = matchEnd;
                }
            }
            return chars == null ? text : new string(chars);
        }
    }
}
调用执行方法类:
#region Filter keywords
// sw2 times the whole pass: loading the keyword list plus masking the content.
Stopwatch sw2 = new Stopwatch();
sw2.Start();
// Only referenced by the disabled Sleep-based timing experiment further down.
int time_cap = 2000;
string urlAddress = HttpContext.Server.MapPath("~/App_Data/KeyWord.txt");
TrieFilter tf = new TrieFilter();
// Build the trie from the keyword file, one keyword per line.
using (StreamReader reader = new StreamReader(System.IO.File.OpenRead(urlAddress)))
{
    string key = reader.ReadLine();
    while (key != null)
    {
        // Blank lines carry no keyword and are skipped.
        if (key != string.Empty)
        {
            tf.AddKey(key);
        }
        key = reader.ReadLine();
    }
}
// Mask every keyword occurrence in the content with '*'.
if (!string.IsNullOrEmpty(content))
{
    content = tf.Replace(content, '*');
}
#region Measure elapsed time
//System.Diagnostics.Stopwatch sw1 = new System.Diagnostics.Stopwatch();
//sw1.Start();
//System.Threading.Thread.Sleep(time_cap);
//sw1.Stop();
//TimeSpan ts2 = sw1.Elapsed;
//double t = ts2.TotalMilliseconds;// elapsed time
sw2.Stop();
TimeSpan ts3 = sw2.Elapsed;
double times = ts3.TotalMilliseconds;
Console.WriteLine("Stopwatch总共花费{0}ms.", ts3.TotalMilliseconds);
#endregion
#endregion