搜索文件夹内容的小工具
两种搜索模式的算法:
BoyerMooreSearch.cs
using System.Threading; using System.Collections.Generic; using System.Linq; namespace Finder.Algorithms { /// <summary> /// An implemention of Boyer-Moore algorithm. /// <para/>author : Ornithopter /// </summary> class BoyerMooreSearch : SearchBase { /// <summary> /// /// </summary> /// <param name="source"></param> /// <param name="pattern"></param> /// <param name="token"></param> /// <returns>An array of matched index</returns> public int[] Search(string source, string pattern, CancellationToken token) { var matchIndexes = new List<int>(); // step increasment. int delta; // prepare a map providing delta for each char in pattern string. var deltaMap = CreateDeltaMap(pattern); // start searching. for (var i = pattern.Length - 1; i < source.Length; i += delta) { token.ThrowIfCancellationRequested(); // find next match and update delta. if (FindNext(source, pattern, i, deltaMap, token, out delta)) { // add to result list if found. matchIndexes.Add(i - (pattern.Length - 1)); } } return matchIndexes.ToArray(); } private static bool Match(string source, int[] deltaMap, string pattern, CancellationToken token) { // step increasment. int delta; // start searching. for (var i = pattern.Length - 1; i < source.Length; i += delta) { token.ThrowIfCancellationRequested(); // find next match and update delta. if (FindNext(source, pattern, i, deltaMap, token, out delta)) { return true; } } return false; } /// <summary> /// Find the next matched index and update delte at the same time. /// </summary> /// <param name="source"></param> /// <param name="pattern"></param> /// <param name="start"></param> /// <param name="deltaMap"></param> /// <param name="delta"></param> /// <returns>true if found one, otherwise false.</returns> private static bool FindNext(string source, string pattern, int start, int[] deltaMap, CancellationToken token, out int delta) { int i = pattern.Length - 1, index = 0; // start comparing from the last char in pattern. while (source[start - index] == pattern[i - index]) { token.ThrowIfCancellationRequested(); if (index != pattern.Length - 1) { index++; } else { // matchs to the end. So it‘s a search result. delta = pattern.Length; return true; } } // found one dismatched char at (start - index), get delta from map. var c = source[start - index]; delta = /*c > 128 ? 0 : */deltaMap[c]; if (delta == 0) { // this means the source[start] char is the last char in pattern // and only appears once. So delta should be the length of pattern. delta = pattern.Length; } return false; } static int[] CreateDeltaMap(string pattern) { const int alphabetSize = 0xffff; var patternLength = pattern.Length; var deltaMap = new int[alphabetSize]; // initialize the map. for (var i = 0; i < alphabetSize; i++) { deltaMap[i] = patternLength; } // start from 0, which means any duplicated char will only have // the index nearest to the end. for (var i = 0; i < patternLength; i++) { var index = pattern[i]; deltaMap[index] = patternLength - i - 1; } return deltaMap; } protected override void Build(CancellationToken token) { //throw new NotImplementedException(); } public override string[] Search(string keyword, Dictionary<string, object> config, CancellationToken token) { var fileList = FileList; var deltaMap = CreateDeltaMap(keyword); return fileList.Where(filePath => Match(ReadContent(filePath), deltaMap, keyword, token)).ToArray(); } } }
Trie.cs
using System.Threading; using System; using System.Collections.Generic; using System.Linq; namespace Finder.Algorithms { class Trie : SearchBase { class Node { public Dictionary<char, Node> Next { get; private set; } public Dictionary<int, bool> FileIndex { get; private set; } public Node() { FileIndex = new Dictionary<int, bool>(); Next = new Dictionary<char, Node>(); } } private static readonly HashSet<char> Ignorance = new HashSet<char> { ‘[‘, ‘(‘, ‘{‘, }; private static readonly HashSet<char> Spliters = new HashSet<char> { ‘ ‘, ‘,‘, ‘\n‘, ‘\r‘, ‘;‘, ‘]‘, ‘)‘, ‘}‘, }; private Node _root; protected override void Build(CancellationToken token) { _root = new Node(); var index = 0; foreach (var filePath in FileList) { token.ThrowIfCancellationRequested(); Build(index++, ReadContent(filePath)); } } private void Build(int fileIndex, string content) { var node = _root; foreach (var t in content) { var c = t; if (Ignorance.Contains(c)) { continue; } if (Spliters.Contains(c)) { node.FileIndex[fileIndex] = true; node = _root; continue; } /*if (!Char.IsLetter(c)) continue;*/ if (Char.IsUpper(c)) c = Char.ToLower(c); Node next; if (!node.Next.TryGetValue(c, out next)) { next = new Node(); node.Next[c] = next; } node = next; node.FileIndex[fileIndex] = false; } node.FileIndex[fileIndex] = true; } private string[] Search(string keyword, bool matchWholeWord, CancellationToken token) { var node = _root; foreach (var t in keyword) { token.ThrowIfCancellationRequested(); var c = t; if (Char.IsUpper(c)) c = Char.ToLower(c); Node next; if (!node.Next.TryGetValue(c, out next)) { return new string[0]; } node = next; } if (matchWholeWord) { return node.FileIndex.Where(_ => _.Value).Select(_ => FileList[_.Key]).ToArray(); } return node.FileIndex.Select(_ => FileList[_.Key]).ToArray(); } public const string ConfigMatchWholeWord = "ConfigMatchWholeWord"; public override string[] Search(string keyword, Dictionary<string, object> config, CancellationToken token) { var matchWholeWord = (bool)config[ConfigMatchWholeWord]; return Search(keyword, matchWholeWord, token); } } }
Finder(文件内容搜索工具)
时间: 2024-10-14 20:14:49