TSearch & TFileSearch Version 2.2 -Boyer-Moore-Horspool search algorithm

unit Searches;

(*-----------------------------------------------------------------------------*
|  Components        TSearch & TFileSearch                                     |
|  Version:          2.2                                                       |
|  Last Update:      10 June 2004                                              |
|  Compilers:        Delphi 3 - Delphi 7                                       |
|  Author:           Angus Johnson - angusj-AT-myrealbox-DOT-com               |
|  Copyright:        © 2001 -2004  Angus Johnson                               |
|                                                                              |
|  Description:      Delphi implementation of the                              |
|                    Boyer-Moore-Horspool search algorithm.                    |
*-----------------------------------------------------------------------------*)

//10.06.04: Added support for widestring searches

interface

uses
  windows, sysutils, classes;

type

  TBaseSearch = class(TComponent)
  private
     fPos            : pchar;
     fEnd            : pchar;
     fPattern        : string;
     fPatLen         : integer;
     fPatInitialized : boolean;
     fCaseSensitive  : boolean;
     JumpShift       : integer;
     Shift           : array[#0..#255] of integer;
     CaseBlindTable  : array[#0..#255] of char;
     procedure InitPattern;
     procedure MakeCaseBlindTable;
     procedure SetCaseSensitive(CaseSens: boolean);
     procedure SetPattern(const Pattern: string);
     procedure SetWsPattern(const WsPattern: widestring);
     function  FindCaseSensitive: integer;
     function  FindCaseInsensitive: integer;
  protected
     fStart          : pchar;
     fDataLength     : integer;
     procedure ClearData;
     procedure SetData(Data: pchar; DataLength: integer); virtual;
  public
     constructor Create(aowner: tcomponent); override;
     destructor  Destroy; override;
     //The following Find functions return the 0 based offset of Pattern
     //else POSITION_EOF (-1) if the Pattern is not found  ...
     function  FindFirst: integer;
     function  FindNext: integer;
     function  FindFrom(StartPos: integer): integer;
     //To simplify searching for widestring patterns -
     //assign the WsPattern property instead of the Pattern property
     property WsPattern: widestring write SetWsPattern;
     property Data: pchar read fStart;
     property DataSize: integer read fDataLength;
  published
     property CaseSensitive: boolean read fCaseSensitive write SetCaseSensitive;
     property  Pattern: string read fPattern write SetPattern;
  end;

  TSearch = class(TBaseSearch)
  public
    //Changes visibility of base SetData() method to public ...
    //Note: TSearch does NOT own the data. To avoid the overhead of
    //copying it, it just gets a pointer to it.
    procedure SetData(Data: pchar; DataLength: integer); override;
  end;

  TFileSearch = class(TBaseSearch)
  private
    fFilename: string;
    procedure SetFilename(const Filename: string);
    procedure Closefile;
  public
    destructor Destroy; override;
  published
    //Assigning ‘Filename‘ creates a memory map of the named file.
    //This memory mapping will be closed when either the Filename property is
    //assigned to ‘‘ or the FileSearch object is destroyed.
    property Filename: string read fFilename write SetFilename;
  end;

procedure Register;

const
  POSITION_EOF = -1;

implementation

procedure Register;
begin
  RegisterComponents(‘Samples‘, [TSearch, TFileSearch]);
end;

//------------------------------------------------------------------------------
// TBaseSearch methods ...
//------------------------------------------------------------------------------

procedure TBaseSearch.MakeCaseBlindTable;
var
  i: char;
begin
  for i:= #0 to #255 do
     CaseBlindTable[i]:= ansilowercase(i)[1];
end;
//------------------------------------------------------------------------------

constructor TBaseSearch.Create(AOwner: TComponent);
begin
  inherited Create(AOwner);
  fStart := nil;
  fPattern := ‘‘;
  fPatLen := 0;
  MakeCaseBlindTable;
  fCaseSensitive := false;      //Default to case insensitive searches.
  fPatInitialized := false;
end;
//------------------------------------------------------------------------------

destructor TBaseSearch.Destroy;
begin
  ClearData;
  inherited Destroy;
end;
//------------------------------------------------------------------------------

procedure TBaseSearch.ClearData;
begin
  fStart := nil;
  fPos := nil;
  fEnd := nil;
  fDataLength := 0;
end;
//------------------------------------------------------------------------------

procedure TBaseSearch.SetPattern(const Pattern: string);
begin
  if fPattern = Pattern then exit;
  fPattern := Pattern;
  fPatLen := length(Pattern);
  fPatInitialized := false;
end;
//------------------------------------------------------------------------------

procedure TBaseSearch.SetWsPattern(const WsPattern: widestring);
begin
  fPatLen := length(WsPattern)*2;
  fPatInitialized := false;
  if fPatLen = 0 then exit;
  SetString(fPattern, pchar(pointer(WsPattern)), fPatLen);
end;
//------------------------------------------------------------------------------

procedure TBaseSearch.SetData(Data: pchar; DataLength: integer);
begin
  ClearData;
  if (Data = nil) or (DataLength < 1) then exit;
  fStart := Data;
  fDataLength := DataLength;
  fEnd := fStart + fDataLength;
end;
//------------------------------------------------------------------------------

procedure TBaseSearch.SetCaseSensitive(CaseSens: boolean);
begin
  if fCaseSensitive = CaseSens then exit;
  fCaseSensitive := CaseSens;
  fPatInitialized := false;
end;
//------------------------------------------------------------------------------

procedure TBaseSearch.InitPattern;
var
  j: integer;
  i: char;
begin
  if fPatLen = 0 then exit;
  for i := #0 to #255 do Shift[i]:= fPatLen;
  if fCaseSensitive then
  begin
    for j := 1 to fPatLen-1 do
      Shift[fPattern[j]]:= fPatLen - j;
    JumpShift := Shift[fPattern[fPatLen]];
    Shift[fPattern[fPatLen]] := 0;
  end else
  begin
    for j := 1 to fPatLen-1 do
      Shift[CaseBlindTable[fPattern[j]]]:= fPatLen - j;
    JumpShift := Shift[CaseBlindTable[fPattern[fPatLen]]];
    Shift[CaseBlindTable[fPattern[fPatLen]]] := 0;
  end;
  fPatInitialized := true;
end;
//------------------------------------------------------------------------------

function TBaseSearch.FindFirst: integer;
begin
  fPos := fStart+fPatLen-1;
  result := FindNext;
end;
//------------------------------------------------------------------------------

function TBaseSearch.FindFrom(StartPos: integer): integer;
begin
  if StartPos < fPatLen-1 then  //ie: StartPos must never be less than fPatLen-1
    fPos := fStart+fPatLen-1 else
    fPos := fStart+StartPos;
  result := FindNext;
end;
//------------------------------------------------------------------------------

function TBaseSearch.FindNext: integer;
begin
  if not fPatInitialized then InitPattern;
  if (fPatLen = 0) or (fPatLen >= fDataLength) or (fPos >= fEnd) then
  begin
     fPos := fEnd;
     result := POSITION_EOF;
     exit;
  end;
  if fCaseSensitive then
    result := FindCaseSensitive else
    result := FindCaseInsensitive;
end;
//------------------------------------------------------------------------------

function TBaseSearch.FindCaseSensitive: integer;
var
  i: integer;
  j: pchar;
begin
  result:= POSITION_EOF;
  while fPos < fEnd do
  begin
    i := Shift[fPos^];        //test last character first
    if i <> 0 then            //last char does not match
      inc(fPos,i)
    else
    begin                     //last char matches at least
      i := 1;
      j := fPos - fPatLen;
      while (i < fPatLen) and (fPattern[i] = (j+i)^) do inc(i);
      if (i = fPatLen) then
      begin
         result:= fPos-fStart-fPatLen+1;
         inc(fPos,fPatLen);
         break;               //FOUND!
      end
      else
        inc(fPos,JumpShift);
    end;
  end;
end;
//------------------------------------------------------------------------------

function TBaseSearch.FindCaseInsensitive: integer;
var
  i: integer;
  j: pchar;
begin
  result:= POSITION_EOF;
  while fPos < fEnd do
  begin
    i := Shift[CaseBlindTable[fPos^]];   //test last character first
    if i <> 0 then                       //last char does not match
      inc(fPos,i)
    else
    begin                                //last char matches at least
      i := 1;
      j := fPos - fPatLen;
      while (i < fPatLen) and
            (CaseBlindTable[fPattern[i]] = CaseBlindTable[(j+i)^]) do inc(i);
      if (i = fPatLen) then
      begin
         result:= fPos-fStart-fPatLen+1;
         inc(fPos,fPatLen);
         break;                          //FOUND!
      end
      else
        inc(fPos,JumpShift);
    end;
  end;
end;

//------------------------------------------------------------------------------
// TSearch methods ...
//------------------------------------------------------------------------------

procedure TSearch.SetData(Data: pchar; DataLength: integer);
begin
  inherited; //changes visibility of base method from protected to public
end;

//------------------------------------------------------------------------------
// TFileSearch methods ...
//------------------------------------------------------------------------------

destructor TFileSearch.Destroy;
begin
  CloseFile;
  inherited Destroy;
end;
//------------------------------------------------------------------------------

procedure TFileSearch.SetFilename(const Filename: string);
var
   filehandle: integer;
   filemappinghandle: thandle;
   size, highsize: integer;
begin
  if (csDesigning in ComponentState) then
  begin
    fFilename := Filename;
    exit;
  end;
  CloseFile;
  if (Filename = ‘‘) or not FileExists(Filename) then exit;
  filehandle := sysutils.FileOpen(Filename, fmopenread or fmsharedenynone);
  if filehandle = 0 then exit;                //error
  size := GetFileSize(filehandle, @highsize);
  if (size <= 0) or (highsize <> 0) then      //nb: files >2 gig not supported
  begin
     CloseHandle(filehandle);
     exit;
  end;
  filemappinghandle :=
    CreateFileMapping(filehandle, nil, page_readonly, 0, 0, nil);
  if GetLastError = error_already_exists then filemappinghandle := 0;
  if filemappinghandle <> 0 then
    SetData(MapViewOfFile(filemappinghandle,file_map_read,0,0,0),size);
  if fStart <> nil then fFilename := Filename;
  CloseHandle(filemappinghandle);
  CloseHandle(filehandle);
end;
//------------------------------------------------------------------------------

procedure TFileSearch.CloseFile;
begin
   if (csDesigning in ComponentState) then exit;
   if (fStart <> nil) then UnmapViewOfFile(fStart);
   fFilename := ‘‘;
   ClearData;
end;
//------------------------------------------------------------------------------

end.
时间: 2024-12-18 23:37:04

TSearch & TFileSearch Version 2.2 -Boyer-Moore-Horspool search algorithm的相关文章

LeetcodeOJ: Implement strStr() [ Boyer–Moore string search algorithm ]

1 class Solution { 2 public: 3 int strStr(char *haystack, char *needle) { 4 5 int i = 0 , skip[256]; 6 char *str = haystack, *substr = needle; 7 int len_src = strlen(str), len_sub = strlen(substr); 8 // preprocess 9 for (i = 0; i < 256; i++) 10 skip[

Moore&#39;s voting algorithm

算法的基本思想 这个算法是解决这样一个问题:从一个数组中找出出现半数以上的元素.他的基本思想是:每次都找出一对不同的元素,从数组中删掉,直到数组为空或只有一种元素. 不难证明,如果存在元素e出现频率超过半数,那么数组中最后剩下的就只有e. 算法的实现 1 int majorityElement(vector<int> &num) 2 { 3 int curIdx = 0, count = 1; 4 for (int i = 1; i < num.size(); ++i) 5 {

Boyer Moore算法(字符串匹配)

上一篇文章,我介绍了KMP算法. 但是,它并不是效率最高的算法,实际采用并不多.各种文本编辑器的"查找"功能(Ctrl+F),大多采用Boyer-Moore算法. Boyer-Moore算法不仅效率高,而且构思巧妙,容易理解.1977年,德克萨斯大学的Robert S. Boyer教授和J Strother Moore教授发明了这种算法. 下面,我根据Moore教授自己的例子来解释这种算法. 1. 假定字符串为"HERE IS A SIMPLE EXAMPLE",搜

Task 待学习内容【Moore majority vote algorithm(摩尔投票算法)】

https://www.cnblogs.com/zhonghuasong/p/6536665.html 参见:https://my.oschina.net/u/2277632/blog/2873164 原文地址:https://www.cnblogs.com/leodaxin/p/11355590.html

leetcode 229. Majority Element II(多数投票算法)

就是简单的应用多数投票算法(Boyer–Moore majority vote algorithm),参见这道题的题解. class Solution { public: vector<int> majorityElement(vector<int>& nums) { int cnt1=0,cnt2=0,ans1=0,ans2=1; for(auto n:nums){ if(n==ans1){ cnt1++; } else if(n==ans2){ cnt2++; } el

[LeetCode] 229. Majority Element II 多数元素 II

Given an integer array of size n, find all elements that appear more than ⌊ n/3 ⌋ times. Note: The algorithm should run in linear time and in O(1) space. Example 1: Input: [3,2,3] Output: [3] Example 2: Input: [1,1,1,3,3,2,2,2] Output: [1,2] 169. Maj

ESX Patch Version

For hosts with RDM disks, please set the RDM disk  parameter " IsPerenniallyReserved" to true before the patching. http://pingforinfo.com/powercli-script-to-set-perennial-reservation-to-all-rdm-luns-in-a-cluster/ Three useful website for VMware

字符串匹配算法总结

转自:http://blog.csdn.net/zdl1016/archive/2009/10/11/4654061.aspx 我想说一句“我日,我讨厌KMP!”.KMP虽然经典,但是理解起来极其复杂,好不容易理解好了,便起码来巨麻烦!老子就是今天图书馆在写了几个小时才勉强写了一个有bug的.效率不高的KMP,特别是计算next数组的部分. 其实,比KMP算法速度快的算法大把大把,而且理解起来更简单,为何非要抓住KMP呢?笔试出现字符串模式匹配时直接上sunday算法,既简单又高效,何乐而不为?

字符串匹配(BF,BM,Sunday,KMP算法解析)

字符串匹配一直是计算机领域热门的研究问题之一,多种算法层出不穷.字符串匹配算法有着很强的实用价值,应用于信息搜索,拼写检查,生物信息学等多个领域. 今天介绍几种比较有名的算法: 1. BF 2. BM 3. Sunday 4. KMP -,BF算法 BF(Brute Force)算法又称为暴力匹配算法,是普通模式匹配算法. 其算法思想很简单,从主串S的第pos个字符开始,和模式串T的第一个字符进行比较,若相等,则主串和模式串都后移一个字符继续比较:若不相同,则回溯到主串S的第pos+1个字符重新