Быстрый поиск фразы в текст файле.
Расскажите плиз как этим чудом пользоваться???
unit BMUni;
(* -------------------------------------------------------------------
Ïîèñê ñòðîêè ìåòîäîì Boyer-Moore.
Ýòî - îäèí èç ñàìûõ áûñòðûõ àëãîðèòìîâ ïîèñêà ñòðîêè.
See a description in:
R. Boyer è S. Moore.
Áûñòðûé àëãîðèòì ïîèñêà ñòðîêè.
Communications of the ACM 20, 1977, ñòðàíèöû 762-772
------------------------------------------------------------------- *)
interface
type
{$IFDEF WINDOWS}
size_t = Word;
{$ELSE}
size_t = LongInt;
{$ENDIF}
type
TTranslationTable = array[char] of char; { òàáëèöà ïåðåâîäà }
TSearchBM = class(TObject)
private
FTranslate: TTranslationTable; { òàáëèöà ïåðåâîäà }
FJumpTable: array[char] of Byte; { òàáëèöà ïåðåõîäîâ }
FShift_1: integer;
FPattern: pchar;
FPatternLen: size_t;
public
procedure Prepare(Pattern: pchar; PatternLen: size_t; IgnoreCase: Boolean);
procedure PrepareStr(const Pattern: string; IgnoreCase: Boolean);
function Search(Text: pchar; TextLen: size_t): pchar;
function Pos(const S: string): integer;
end;
implementation
uses SysUtils;
(* -------------------------------------------------------------------
Èãíîðèðóåì ðåãèñòð òàáëèöû ïåðåâîäà
------------------------------------------------------------------- *)
procedure CreateTranslationTable(var T: TTranslationTable; IgnoreCase: Boolean);
var
c: char;
begin
for c := #0 to #255 do
T[c] := c;
if not IgnoreCase then
exit;
for c := 'a' to 'z' do
T[c] := UpCase(c);
{ Ñâÿçûâàåì âñå íèæíèå ñèìâîëû ñ èõ ýêâèâàëåíòîì âåðõíåãî ðåãèñòðà }
T['Á'] := 'A';
T['À'] := 'A';
T['Ä'] := 'A';
T['Â'] := 'A';
T['á'] := 'A';
T['à'] := 'A';
T['ä'] := 'A';
T['â'] := 'A';
T['É'] := 'E';
T['È'] := 'E';
T['Ë'] := 'E';
T['Ê'] := 'E';
T['é'] := 'E';
T['è'] := 'E';
T['ë'] := 'E';
T['ê'] := 'E';
T['Í'] := 'I';
T['Ì'] := 'I';
T['Ï'] := 'I';
T['Î'] := 'I';
T['í'] := 'I';
T['ì'] := 'I';
T['ï'] := 'I';
T['î'] := 'I';
T['Ó'] := 'O';
T['Ò'] := 'O';
T['Ö'] := 'O';
T['Ô'] := 'O';
T['ó'] := 'O';
T['ò'] := 'O';
T['ö'] := 'O';
T['ô'] := 'O';
T['Ú'] := 'U';
T['Ù'] := 'U';
T['Ü'] := 'U';
T['Û'] := 'U';
T['ú'] := 'U';
T['ù'] := 'U';
T['ü'] := 'U';
T['û'] := 'U';
T['ñ'] := 'Ñ';
end;
(* -------------------------------------------------------------------
Ïîäãîòîâêà òàáëèöû ïåðåõîäîâ
------------------------------------------------------------------- *)
procedure TSearchBM.Prepare(Pattern: pchar; PatternLen: size_t;
IgnoreCase: Boolean);
var
i: integer;
c, lastc: char;
begin
FPattern := Pattern;
FPatternLen := PatternLen;
if FPatternLen < 1 then
FPatternLen := strlen(FPattern);
{ Äàííûé àëãîðèòì áàçèðóåòñÿ íà íàáîðå èç 256 ñèìâîëîâ }
if FPatternLen > 256 then
exit;
{ 1. Ïîäãîòîâêà òàáëèöû ïåðåâîäà }
CreateTranslationTable(FTranslate, IgnoreCase);
{ 2. Ïîäãîòîâêà òàáëèöû ïåðåõîäîâ }
for c := #0 to #255 do
FJumpTable[c] := FPatternLen;
for i := FPatternLen - 1 downto 0 do
begin
c := FTranslate[FPattern[i]];
if FJumpTable[c] >= FPatternLen - 1 then
FJumpTable[c] := FPatternLen - 1 - i;
end;
FShift_1 := FPatternLen - 1;
lastc := FTranslate[Pattern[FPatternLen - 1]];
for i := FPatternLen - 2 downto 0 do
if FTranslate[FPattern[i]] = lastc then
begin
FShift_1 := FPatternLen - 1 - i;
break;
end;
if FShift_1 = 0 then
FShift_1 := 1;
end;
procedure TSearchBM.PrepareStr(const Pattern: string; IgnoreCase: Boolean);
var
str: pchar;
begin
if Pattern <> '' then
begin
{$IFDEF Windows}
str := @Pattern[1];
{$ELSE}
str := pchar(Pattern);
{$ENDIF}
Prepare(str, Length(Pattern), IgnoreCase);
end;
end;
{ Ïîèñê ïîñëåäíåãî ñèìâîëà & ïðîñìîòð ñïðàâà íàëåâî }
function TSearchBM.Search(Text: pchar; TextLen: size_t): pchar;
var
shift, m1, j: integer;
jumps: size_t;
begin
result := nil;
if FPatternLen > 256 then
exit;
if TextLen < 1 then
TextLen := strlen(Text);
m1 := FPatternLen - 1;
shift := 0;
jumps := 0;
{ Ïîèñê ïîñëåäíåãî ñèìâîëà }
while jumps <= TextLen do
begin
Inc(Text, shift);
shift := FJumpTable[FTranslate[Text^]];
while shift <> 0 do
begin
Inc(jumps, shift);
if jumps > TextLen then
exit;
Inc(Text, shift);
shift := FJumpTable[FTranslate[Text^]];
end;
{ Ñðàâíèâàåì ñïðàâà íàëåâî FPatternLen - 1 ñèìâîëîâ }
if jumps >= m1 then
begin
j := 0;
while FTranslate[FPattern[m1 - j]] = FTranslate[(Text - j)^] do
begin
Inc(j);
if j = FPatternLen then
begin
result := Text - m1;
exit;
end;
end;
end;
shift := FShift_1;
Inc(jumps, shift);
end;
end;
function TSearchBM.Pos(const S: string): integer;
var
str, p: pchar;
begin
result := 0;
if S <> '' then
begin
{$IFDEF Windows}
str := @S[1];
{$ELSE}
str := pchar(S);
{$ENDIF}
p := Search(str, Length(S));
if p <> nil then
result := 1 + p - str;
end;
end;
end.
|