|
There is my implementation of the Boyer-Moore string search algorithm in C++. The way the good-match table (skip[]) is created is the fastest I could imagine. It sure is faster than the way it's done in the example implementation I used as a starting point. It was from en.wikipedia.org. #include <limits.h>
#define max(a,b) ((a) < (b) ? (b) : (a))
/* pattern: word searched
* pl : length of pattern in characters
* text : text in which we search the word
* tl : length of text in characters
*
* Search pattern in text and returns the position of the first
* match, returns -1 if there is no match.
*/
int search(const char * pattern, int pl, const char * text, int tl)
{
int * skip = new int[pl-1]; // good-match table
int occ[UCHAR_MAX+1]; // shifts for different characters
int i;
if (pl > tl || pl <= 1 || !pattern || !text) return -1;
for (i = 0; i < pl-1; i++) skip[i] = pl;
// ends: position of the last character in the compared substring
bool before = false;
for (int ends = pl-2; ends >= 0 && !before; ends--)
{
bool foundDiff = false;
// i: length of the searched substring
for (i = 0; i < pl && !foundDiff && ends-i >= 0; i++)
{
// ends-i: start of the compared substring
// pl-i-1: start of the searched substring
if (pattern[ends-i] != pattern[pl-i-1])
{
foundDiff = true;
if (skip[i] == pl)
{
skip[i] = pl - ends - 1;
}
}
}
if (!foundDiff && ends-i < 0)
{
before = true;
for (int j = i; j < pl - 1; j++) skip[j] = pl - ends - 1;
}
}
for(i=0; i < UCHAR_MAX+1; i++) occ[i] = -1;
for (i = 0; i < pl-1; i++) occ[pattern[i]] = i;
int hpos = 0;
while (hpos <= tl-pl)
{
int npos = pl-1;
while (pattern[npos] == text[npos+hpos])
{
if (npos == 0) return hpos;
npos--;
}
hpos += max(skip[npos], npos - occ[text[npos+hpos]]);
}
delete skip;
return -1;
}
Algorithm from: Robert S. Boyer and J. Strother Moore. A Fast String Searching Algorithm. Communications of the ACM, vol. 20 no 10 , pp. 762-772 (1977).
|