|
There is my implementation of the Boyer-Moore string search algorithm in C++. The way the good-match table (skip[]) is created is the fastest I could imagine. It sure is faster than the way it's done in the example implementation I used as a starting point. It was from en.wikipedia.org. #include <limits.h> #define max(a,b) ((a) < (b) ? (b) : (a)) /* pattern: word searched * pl : length of pattern in characters * text : text in which we search the word * tl : length of text in characters * * Search pattern in text and returns the position of the first * match, returns -1 if there is no match. */ int search(const char * pattern, int pl, const char * text, int tl) { int * skip = new int[pl-1]; // good-match table int occ[UCHAR_MAX+1]; // shifts for different characters int i; if (pl > tl || pl <= 1 || !pattern || !text) return -1; for (i = 0; i < pl-1; i++) skip[i] = pl; // ends: position of the last character in the compared substring bool before = false; for (int ends = pl-2; ends >= 0 && !before; ends--) { bool foundDiff = false; // i: length of the searched substring for (i = 0; i < pl && !foundDiff && ends-i >= 0; i++) { // ends-i: start of the compared substring // pl-i-1: start of the searched substring if (pattern[ends-i] != pattern[pl-i-1]) { foundDiff = true; if (skip[i] == pl) { skip[i] = pl - ends - 1; } } } if (!foundDiff && ends-i < 0) { before = true; for (int j = i; j < pl - 1; j++) skip[j] = pl - ends - 1; } } for(i=0; i < UCHAR_MAX+1; i++) occ[i] = -1; for (i = 0; i < pl-1; i++) occ[pattern[i]] = i; int hpos = 0; while (hpos <= tl-pl) { int npos = pl-1; while (pattern[npos] == text[npos+hpos]) { if (npos == 0) return hpos; npos--; } hpos += max(skip[npos], npos - occ[text[npos+hpos]]); } delete skip; return -1; } Algorithm from: Robert S. Boyer and J. Strother Moore. A Fast String Searching Algorithm. Communications of the ACM, vol. 20 no 10 , pp. 762-772 (1977).
|