pcosmos.ca

The way the good-match table (skip[]) is created is the fastest I could imagine. It sure is faster than the way it's done in the example implementation I used as a starting point. It was from en.wikipedia.org.

#include <climits>
#define max(a,b)   ((a) < (b) ? (b) : (a))

/* word: word searched
 * wl  : length of word in characters
 * text: text in which we search the word
 * tl  : length of text in characters
 *
 * Search word in text and returns the index of the first  
 * match, returns -1 if there is no match or invalid parameters.
 */
int search(const char * word, int wl, const char * text, int tl)
{
   if (wl > tl || wl <= 1 || !word || !text) return -1;

   int * skip = new int[wl];  // good-match table
   int occ[UCHAR_MAX+1];  // shifts for different characters
   int i;

   for (i = 0; i < wl; i++) skip[i] = wl;

   for (int ends = 0; ends < wl - 1; ends++)
   {
      /* The pattern we examine goes from indexes wl-i-1 to wl-1
       * the first character of the pattern is not word[wl-i-1].
       * We try to match it with the subword going from indexes
       * ends-i to ends.
       */
      i = 0;
      bool match = true;

      while (match && ends-i >= 0)
      {
         match = word[ends-i] == word[wl-i-1];
         if (!match) skip[wl-1-i] = wl - ends - 1;
         // (wl - ends - 1) is (wl-1-i) - (ends-i)

         i++;
      }

      if (match && ends-i < 0)
      {
         while (i < wl)
         {
            skip[wl-1-i] = wl - ends - 1;
            i++;
         }
      }
   }


   for (i = 0; i < UCHAR_MAX+1; i++) occ[i] = -1;
   for (i = 0; i < wl-1; i++) occ[word[i]] = i;


   int hpos = 0;
   while (hpos <= tl-wl)
   {
      int npos = wl-1;
      while (word[npos] == text[npos+hpos])
      {
         if (npos == 0)
         {
            delete [] skip;
            return hpos;
         }
         npos--;
      }
      // added cast or else index < 0 can happen
      hpos += max(skip[npos], npos - occ[(unsigned char) text[npos+hpos]]);
   }

   delete [] skip;
   return -1;
}

Algorithm from: Robert S. Boyer and J. Strother Moore. A Fast String Searching Algorithm. Communications of the ACM, vol. 20 no 10 , pp. 762-772 (1977).

back to computer science

Mobile