/* ** Boyer-Moore-Horspool pattern match ** Case-insensitive with accented character translation ** ** public domain by Raymond Gardner 7/92 ** ** limitation: pattern length + subject length must be less than 32767 ** ** 10/21/93 rdg Fixed bug found by Jeff Dunlop */ #include /* rdg 10/93 */ #include #include typedef unsigned char uchar; #define LOWER_ACCENTED_CHARS unsigned char lowervec[UCHAR_MAX+1] = { /* rdg 10/93 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,'!','"','#','$','%','&','\'','(',')','*','+',',','-','.','/', '0','1','2','3','4','5','6','7','8','9',':',';','<','=','>','?', '@','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o', 'p','q','r','s','t','u','v','w','x','y','z','[','\\',']','^','_', '`','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o', 'p','q','r','s','t','u','v','w','x','y','z','{','|','}','~',127, #ifdef LOWER_ACCENTED_CHARS 'c','u','e','a','a','a','a','c','e','e','e','i','i','i','a','a', 'e',145,146,'o','o','o','u','u','y','o','u',155,156,157,158,159, 'a','i','o','u','n','n',166,167,168,169,170,171,172,173,174,175, #else 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, #endif 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223, 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239, 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255, }; #define lowerc(c) lowervec[(uchar)(c)] #define LARGE 32767 static int patlen; static int skip[UCHAR_MAX+1]; /* rdg 10/93 */ static int skip2; static uchar *pat; void bmh_init(const char *pattern) { int i, j; pat = (uchar *)pattern; patlen = strlen(pattern); for (i = 0; i <= UCHAR_MAX; ++i) /* rdg 10/93 */ { skip[i] = patlen; for (j = patlen - 1; j >= 0; --j) { if (lowerc(i) == lowerc(pat[j])) break; } if (j >= 0) skip[i] = patlen - j - 1; if (j == patlen - 1) skip[i] = LARGE; } skip2 = patlen; for (i = 0; i < patlen - 1; ++i) { if ( lowerc(pat[i]) == lowerc(pat[patlen - 1]) ) skip2 = patlen - i - 1; } } char *bmh_search(const char *string, const int stringlen) { int i, j; char *s; i = patlen - 1 - stringlen; if (i >= 0) return NULL; string += stringlen; for ( ;; ) { while ((i += skip[((uchar *)string)[i]]) < 0) ; /* mighty fast inner loop */ if (i < (LARGE - stringlen)) return NULL; i -= LARGE; j = patlen - 1; s = (char *)string + (i - j); while (--j >= 0 && lowerc(s[j]) == lowerc(pat[j])) ; if ( j < 0 ) /* rdg 10/93 */ return s; /* rdg 10/93 */ if ( (i += skip2) >= 0 ) /* rdg 10/93 */ return NULL; /* rdg 10/93 */ } }