summaryrefslogtreecommitdiff
path: root/reference/C/CONTRIB/SNIP/bmhasrch.c
blob: 6eb21dac67c8b4d63bd6f8dcbcced5a5c25efc93 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
/*
**  Boyer-Moore-Horspool pattern match
**  Case-insensitive with accented character translation
**
**  public domain by Raymond Gardner 7/92
**
**  limitation: pattern length + subject length must be less than 32767
**
**  10/21/93 rdg  Fixed bug found by Jeff Dunlop
*/
#include <limits.h>                                         /* rdg 10/93 */
#include <stddef.h>
#include <string.h>

typedef unsigned char uchar;

#define LOWER_ACCENTED_CHARS

unsigned char lowervec[UCHAR_MAX+1] = {                     /* rdg 10/93 */
  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
 32,'!','"','#','$','%','&','\'','(',')','*','+',',','-','.','/',
'0','1','2','3','4','5','6','7','8','9',':',';','<','=','>','?',
'@','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o',
'p','q','r','s','t','u','v','w','x','y','z','[','\\',']','^','_',
'`','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o',
'p','q','r','s','t','u','v','w','x','y','z','{','|','}','~',127,
#ifdef LOWER_ACCENTED_CHARS
'c','u','e','a','a','a','a','c','e','e','e','i','i','i','a','a',
'e',145,146,'o','o','o','u','u','y','o','u',155,156,157,158,159,
'a','i','o','u','n','n',166,167,168,169,170,171,172,173,174,175,
#else
128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
#endif
176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,
};

#define lowerc(c) lowervec[(uchar)(c)]

#define LARGE 32767

static int patlen;
static int skip[UCHAR_MAX+1];                               /* rdg 10/93 */
static int skip2;
static uchar *pat;

void bmh_init(const char *pattern)
{
      int i, j;
      pat = (uchar *)pattern;
      patlen = strlen(pattern);
      for (i = 0; i <= UCHAR_MAX; ++i)                      /* rdg 10/93 */
      {
            skip[i] = patlen;
            for (j = patlen - 1; j >= 0; --j)
            {
                  if (lowerc(i) == lowerc(pat[j]))
                        break;
            }
            if (j >= 0)
                  skip[i] = patlen - j - 1;
            if (j == patlen - 1)
                  skip[i] = LARGE;
      }
      skip2 = patlen;
      for (i = 0; i < patlen - 1; ++i)
      {
            if ( lowerc(pat[i]) == lowerc(pat[patlen - 1]) )
                  skip2 = patlen - i - 1;
      }
}

char *bmh_search(const char *string, const int stringlen)
{
      int i, j;
      char *s;

      i = patlen - 1 - stringlen;
      if (i >= 0)
            return NULL;
      string += stringlen;
      for ( ;; )
      {
            while ((i += skip[((uchar *)string)[i]]) < 0)
                  ;                           /* mighty fast inner loop */
            if (i < (LARGE - stringlen))
                  return NULL;
            i -= LARGE;
            j = patlen - 1;
            s = (char *)string + (i - j);
            while (--j >= 0 && lowerc(s[j]) == lowerc(pat[j]))
                  ;
            if ( j < 0 )                                    /* rdg 10/93 */
                  return s;                                 /* rdg 10/93 */
            if ( (i += skip2) >= 0 )                        /* rdg 10/93 */
                  return NULL;                              /* rdg 10/93 */
      }
}