summaryrefslogtreecommitdiff
path: root/reference/C/CONTRIB/SNIP/approx.c
diff options
context:
space:
mode:
Diffstat (limited to 'reference/C/CONTRIB/SNIP/approx.c')
-rwxr-xr-xreference/C/CONTRIB/SNIP/approx.c168
1 files changed, 168 insertions, 0 deletions
diff --git a/reference/C/CONTRIB/SNIP/approx.c b/reference/C/CONTRIB/SNIP/approx.c
new file mode 100755
index 0000000..319525c
--- /dev/null
+++ b/reference/C/CONTRIB/SNIP/approx.c
@@ -0,0 +1,168 @@
+/***************************************************************
+ *
+ * Fuzzy string searching subroutines
+ *
+ * Author: John Rex
+ * Date: August, 1988
+ * References: (1) Computer Algorithms, by Sara Baase
+ * Addison-Wesley, 1988, pp 242-4.
+ * (2) Hall PAV, Dowling GR: "Approximate string matching",
+ * ACM Computing Surveys, 12:381-402, 1980.
+ *
+ * Verified on:
+ * Datalite, DeSmet, Ecosoft, Lattice, MetaWare, MSC, Turbo, Watcom
+ *
+ * Compile time preprocessor switches:
+ * DEBUG - if defined, include test driver
+ *
+ * Usage:
+ *
+ * char *pattern, *text; - search for pattern in text
+ * int degree; - degree of allowed mismatch
+ * char *start, *end;
+ * int howclose;
+ *
+ * void App_init(pattern, text, degree); - setup routine
+ * void App_next(&start, &end, &howclose); - find next match
+ *
+ * - searching is done when App_next() returns start==NULL
+ *
+ **************************************************************/
+
+#define DEBUG 1
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* local, static data */
+
+static char *Text, *Pattern; /* pointers to search strings */
+static int Textloc; /* current search position in Text */
+static int Plen; /* length of Pattern */
+static int Degree; /* max degree of allowed mismatch */
+static int *Ldiff, *Rdiff; /* dynamic difference arrays */
+static int *Loff, *Roff; /* used to calculate start of match */
+
+void App_init(char *pattern, char *text, int degree)
+{
+ int i;
+
+ /* save parameters */
+
+ Text = text;
+ Pattern = pattern;
+ Degree = degree;
+
+ /* initialize */
+
+ Plen = strlen(pattern);
+ Ldiff = (int *) malloc(sizeof(int) * (Plen + 1) * 4);
+ Rdiff = Ldiff + Plen + 1;
+ Loff = Rdiff + Plen + 1;
+ Roff = Loff + Plen + 1;
+ for (i = 0; i <= Plen; i++)
+ {
+ Rdiff[i] = i; /* initial values for right-hand column */
+ Roff[i] = 1;
+ }
+
+ Textloc = -1; /* current offset into Text */
+}
+
+void App_next(char **start, char **end, int *howclose)
+{
+ int *temp, a, b, c, i;
+
+ *start = NULL;
+ while (*start == NULL) /* start computing columns */
+ {
+ if (Text[++Textloc] == '\0') /* out of text to search! */
+ break;
+
+ temp = Rdiff; /* move right-hand column to left ... */
+ Rdiff = Ldiff; /* ... so that we can compute new ... */
+ Ldiff = temp; /* ... right-hand column */
+ Rdiff[0] = 0; /* top (boundary) row */
+
+ temp = Roff; /* and swap offset arrays, too */
+ Roff = Loff;
+ Loff = temp;
+ Roff[1] = 0;
+
+ for (i = 0; i < Plen; i++) /* run through pattern */
+ {
+ /* compute a, b, & c as the three adjacent cells ... */
+
+ if (Pattern[i] == Text[Textloc])
+ a = Ldiff[i];
+ else a = Ldiff[i] + 1;
+ b = Ldiff[i+1] + 1;
+ c = Rdiff[i] + 1;
+
+ /* ... now pick minimum ... */
+
+ if (b < a)
+ a = b;
+ if (c < a)
+ a = c;
+
+ /* ... and store */
+
+ Rdiff[i+1] = a;
+ }
+
+ /* now update offset array */
+ /* the values in the offset arrays are added to the
+ current location to determine the beginning of the
+ mismatched substring. (see text for details) */
+
+ if (Plen > 1) for (i=2; i<=Plen; i++)
+ {
+ if (Ldiff[i-1] < Rdiff[i])
+ Roff[i] = Loff[i-1] - 1;
+ else if (Rdiff[i-1] < Rdiff[i])
+ Roff[i] = Roff[i-1];
+ else if (Ldiff[i] < Rdiff[i])
+ Roff[i] = Loff[i] - 1;
+ else /* Ldiff[i-1] == Rdiff[i] */
+ Roff[i] = Loff[i-1] - 1;
+ }
+
+ /* now, do we have an approximate match? */
+
+ if (Rdiff[Plen] <= Degree) /* indeed so! */
+ {
+ *end = Text + Textloc;
+ *start = *end + Roff[Plen];
+ *howclose = Rdiff[Plen];
+ }
+ }
+
+ if (start == NULL) /* all done - free dynamic arrays */
+ free(Ldiff);
+}
+
+#ifdef DEBUG
+
+void main(int argc, char **argv)
+{
+ char *begin, *end;
+ int howclose;
+
+ if (argc != 4)
+ {
+ puts("Usage is: approx pattern text degree\n");
+ exit(0);
+ }
+
+ App_init(argv[1], argv[2], atoi(argv[3]));
+ App_next(&begin, &end, &howclose);
+ while (begin != NULL)
+ {
+ printf("Degree %d: %.*s\n", howclose, end-begin+1, begin);
+ App_next(&begin, &end, &howclose);
+ }
+}
+
+#endif