/* EPSHeader File: match.c Author: J. Kercheval Created: Sat, 01/05/1991 22:21:49 */ /* EPSRevision History J. Kercheval Wed, 02/20/1991 22:29:01 Released to Public Domain J. Kercheval Fri, 02/22/1991 15:29:01 fix '\' bugs (two :( of them) J. Kercheval Sun, 03/10/1991 19:31:29 add error return to matche() J. Kercheval Sun, 03/10/1991 20:11:11 add is_valid_pattern code J. Kercheval Sun, 03/10/1991 20:37:11 beef up main() J. Kercheval Tue, 03/12/1991 22:25:10 Released as V1.1 to Public Domain */ /* Wildcard Pattern Matching */ #include "match.h" int matche_after_star (register char *pattern, register char *text); int fast_match_after_star (register char *pattern, register char *text); /*---------------------------------------------------------------------------- * * Return TRUE if PATTERN has any special wildcard characters * ----------------------------------------------------------------------------*/ BOOLEAN is_pattern (char *p) { while (*p) { switch (*p++) { case '?': case '*': case '[': case '\\': return TRUE; } } return FALSE; } /*---------------------------------------------------------------------------- * * Return TRUE if PATTERN has is a well formed regular expression according * to the above syntax * * error_type is a return code based on the type of pattern error. Zero is * returned in error_type if the pattern is a valid one. error_type return * values are as follows: * * PATTERN_VALID - pattern is well formed * PATTERN_ESC - pattern has invalid escape ('\' at end of pattern) * PATTERN_RANGE - [..] construct has a no end range in a '-' pair (ie [a-]) * PATTERN_CLOSE - [..] construct has no end bracket (ie [abc-g ) * PATTERN_EMPTY - [..] construct is empty (ie []) * ----------------------------------------------------------------------------*/ BOOLEAN is_valid_pattern (char *p, int *error_type) { /* init error_type */ *error_type = PATTERN_VALID; /* loop through pattern to EOS */ while (*p) { /* determine pattern type */ switch (*p) { /* check literal escape, it cannot be at end of pattern */ case '\\': if (!*++p) { *error_type = PATTERN_ESC; return FALSE; } p++; break; /* the [..] construct must be well formed */ case '[': p++; /* if the next character is ']' then bad pattern */ if (*p == ']') { *error_type = PATTERN_EMPTY; return FALSE; } /* if end of pattern here then bad pattern */ if (!*p) { *error_type = PATTERN_CLOSE; return FALSE; } /* loop to end of [..] construct */ while (*p != ']') { /* check for literal escape */ if (*p == '\\') { p++; /* if end of pattern here then bad pattern */ if (!*p++) { *error_type = PATTERN_ESC; return FALSE; } } else p++; /* if end of pattern here then bad pattern */ if (!*p) { *error_type = PATTERN_CLOSE; return FALSE; } /* if this a range */ if (*p == '-') { /* we must have an end of range */ if (!*++p || *p == ']') { *error_type = PATTERN_RANGE; return FALSE; } else { /* check for literal escape */ if (*p == '\\') p++; /* if end of pattern here then bad pattern */ if (!*p++) { *error_type = PATTERN_ESC; return FALSE; } } } } break; /* all other characters are valid pattern elements */ case '*': case '?': default: p++; /* "normal" character */ break; } } return TRUE; } /*---------------------------------------------------------------------------- * * Match the pattern PATTERN against the string TEXT; * * returns MATCH_VALID if pattern matches, or an errorcode as follows * otherwise: * * MATCH_PATTERN - bad pattern * MATCH_LITERAL - match failure on literal mismatch * MATCH_RANGE - match failure on [..] construct * MATCH_ABORT - premature end of text string * MATCH_END - premature end of pattern string * MATCH_VALID - valid match * * * A match means the entire string TEXT is used up in matching. * * In the pattern string: * `*' matches any sequence of characters (zero or more) * `?' matches any character * [SET] matches any character in the specified set, * [!SET] or [^SET] matches any character not in the specified set. * * A set is composed of characters or ranges; a range looks like * character hyphen character (as in 0-9 or A-Z). [0-9a-zA-Z_] is the * minimal set of characters allowed in the [..] pattern construct. * Other characters are allowed (ie. 8 bit characters) if your system * will support them. * * To suppress the special syntactic significance of any of `[]*?!^-\', * and match the character exactly, precede it with a `\'. * ----------------------------------------------------------------------------*/ int matche (register char *p, register char *t) { register char range_start, range_end; /* start and end in range */ BOOLEAN invert; /* is this [..] or [!..] */ BOOLEAN member_match; /* have I matched the [..] construct? */ BOOLEAN loop; /* should I terminate? */ for ( ; *p; p++, t++) { /* if this is the end of the text then this is the end of the match */ if (!*t) { return ( *p == '*' && *++p == '\0' ) ? MATCH_VALID : MATCH_ABORT; } /* determine and react to pattern type */ switch (*p) { case '?': /* single any character match */ break; case '*': /* multiple any character match */ return matche_after_star (p, t); /* [..] construct, single member/exclusion character match */ case '[': { /* move to beginning of range */ p++; /* check if this is a member match or exclusion match */ invert = FALSE; if (*p == '!' || *p == '^') { invert = TRUE; p++; } /* if closing bracket here or at range start then we have a malformed pattern */ if (*p == ']') { return MATCH_PATTERN; } member_match = FALSE; loop = TRUE; while (loop) { /* if end of construct then loop is done */ if (*p == ']') { loop = FALSE; continue; } /* matching a '!', '^', '-', '\' or a ']' */ if (*p == '\\') { range_start = range_end = *++p; } else range_start = range_end = *p; /* if end of pattern then bad pattern (Missing ']') */ if (!*p) return MATCH_PATTERN; /* check for range bar */ if (*++p == '-') { /* get the range end */ range_end = *++p; /* if end of pattern or construct then bad pattern */ if (range_end == '\0' || range_end == ']') return MATCH_PATTERN; /* special character range end */ if (range_end == '\\') { range_end = *++p; /* if end of text then we have a bad pattern */ if (!range_end) return MATCH_PATTERN; } /* move just beyond this range */ p++; } /* if the text character is in range then match found. make sure the range letters have the proper relationship to one another before comparison */ if (range_start < range_end) { if (*t >= range_start && *t <= range_end) { member_match = TRUE; loop = FALSE; } } else { if (*t >= range_end && *t <= range_start) { member_match = TRUE; loop = FALSE; } } } /* if there was a match in an exclusion set then no match */ /* if there was no match in a member set then no match */ if ((invert && member_match) || !(invert || member_match)) return MATCH_RANGE; /* if this is not an exclusion then skip the rest of the [...] construct that already matched. */ if (member_match) { while (*p != ']') { /* bad pattern (Missing ']') */ if (!*p) return MATCH_PATTERN; /* skip exact match */ if (*p == '\\') { p++; /* if end of text then we have a bad pattern */ if (!*p) return MATCH_PATTERN; } /* move to next pattern char */ p++; } } break; } case '\\': /* next character is quoted and must match exactly */ /* move pattern pointer to quoted char and fall through */ p++; /* if end of text then we have a bad pattern */ if (!*p) return MATCH_PATTERN; /* must match this character exactly */ default: if (*p != *t) return MATCH_LITERAL; } } /* if end of text not reached then the pattern fails */ if (*t) return MATCH_END; else return MATCH_VALID; } /*---------------------------------------------------------------------------- * * recursively call matche() with final segment of PATTERN and of TEXT. * ----------------------------------------------------------------------------*/ int matche_after_star (register char *p, register char *t) { register int match = 0; register nextp; /* pass over existing ? and * in pattern */ while ( *p == '?' || *p == '*' ) { /* take one char for each ? and + */ if (*p == '?') { /* if end of text then no match */ if (!*t++) return MATCH_ABORT; } /* move to next char in pattern */ p++; } /* if end of pattern we have matched regardless of text left */ if (!*p) return MATCH_VALID; /* get the next character to match which must be a literal or '[' */ nextp = *p; if (nextp == '\\') { nextp = p[1]; /* if end of text then we have a bad pattern */ if (!nextp) return MATCH_PATTERN; } /* Continue until we run out of text or definite result seen */ do { /* a precondition for matching is that the next character in the pattern match the next character in the text or that the next pattern char is the beginning of a range. Increment text pointer as we go here */ if (nextp == *t || nextp == '[') match = matche(p, t); /* if the end of text is reached then no match */ if (!*t++) match = MATCH_ABORT; } while ( match != MATCH_VALID && match != MATCH_ABORT && match != MATCH_PATTERN); /* return result */ return match; } /*---------------------------------------------------------------------------- * * match() is a shell to matche() to return only BOOLEAN values. * ----------------------------------------------------------------------------*/ BOOLEAN match( char *p, char *t ) { int error_type; error_type = matche(p,t); return (error_type == MATCH_VALID ) ? TRUE : FALSE; } #ifdef TEST /* ** This test main expects as first arg the pattern and as second arg ** the match string. Output is yaeh or nay on match. If nay on ** match then the error code is parsed and written. */ #include int main(int argc, char *argv[]) { int error; int is_valid_error; if (argc != 3) printf("Usage: MATCH Pattern Text\n"); else { printf("Pattern: %s\n", argv[1]); printf("Text : %s\n", argv[2]); if (!is_pattern(argv[1])) printf(" First Argument Is Not A Pattern\n"); else { error = matche(argv[1],argv[2]); is_valid_pattern(argv[1],&is_valid_error); switch (error) { case MATCH_VALID: printf(" Match Successful"); if (is_valid_error != PATTERN_VALID) printf(" -- is_valid_pattern() " "is complaining\n"); else printf("\n"); break; case MATCH_LITERAL: printf(" Match Failed on Literal\n"); break; case MATCH_RANGE: printf(" Match Failed on [..]\n"); break; case MATCH_ABORT: printf(" Match Failed on Early " "Text Termination\n"); break; case MATCH_END: printf(" Match Failed on Early " "Pattern Termination\n"); break; case MATCH_PATTERN: switch (is_valid_error) { case PATTERN_VALID: printf(" Internal Disagreement " "On Pattern\n"); break; case PATTERN_ESC: printf(" Literal Escape at " "End of Pattern\n"); break; case PATTERN_RANGE: printf(" No End of Range in " "[..] Construct\n"); break; case PATTERN_CLOSE: printf(" [..] Construct is Open\n"); break; case PATTERN_EMPTY: printf(" [..] Construct is Empty\n"); break; default: printf(" Internal Error in " "is_valid_pattern()\n"); } break; default: printf(" Internal Error in matche()\n"); break; } } } return(0); } #endif