From 7e0f021a9aec35fd8e6725e87e3313b101d26f5e Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Sun, 27 Jan 2008 11:37:44 +0100 Subject: Initial import (2.0.2-6) --- reference/C/CONTRIB/SNIP/match.c | 585 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 585 insertions(+) create mode 100755 reference/C/CONTRIB/SNIP/match.c (limited to 'reference/C/CONTRIB/SNIP/match.c') diff --git a/reference/C/CONTRIB/SNIP/match.c b/reference/C/CONTRIB/SNIP/match.c new file mode 100755 index 0000000..55e1229 --- /dev/null +++ b/reference/C/CONTRIB/SNIP/match.c @@ -0,0 +1,585 @@ +/* + EPSHeader + + File: match.c + Author: J. Kercheval + Created: Sat, 01/05/1991 22:21:49 +*/ +/* + EPSRevision History + + J. Kercheval Wed, 02/20/1991 22:29:01 Released to Public Domain + J. Kercheval Fri, 02/22/1991 15:29:01 fix '\' bugs (two :( of them) + J. Kercheval Sun, 03/10/1991 19:31:29 add error return to matche() + J. Kercheval Sun, 03/10/1991 20:11:11 add is_valid_pattern code + J. Kercheval Sun, 03/10/1991 20:37:11 beef up main() + J. Kercheval Tue, 03/12/1991 22:25:10 Released as V1.1 to Public Domain +*/ + +/* + Wildcard Pattern Matching +*/ + + +#include "match.h" + +int matche_after_star (register char *pattern, register char *text); +int fast_match_after_star (register char *pattern, register char *text); + +/*---------------------------------------------------------------------------- +* +* Return TRUE if PATTERN has any special wildcard characters +* +----------------------------------------------------------------------------*/ + +BOOLEAN is_pattern (char *p) +{ + while (*p) + { + switch (*p++) + { + case '?': + case '*': + case '[': + case '\\': + return TRUE; + } + } + return FALSE; +} + + +/*---------------------------------------------------------------------------- +* +* Return TRUE if PATTERN has is a well formed regular expression according +* to the above syntax +* +* error_type is a return code based on the type of pattern error. Zero is +* returned in error_type if the pattern is a valid one. error_type return +* values are as follows: +* +* PATTERN_VALID - pattern is well formed +* PATTERN_ESC - pattern has invalid escape ('\' at end of pattern) +* PATTERN_RANGE - [..] construct has a no end range in a '-' pair (ie [a-]) +* PATTERN_CLOSE - [..] construct has no end bracket (ie [abc-g ) +* PATTERN_EMPTY - [..] construct is empty (ie []) +* +----------------------------------------------------------------------------*/ + +BOOLEAN is_valid_pattern (char *p, int *error_type) +{ + /* init error_type */ + *error_type = PATTERN_VALID; + + /* loop through pattern to EOS */ + while (*p) + { + /* determine pattern type */ + switch (*p) + { + /* check literal escape, it cannot be at end of pattern */ + case '\\': + if (!*++p) + { + *error_type = PATTERN_ESC; + return FALSE; + } + p++; + break; + + /* the [..] construct must be well formed */ + case '[': + p++; + + /* if the next character is ']' then bad pattern */ + if (*p == ']') + { + *error_type = PATTERN_EMPTY; + return FALSE; + } + + /* if end of pattern here then bad pattern */ + if (!*p) + { + *error_type = PATTERN_CLOSE; + return FALSE; + } + + /* loop to end of [..] construct */ + while (*p != ']') + { + /* check for literal escape */ + if (*p == '\\') + { + p++; + + /* if end of pattern here then bad pattern */ + if (!*p++) + { + *error_type = PATTERN_ESC; + return FALSE; + } + } + else p++; + + /* if end of pattern here then bad pattern */ + if (!*p) + { + *error_type = PATTERN_CLOSE; + return FALSE; + } + + /* if this a range */ + if (*p == '-') + { + /* we must have an end of range */ + if (!*++p || *p == ']') + { + *error_type = PATTERN_RANGE; + return FALSE; + } + else + { + + /* check for literal escape */ + if (*p == '\\') + p++; + + /* if end of pattern here + then bad pattern */ + if (!*p++) + { + *error_type = PATTERN_ESC; + return FALSE; + } + } + } + } + break; + + /* all other characters are valid pattern elements */ + case '*': + case '?': + default: + p++; /* "normal" character */ + break; + } + } + return TRUE; +} + + +/*---------------------------------------------------------------------------- +* +* Match the pattern PATTERN against the string TEXT; +* +* returns MATCH_VALID if pattern matches, or an errorcode as follows +* otherwise: +* +* MATCH_PATTERN - bad pattern +* MATCH_LITERAL - match failure on literal mismatch +* MATCH_RANGE - match failure on [..] construct +* MATCH_ABORT - premature end of text string +* MATCH_END - premature end of pattern string +* MATCH_VALID - valid match +* +* +* A match means the entire string TEXT is used up in matching. +* +* In the pattern string: +* `*' matches any sequence of characters (zero or more) +* `?' matches any character +* [SET] matches any character in the specified set, +* [!SET] or [^SET] matches any character not in the specified set. +* +* A set is composed of characters or ranges; a range looks like +* character hyphen character (as in 0-9 or A-Z). [0-9a-zA-Z_] is the +* minimal set of characters allowed in the [..] pattern construct. +* Other characters are allowed (ie. 8 bit characters) if your system +* will support them. +* +* To suppress the special syntactic significance of any of `[]*?!^-\', +* and match the character exactly, precede it with a `\'. +* +----------------------------------------------------------------------------*/ + +int matche (register char *p, register char *t) +{ + register char range_start, range_end; /* start and end in range */ + + BOOLEAN invert; /* is this [..] or [!..] */ + BOOLEAN member_match; /* have I matched the [..] construct? */ + BOOLEAN loop; /* should I terminate? */ + + for ( ; *p; p++, t++) + { + /* if this is the end of the text + then this is the end of the match */ + + if (!*t) + { + return ( *p == '*' && *++p == '\0' ) ? + MATCH_VALID : MATCH_ABORT; + } + + /* determine and react to pattern type */ + + switch (*p) + { + case '?': /* single any character match */ + break; + + case '*': /* multiple any character match */ + return matche_after_star (p, t); + + /* [..] construct, single member/exclusion character match */ + case '[': + { + /* move to beginning of range */ + + p++; + + /* check if this is a member match or exclusion match */ + + invert = FALSE; + if (*p == '!' || *p == '^') + { + invert = TRUE; + p++; + } + + /* if closing bracket here or at range start then we have a + malformed pattern */ + + if (*p == ']') + { + return MATCH_PATTERN; + } + + member_match = FALSE; + loop = TRUE; + + while (loop) + { + /* if end of construct then loop is done */ + + if (*p == ']') + { + loop = FALSE; + continue; + } + + /* matching a '!', '^', '-', '\' or a ']' */ + + if (*p == '\\') + { + range_start = range_end = *++p; + } + else range_start = range_end = *p; + + /* if end of pattern then bad pattern (Missing ']') */ + + if (!*p) + return MATCH_PATTERN; + + /* check for range bar */ + if (*++p == '-') + { + /* get the range end */ + + range_end = *++p; + + /* if end of pattern or construct + then bad pattern */ + + if (range_end == '\0' || range_end == ']') + return MATCH_PATTERN; + + /* special character range end */ + if (range_end == '\\') + { + range_end = *++p; + + /* if end of text then + we have a bad pattern */ + if (!range_end) + return MATCH_PATTERN; + } + + /* move just beyond this range */ + p++; + } + + /* if the text character is in range then match found. + make sure the range letters have the proper + relationship to one another before comparison */ + + if (range_start < range_end) + { + if (*t >= range_start && *t <= range_end) + { + member_match = TRUE; + loop = FALSE; + } + } + else + { + if (*t >= range_end && *t <= range_start) + { + member_match = TRUE; + loop = FALSE; + } + } + } + + /* if there was a match in an exclusion set then no match */ + /* if there was no match in a member set then no match */ + + if ((invert && member_match) || !(invert || member_match)) + return MATCH_RANGE; + + /* if this is not an exclusion then skip the rest of + the [...] construct that already matched. */ + + if (member_match) + { + while (*p != ']') + { + /* bad pattern (Missing ']') */ + if (!*p) + return MATCH_PATTERN; + + /* skip exact match */ + if (*p == '\\') + { + p++; + + /* if end of text then + we have a bad pattern */ + + if (!*p) + return MATCH_PATTERN; + } + + /* move to next pattern char */ + + p++; + } + } + break; + } + case '\\': /* next character is quoted and must match exactly */ + + /* move pattern pointer to quoted char and fall through */ + + p++; + + /* if end of text then we have a bad pattern */ + + if (!*p) + return MATCH_PATTERN; + + /* must match this character exactly */ + + default: + if (*p != *t) + return MATCH_LITERAL; + } + } + /* if end of text not reached then the pattern fails */ + + if (*t) + return MATCH_END; + else return MATCH_VALID; +} + + +/*---------------------------------------------------------------------------- +* +* recursively call matche() with final segment of PATTERN and of TEXT. +* +----------------------------------------------------------------------------*/ + +int matche_after_star (register char *p, register char *t) +{ + register int match = 0; + register nextp; + + /* pass over existing ? and * in pattern */ + + while ( *p == '?' || *p == '*' ) + { + /* take one char for each ? and + */ + + if (*p == '?') + { + /* if end of text then no match */ + if (!*t++) + return MATCH_ABORT; + } + + /* move to next char in pattern */ + + p++; + } + + /* if end of pattern we have matched regardless of text left */ + + if (!*p) + return MATCH_VALID; + + /* get the next character to match which must be a literal or '[' */ + + nextp = *p; + if (nextp == '\\') + { + nextp = p[1]; + + /* if end of text then we have a bad pattern */ + + if (!nextp) + return MATCH_PATTERN; + } + + /* Continue until we run out of text or definite result seen */ + + do + { + /* a precondition for matching is that the next character + in the pattern match the next character in the text or that + the next pattern char is the beginning of a range. Increment + text pointer as we go here */ + + if (nextp == *t || nextp == '[') + match = matche(p, t); + + /* if the end of text is reached then no match */ + + if (!*t++) + match = MATCH_ABORT; + + } while ( match != MATCH_VALID && + match != MATCH_ABORT && + match != MATCH_PATTERN); + + /* return result */ + + return match; +} + + +/*---------------------------------------------------------------------------- +* +* match() is a shell to matche() to return only BOOLEAN values. +* +----------------------------------------------------------------------------*/ + +BOOLEAN match( char *p, char *t ) +{ + int error_type; + + error_type = matche(p,t); + return (error_type == MATCH_VALID ) ? TRUE : FALSE; +} + + +#ifdef TEST + +/* +** This test main expects as first arg the pattern and as second arg +** the match string. Output is yaeh or nay on match. If nay on +** match then the error code is parsed and written. +*/ + +#include + +int main(int argc, char *argv[]) +{ + int error; + int is_valid_error; + + if (argc != 3) + printf("Usage: MATCH Pattern Text\n"); + else + { + printf("Pattern: %s\n", argv[1]); + printf("Text : %s\n", argv[2]); + + if (!is_pattern(argv[1])) + printf(" First Argument Is Not A Pattern\n"); + else + { + error = matche(argv[1],argv[2]); + is_valid_pattern(argv[1],&is_valid_error); + + switch (error) + { + case MATCH_VALID: + printf(" Match Successful"); + if (is_valid_error != PATTERN_VALID) + printf(" -- is_valid_pattern() " + "is complaining\n"); + else printf("\n"); + break; + + case MATCH_LITERAL: + printf(" Match Failed on Literal\n"); + break; + + case MATCH_RANGE: + printf(" Match Failed on [..]\n"); + break; + + case MATCH_ABORT: + printf(" Match Failed on Early " + "Text Termination\n"); + break; + + case MATCH_END: + printf(" Match Failed on Early " + "Pattern Termination\n"); + break; + + case MATCH_PATTERN: + switch (is_valid_error) + { + case PATTERN_VALID: + printf(" Internal Disagreement " + "On Pattern\n"); + break; + + case PATTERN_ESC: + printf(" Literal Escape at " + "End of Pattern\n"); + break; + + + case PATTERN_RANGE: + printf(" No End of Range in " + "[..] Construct\n"); + break; + + case PATTERN_CLOSE: + printf(" [..] Construct is Open\n"); + break; + + case PATTERN_EMPTY: + printf(" [..] Construct is Empty\n"); + break; + + default: + printf(" Internal Error in " + "is_valid_pattern()\n"); + } + break; + + default: + printf(" Internal Error in matche()\n"); + break; + } + } + } + return(0); +} + +#endif -- cgit v1.2.3-54-g00ecf