home *** CD-ROM | disk | FTP | other *** search
-
- /* This is a routine to test if a string matches a regular expression.
- *
- *
- * The regular expression sytax is a slight extension of the regular
- * AmigaDos wildcard patterns. Apparently 2.0 has added a few new things
- * (well, at least a not operator) to the game. This may or may not
- * be consistant with those additions (it's the way they otta done it.)
- *
- * Here it is:
- *
- * ? matches any one character
- *
- * #(pat) matches zero or more occurances of the pattern pat.
- *
- * (pat) seperates out a piece of the pattern.
- *
- * pat1|pat2 matches to either pat1 or pat2
- *
- * ' escapes the special meaning of these symbols.
- *
- * % matches the null string.
- *
- * EXTENSIONS
- *
- * [chars] will match any single character in the set chars,
- * specified as single characters or ranges seperated
- * by a -. Ex. [af-i+] would match a, f, g, h, i, and
- * +. If ~ is the first character in the set then the
- * set matched is the complement of the set specified.
- * Ex. [~a-z] would match any one character that is
- * not a (lower case if case sensitivity is on) letter.
- * Note that a [~a] is NOT the same as a ~[a]. The former
- * would match a 'b' but not a 'ab', whereas the latter
- * would match both.
- *
- * ~(pat) will match anything that doesn't match the pattern.
- * Note: it is illegal to repeat a not. ie #~a is illegal.
- * (So is #(~(a)) so don't even try it.) You can repeat
- * something with a not IN it, as long as it can't be
- * reduced to the form #~(pattern). (A #[~a] is OK.)
- * However ~#a has the expected effect (matches any
- * non-null string not composed entirely of a's.)
- *
- * * same as #?.
- *
- *
- */
-
- #include "sregexp.h"
-
- const char wilds[] = { /* string of the wildcards, for easy access */
- CHR_REPEAT,
- CHR_NOT,
- CHR_OPENBRACE,
- CHR_CLOSEBRACE,
- CHR_OPENSET,
- CHR_CLOSESET,
- CHR_ANYCHAR,
- CHR_NULL,
- CHR_OR,
- CHR_ESCAPE,
- /* CHR_RANGE, <--- special case */
- CHR_STAR,
- 0
- };
-
- #ifdef __DEBUG__
- extern void puts(char *);
- extern void printf(char *, ...);
- #endif
-
-
- struct SregExp *
- parsesregexp(expr)
- char *expr;
- /* This is the entry point into the parsing subroutines. */
- {
- return parsesub(&expr,0);
- }
-
- static struct SregExp *
- parsesub(p,end)
- char **p,end;
- /* This routine will parse a sub expression up until the character
- 'end' is encontered. This is 0 for the whole pattern and ')'
- for a subpattern */
- {
- struct SregList *list,*head=NULL,*orlist,*orhead=NULL;
- struct SregExp *sreg;
- int num = 0,ornum = 0;
-
- while (**p != end) {
- if (**p == CHR_OR) { /* deal with stuff like (ab||cd) == (ab|%|cd) */
- if (!(sreg = makenull()))
- goto cleanup;
- } else {
- if (!(sreg = parseone(p,TRUE))) /* parse one wildcard */
- goto cleanup;
- }
- if (head) { /* if there's already a list, just stick it on */
- if (!(list = (list->srl_next = getmem(sizeof(struct SregList))))) {
- report(MEM_ERROR);
- goto cleanup;
- }
- } else { /* otherwise, make a new list */
- if (!(list = (head = getmem(sizeof(struct SregList))))) {
- report(MEM_ERROR);
- goto cleanup;
- }
- }
- list->srl_sreg = sreg;
- list->srl_next = NULL;
- num++;
- if (**p == CHR_OR) { /* deal with an or */
- if (!(sreg = makesum(head,num)))
- goto cleanup;
- if (orhead) { /* either add onto the or list */
- if (!(orlist = (orlist->srl_next = getmem(sizeof(struct SregList))))) {
- report(MEM_ERROR);
- goto cleanup;
- }
- } else { /* or make a new or list */
- if (!(orlist = (orhead = getmem(sizeof(struct SregList))))) {
- report(MEM_ERROR);
- goto cleanup;
- }
- }
- orlist->srl_sreg = sreg;
- orlist->srl_next = NULL;
- ornum++;
- (*p)++;
- head = NULL;
- num = 0;
- }
- }
- if (head) {
- if (!(sreg = makesum(head,num)))
- goto cleanup;
- } else {
- if (!(sreg = makenull()))
- goto cleanup;
- }
- head = NULL;
- if (orhead) { /* if this expression had an or, clean it up */
- if (!(orlist = (orlist->srl_next = getmem(sizeof(struct SregList))))) {
- report(MEM_ERROR);
- freesregexp(sreg);
- goto cleanup;
- }
- orlist->srl_sreg = sreg;
- orlist->srl_next = NULL;
- ornum++;
- if (!(sreg = makeor(orhead,ornum)))
- goto cleanup;
- }
-
- return sreg; /* sub expression successfully matched. */
-
- cleanup: /* troubles all head here to clean up */
-
- list = head;
- while (list) { /* free all the bits of the current sum */
- head = list->srl_next;
- freesregexp(list->srl_sreg);
- freemem(list,sizeof(struct SregList));
- list = head;
- }
- list = orhead;
- while (list) { /* and all of the current or parts */
- head = list->srl_next;
- freesregexp(list->srl_sreg);
- freemem(list,sizeof(struct SregList));
- list = head;
- }
-
- return NULL; /* return the failure */
- }
-
- static struct SregExp *
- makesum(list,num)
- struct SregList *list;
- int num;
- /* This routine takes a linked list of sreg's and joins them together
- Under the auspices of one big SRP_SUM type sreg */
- {
- struct SregList *lnk;
- struct SregExp *sreg;
- int i,len=0;
- char fixed = SRF_FIXLEN;
-
- if (num == 0) {
- report(ILLEGAL_ERR);
- return NULL;
- } else if (num == 1) {
- sreg = list->srl_sreg;
- freemem(list,sizeof(struct SregList));
- return sreg;
- } if (!(sreg = getmem(sizeof(struct SregExp) + num*sizeof(struct SregExp *)))) {
- report(MEM_ERROR);
- return NULL;
- }
- sreg->sre_Type = SRP_SUM;
- sreg->sre_Flag = 0;
- sreg->sre_Data.number = num;
- for (i = 0; i < num; i++) {
- len += realen(list->srl_sreg);
- fixed &= isfixed(list->srl_sreg) ? SRF_FIXLEN : 0;
- sreg->sre_List[i] = list->srl_sreg;
- lnk = list->srl_next;
- freemem(list,sizeof(struct SregList));
- list = lnk;
- }
- sreg->sre_MinLen = len;
- sreg->sre_Flag |= fixed;
- return sreg;
- }
-
- static struct SregExp *
- makeor(list,num)
- struct SregList *list;
- int num;
- /* This does basically the same thing as makesum, except it makes
- an SRP_OR type sreg to join the list and doesn't deal with some
- special cases */
- {
- struct SregList *lnk;
- struct SregExp *sreg;
- int i,len;
- char fixed = SRF_FIXLEN;
-
- if (!(sreg = getmem(sizeof(struct SregExp) + num*sizeof(struct SregExp *)))) {
- report(MEM_ERROR);
- return NULL;
- }
- sreg->sre_Type = SRP_OR;
- sreg->sre_Flag = 0;
- sreg->sre_Data.number = num;
- for (i = 0; i < num; i++) {
- if (i == 0)
- len = realen(list->srl_sreg);
- else if (len != realen(list->srl_sreg)) {
- fixed = 0;
- if (len > realen(list->srl_sreg))
- len = realen(list->srl_sreg);
- }
- fixed &= isfixed(list->srl_sreg) ? SRF_FIXLEN : 0;
- sreg->sre_List[i] = list->srl_sreg;
- lnk = list->srl_next;
- freemem(list,sizeof(struct SregList));
- list = lnk;
- }
- sreg->sre_MinLen = len;
- sreg->sre_Flag |= fixed;
- return sreg;
- }
-
- static struct SregExp *
- parseone(p,cat)
- char **p,cat;
- /* This routine parses one wildcard character from the string *p,
- leaving it set up pointing to the begining of the next
- wildcard. If 'cat' is true then a string of characters will
- be grouped together as a SRP_STRING type sreg. It may be
- false because of the scope rules of ~ and #, which only
- repeat the very next pattern. */
- {
- struct SregExp *sreg;
-
- switch (**p) {
- case (CHR_REPEAT) :
- (*p)++;
- if (!(sreg = parseone(p,FALSE)))
- return NULL;
- if (sreg->sre_Flag & SRF_NOT) {
- report(ILLEGAL_ERR);
- freesregexp(sreg);
- return NULL;
- }
- sreg->sre_Flag |= SRF_REPEAT;
- break;
- case (CHR_NOT) :
- (*p)++;
- if (!(sreg = parseone(p,FALSE)))
- return NULL;
- sreg->sre_Flag |= SRF_NOT;
- break;
- case (CHR_OPENBRACE) :
- (*p)++;
- sreg = parsesub(p,CHR_CLOSEBRACE);
- (*p)++;
- break;
- case (CHR_OPENSET) :
- (*p)++;
- if (!(sreg = getmem(sizeof(struct SregExp)))) {
- report(MEM_ERROR);
- return NULL;
- }
- sreg->sre_Type = SRP_SETCHAR;
- sreg->sre_Flag = SRF_FIXLEN;
- sreg->sre_MinLen = 1;
- if (!(sreg->sre_Data.setchar = makeset(p))) {
- freemem(sreg,sizeof(sreg));
- return NULL;
- }
- (*p)++;
- break;
- case (CHR_ANYCHAR) :
- (*p)++;
- if (!(sreg = getmem(sizeof(struct SregExp)))) {
- report(MEM_ERROR);
- return NULL;
- }
- sreg->sre_Type = SRP_ANYCHAR;
- sreg->sre_Flag = SRF_FIXLEN;
- sreg->sre_MinLen = 1;
- break;
- case (CHR_STAR) :
- (*p)++;
- if (!(sreg = getmem(sizeof(struct SregExp)))) {
- report(MEM_ERROR);
- return NULL;
- }
- sreg->sre_Type = SRP_ANYCHAR;
- sreg->sre_Flag = SRF_FIXLEN | SRF_REPEAT;
- sreg->sre_MinLen = 1;
- break;
- case (CHR_NULL) :
- (*p)++;
- if (!(sreg = makenull()))
- return NULL;
- break;
- case (CHR_CLOSEBRACE) :
- case (CHR_CLOSESET) :
- case (CHR_OR) :
- case (0) :
- report(ILLEGAL_ERR);
- return NULL;
- default :
- if (!(sreg = getmem(sizeof(struct SregExp)))) {
- report(MEM_ERROR);
- return NULL;
- }
- sreg->sre_Flag = SRF_FIXLEN;
- if (!cat) {
- sreg->sre_Data.onechar = onechar(p,FALSE);
- sreg->sre_Type = SRP_ONECHAR;
- sreg->sre_MinLen = 1;
- } else {
- char *q=*p;
- int len=0;
-
- while (onechar(&q,FALSE))
- len++;
- sreg->sre_MinLen = len;
- if (len == 1) {
- sreg->sre_Data.onechar = onechar(p,FALSE);
- sreg->sre_Type = SRP_ONECHAR;
- } else {
- sreg->sre_Type = SRP_STRING;
- if (!(q = sreg->sre_Data.string = getmem(len+1))) {
- report(MEM_ERROR);
- freemem(sreg,sizeof(sreg));
- return NULL;
- }
- while (*q++ = onechar(p,FALSE)) ;
- }
- }
- }
- return sreg;
- }
-
- static struct SregExp *
- makenull()
- /* This routine makes an node matching the null string. */
- {
- struct SregExp *sreg;
-
- if (!(sreg = getmem(sizeof(struct SregExp)))) {
- report(MEM_ERROR);
- return NULL;
- }
- sreg->sre_Type = SRP_NULL;
- sreg->sre_Flag = SRF_FIXLEN;
- sreg->sre_MinLen = 0;
- }
-
- static char
- onechar(p,minus)
- char **p,minus;
- /* this routine picks one character off the string *p. It handles
- escaping the special meaning of the wildcard characters, and
- returns 0 if we're at the end of the string or the next
- char is a wildcard. if 'minus' is true, then the '-' is
- treated as a wildcard, otherwise it isn't */
- {
- if (**p == CHR_ESCAPE)
- (*p)++;
- else if (strchr(wilds,**p) || (minus && **p == CHR_RANGE))
- return 0;
- return *(*p)++;
- }
-
- static char *
- makeset(p)
- char **p;
- /* this makes a table of match flags from the character specifier
- that goes in between [ and ]. It handles a leading '~' and
- leaves *p pointing to the closing brace. Note: This routine
- checks to make sure the specifier ends in a ] and fails if
- it doesn't */
- {
- char *set,not=FALSE,ok=FALSE,s,e;
-
- if (**p == CHR_NOT) {
- (*p)++;
- not = TRUE;
- }
- if (!(set = getmem(32))) {
- report(MEM_ERROR);
- return NULL;
- }
- for (s = 0; s < 32; s++)
- set[s] = 0;
- while (s = onechar(p,TRUE)) {
- ok = TRUE;
- if (**p == CHR_RANGE) {
- (*p)++;
- if (!(e = onechar(p,TRUE))) {
- report(ILLEGAL_ERR);
- freemem(set,32);
- return NULL;
- }
- for ( ; s <= e; s++)
- set[s/8] |= 1 << (s % 8);
- } else
- set[s/8] |= 1 << (s % 8);
- }
- if (!ok || **p != CHR_CLOSESET) {
- report(ILLEGAL_ERR);
- freemem(set,32);
- return NULL;
- }
- if (not) {
- for(s = 0; s < 32; s++)
- set[s] = ~set[s];
- }
- return set;
- }
-
- void
- freesregexp(sreg)
- struct SregExp *sreg;
- /* this routine frees up a sreg. It correctly handles freeing the
- subpattern elements in a SRP_SUM or SRP_OR sreg and frees the
- match table or string from a SRP_SETCHAR or SRP_STRING sreg.
- This is one of the externally visible routines. */
- {
- int add = 0;
-
- if (sreg->sre_Type == SRP_SUM || sreg->sre_Type == SRP_OR) {
- int i;
-
- add = sreg->sre_Data.number * sizeof(struct SregExp *);
- for (i = 0; i < sreg->sre_Data.number; i++)
- freesregexp(sreg->sre_List[i]);
-
- } else if (sreg->sre_Type == SRP_STRING)
- freemem(sreg->sre_Data.string,strlen(sreg->sre_Data.string)+1);
- else if (sreg->sre_Type == SRP_SETCHAR)
- freemem(sreg->sre_Data.setchar,32);
- freemem(sreg,sizeof(struct SregExp) + add);
- }
-
- int
- matchsregexp(p,sreg,up)
- char *p;
- struct SregExp *sreg;
- int up;
- /* This is the externally visible stub to the pattern matching part
- of sreg. 'p' is the string to match to, sreg is the preparsed
- sreg, and up indicates if the match is case sensitive. */
- {
- return matchnsregexp(p,sreg,up,strlen(p));
- }
-
- int
- matchnsregexp(p,sreg,up,len)
- char *p;
- struct SregExp *sreg;
- int up,len;
- /* This routine will match a sreg to a string of length 'len'. */
- /* The length, rather than NULL terminated is used to make a
- lot of things a lot simpler */
- {
- int res,i;
-
- if (sreg->sre_Flag & SRF_REPEAT) { /* deal with repeaters. */
- char not,*h;
-
- if (len == 0 || sreg->sre_Type == SRP_ANYCHAR) { /* always true */
- res = TRUE;
- goto end;
- }
- /* check if the lengths match up */
- if (len < sreg->sre_MinLen || (sreg->sre_Flag & SRF_FIXLEN &&
- (sreg->sre_MinLen == 0 || len % sreg->sre_MinLen != 0))) {
- res = FALSE;
- goto end;
- }
- not = sreg->sre_Flag & SRF_NOT;
- sreg->sre_Flag &= ~(SRF_REPEAT | SRF_NOT);
- if (sreg->sre_Flag & SRF_FIXLEN) { /* handle fixed lenght matches */
- h = p;
- while (h-p < len) {
- if (!(res = matchnsregexp(h,sreg,up,sreg->sre_MinLen)))
- goto end;
- h += sreg->sre_MinLen;
- }
- } else { /* and variable length ones */
- for (i = len; i >= (sreg->sre_MinLen ? sreg->sre_MinLen : 1); i--) {
- if (res = matchnsregexp(p,sreg,up,i)) {
- sreg->sre_Flag |= SRF_REPEAT;
- if (res = matchnsregexp(p+i,sreg,up,len-i))
- break;
- sreg->sre_Flag &= ~SRF_REPEAT;
- }
- }
- }
- sreg->sre_Flag |= SRF_REPEAT | not;
- goto end;
- }
-
- /* check the lengths first for quick rejection */
- if (len < sreg->sre_MinLen || (sreg->sre_Flag & SRF_FIXLEN && len != sreg->sre_MinLen)) {
- res = FALSE;
- goto end;
- }
-
- switch (sreg->sre_Type) {
- case (SRP_SUM) :
- res = matchsum(&(sreg->sre_List[0]),sreg->sre_Data.number,p,len,up);
- break;
- case (SRP_ONECHAR) :
- res = len == 1 && (up ? *p == sreg->sre_Data.onechar :
- toupper(*p) == toupper(sreg->sre_Data.onechar));
- break;
- case (SRP_SETCHAR) :
- res = len == 1 && matchset(sreg,*p) || (up ? FALSE :
- (isupper(*p) ? matchset(sreg,tolower(*p)) :
- matchset(sreg,toupper(*p))));
- break;
- case (SRP_ANYCHAR) :
- res = len == 1;
- break;
- case (SRP_STRING) :
- if (up)
- res = !strncmp(p,sreg->sre_Data.string,len);
- else
- res = !strnicmp(p,sreg->sre_Data.string,len);
- break;
- case (SRP_NULL) :
- res = len == 0;
- break;
- case (SRP_OR) :
- for (i = 0; i < sreg->sre_Data.number; i++)
- if (res = matchnsregexp(p,sreg->sre_List[i],up,len)) {
- break;
- }
- break;
- }
-
- end:
-
- if (sreg->sre_Flag & SRF_NOT)
- return !res;
- else
- return res;
- }
-
- static int
- matchsum(list,num,p,len,up)
- struct SregExp *list[];
- int num,len,up;
- char *p;
- /* This routine is called by match to match the elements of a sum
- of sregs. It tries to be as effecient as possible, and keep
- recursion to a minimum. It will match any fixed length peices
- at the begining and end first, then match any fixed length
- peices in the middle to break the string up. Only then does
- it do the tiresome matches to the non-fixed length peices. */
- {
- int i,res,o;
-
- while (num && isfixed(list[0])) {
- if (!(res = matchnsregexp(p,list[0],up,list[0]->sre_MinLen)))
- return FALSE;
- p += list[0]->sre_MinLen;
- len -= list[0]->sre_MinLen;
- list++;
- num--;
- }
- while (num && isfixed(list[num-1])) {
- if (!(res = matchnsregexp(p+len-list[num-1]->sre_MinLen,list[num-1],up,
- list[num-1]->sre_MinLen)))
- return FALSE;
- len -= list[num-1]->sre_MinLen;
- num--;
- }
- if (num == 0)
- return TRUE;
-
- o = 0;
- for (i = 1; i < num-1; i++) {
- if (isfixed(list[i])) {
- for ( ; len-o >= list[i]->sre_MinLen; o++) {
- if (res = matchnsregexp(p+o,list[i],up,list[i]->sre_MinLen)) {
- if (!(res = matchsum(list,i,p,o,up)))
- return FALSE;
- if (!(res = matchsum(list+(i+1),num-i-1,
- p+o+list[i]->sre_MinLen,len-o-list[i]->sre_MinLen,up)))
- return FALSE;
- return TRUE;
- }
- }
- return FALSE;
- } else
- o += realen(list[i]);
- }
-
- if (num == 1)
- return matchnsregexp(p,list[0],up,len);
-
- for (o = len; o >= list[0]->sre_MinLen; o--)
- if (matchnsregexp(p,list[0],up,o) && matchsum(list+1,num-1,p+o,len-o,up))
- return TRUE;
-
- return FALSE;
- }
-
- int
- iswild(p)
- char *p;
- /* this is a very simple routine, but it is externally visible. Returns
- true if the string has wildcard characters (unescaped) false if
- not */
- {
- while (onechar(&p,FALSE)) ;
- return *p != 0;
- }
-
- void
- report(i)
- int i;
- /* This routine now does set the IoErr value to more information.
- For now it does nothing. */
- {
- struct Process *myself;
-
- if (myself = (struct Process *)FindTask(0))
- myself->pr_Result2 = i;
- }
-