home *** CD-ROM | disk | FTP | other *** search
-
- // Regular Expressions
- // Copyright (c) 2000-2001 by Digital Mars
- // All Rights Reserved
- // Written by Walter Bright
-
- #ifndef REGEXP_H
- #define REGEXP_H
-
- #include <tchar.h>
-
- /*
- Escape sequences:
-
- \nnn starts out a 1, 2 or 3 digit octal sequence,
- where n is an octal digit. If nnn is larger than
- 0377, then the 3rd digit is not part of the sequence
- and is not consumed.
- For maximal portability, use exactly 3 digits.
-
- \xXX starts out a 1 or 2 digit hex sequence. X
- is a hex character. If the first character after the \x
- is not a hex character, the value of the sequence is 'x'
- and the XX are not consumed.
- For maximal portability, use exactly 2 digits.
-
- \uUUUU is a unicode sequence. There are exactly
- 4 hex characters after the \u, if any are not, then
- the value of the sequence is 'u', and the UUUU are not
- consumed.
-
- Character classes:
-
- [a-b], where a is greater than b, will produce
- an error.
- */
-
- typedef struct regmatch
- {
- int rm_so; // index of start of match
- int rm_eo; // index past end of match
- } regmatch_t;
-
- struct Range;
- struct RegBuffer;
-
- struct RegExp
- {
- RegExp();
- ~RegExp();
-
- unsigned re_nsub; // number of parenthesized subexpression matches
- regmatch_t *pmatch; // array [re_nsub + 1]
-
- TCHAR *input; // the string to search
-
- // per instance:
-
- int ref; // !=0 means don't make our own copy of pattern
- TCHAR *pattern; // source text of the regular expression
-
- TCHAR flags[3 + 1]; // source text of the attributes parameter
- // (3 TCHARs max plus terminating 0)
- int errors;
-
- unsigned attributes;
-
- #define REAglobal 1 // has the g attribute
- #define REAignoreCase 2 // has the i attribute
- #define REAmultiline 4 // if treat as multiple lines separated
- // by newlines, or as a single line
- #define REAdotmatchlf 8 // if . matches \n
-
- int compile(TCHAR *pattern, TCHAR *attributes, int ref);
- int test(TCHAR *string, int startindex = 0);
-
- TCHAR *replace(TCHAR *format);
- TCHAR *replace2(TCHAR *format);
- static TCHAR *replace3(TCHAR *format, TCHAR *input,
- unsigned re_nsub, regmatch_t *pmatch);
- static TCHAR *replace4(TCHAR *input, regmatch_t *match, TCHAR *replacement);
-
- private:
- TCHAR *src; // current source pointer
- TCHAR *src_start; // starting position for match
- TCHAR *p; // position of parser in pattern
- regmatch_t match; // match for the entire regular expression
- // (serves as storage for pmatch[0])
-
- char *program;
- RegBuffer *buf;
-
- void printProgram(char *prog);
- int trymatch(char *prog, char *progend);
- int parseRegexp();
- int parsePiece();
- int parseAtom();
- int parseRange();
- int escape();
- void error(char *msg);
- void optimize();
- int startchars(Range *r, char *prog, char *progend);
- };
-
- #endif
-
-