home *** CD-ROM | disk | FTP | other *** search
Text File | 1991-09-09 | 40.2 KB | 1,423 lines |
- Newsgroups: alt.sources
- Path: wupost!zaphod.mps.ohio-state.edu!think.com!news.bbn.com!mips2!bubba!jtsillas
- From: jtsillas@bubba.ma30.bull.com (James Tsillas)
- Subject: mxgdb 1.0.3 (part 7/10)
- Organization: Bull HN, Worldwide Information Systems, Billerica, Mass., USA
- Distribution: alt
- Date: 6 Sep 91 14:35:32
- Message-ID: <JTSILLAS.91Sep6143532@bubba.ma30.bull.com>
- Sender: news@mips2.ma30.bull.com (Usenet News Manager)
-
- ---- Cut Here and feed the following to sh ----
- #!/bin/sh
- # this is mxgdb.07 (part 7 of a multipart archive)
- # do not concatenate these parts, unpack them in order with /bin/sh
- # file mxgdb/regex.c continued
- #
- if test ! -r _shar_seq_.tmp; then
- echo 'Please unpack part 1 first!'
- exit 1
- fi
- (read Scheck
- if test "$Scheck" != 7; then
- echo Please unpack part "$Scheck" next!
- exit 1
- else
- exit 0
- fi
- ) < _shar_seq_.tmp || exit 1
- if test ! -f _shar_wnt_.tmp; then
- echo 'x - still skipping mxgdb/regex.c'
- else
- echo 'x - continuing file mxgdb/regex.c'
- sed 's/^X//' << 'SHAR_EOF' >> 'mxgdb/regex.c' &&
- X BECAUSE THIS PROGRAM IS LICENSED FREE OF CHARGE, WE PROVIDE ABSOLUTELY
- NO WARRANTY, TO THE EXTENT PERMITTED BY APPLICABLE STATE LAW. EXCEPT
- WHEN OTHERWISE STATED IN WRITING, FREE SOFTWARE FOUNDATION, INC,
- RICHARD M. STALLMAN AND/OR OTHER PARTIES PROVIDE THIS PROGRAM "AS IS"
- WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
- BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
- FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY
- AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE
- DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR
- CORRECTION.
- X
- X IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW WILL RICHARD M.
- STALLMAN, THE FREE SOFTWARE FOUNDATION, INC., AND/OR ANY OTHER PARTY
- WHO MAY MODIFY AND REDISTRIBUTE THIS PROGRAM AS PERMITTED BELOW, BE
- LIABLE TO YOU FOR DAMAGES, INCLUDING ANY LOST PROFITS, LOST MONIES, OR
- OTHER SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
- USE OR INABILITY TO USE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR
- DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY THIRD PARTIES OR
- A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS) THIS
- PROGRAM, EVEN IF YOU HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH
- DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY.
- X
- X GENERAL PUBLIC LICENSE TO COPY
- X
- X 1. You may copy and distribute verbatim copies of this source file
- as you receive it, in any medium, provided that you conspicuously and
- appropriately publish on each copy a valid copyright notice "Copyright
- (C) 1985 Free Software Foundation, Inc."; and include following the
- copyright notice a verbatim copy of the above disclaimer of warranty
- and of this License. You may charge a distribution fee for the
- physical act of transferring a copy.
- X
- X 2. You may modify your copy or copies of this source file or
- any portion of it, and copy and distribute such modifications under
- the terms of Paragraph 1 above, provided that you also do the following:
- X
- X a) cause the modified files to carry prominent notices stating
- X that you changed the files and the date of any change; and
- X
- X b) cause the whole of any work that you distribute or publish,
- X that in whole or in part contains or is a derivative of this
- X program or any part thereof, to be licensed at no charge to all
- X third parties on terms identical to those contained in this
- X License Agreement (except that you may choose to grant more extensive
- X warranty protection to some or all third parties, at your option).
- X
- X c) You may charge a distribution fee for the physical act of
- X transferring a copy, and you may at your option offer warranty
- X protection in exchange for a fee.
- X
- Mere aggregation of another unrelated program with this program (or its
- derivative) on a volume of a storage or distribution medium does not bring
- the other program under the scope of these terms.
- X
- X 3. You may copy and distribute this program (or a portion or derivative
- of it, under Paragraph 2) in object code or executable form under the terms
- of Paragraphs 1 and 2 above provided that you also do one of the following:
- X
- X a) accompany it with the complete corresponding machine-readable
- X source code, which must be distributed under the terms of
- X Paragraphs 1 and 2 above; or,
- X
- X b) accompany it with a written offer, valid for at least three
- X years, to give any third party free (except for a nominal
- X shipping charge) a complete machine-readable copy of the
- X corresponding source code, to be distributed under the terms of
- X Paragraphs 1 and 2 above; or,
- X
- X c) accompany it with the information you received as to where the
- X corresponding source code may be obtained. (This alternative is
- X allowed only for noncommercial distribution and only if you
- X received the program in object code or executable form alone.)
- X
- For an executable file, complete source code means all the source code for
- all modules it contains; but, as a special exception, it need not include
- source code for modules which are standard libraries that accompany the
- operating system on which the executable file runs.
- X
- X 4. You may not copy, sublicense, distribute or transfer this program
- except as expressly provided under this License Agreement. Any attempt
- otherwise to copy, sublicense, distribute or transfer this program is void and
- your rights to use the program under this License agreement shall be
- automatically terminated. However, parties who have received computer
- software programs from you with this License Agreement will not have
- their licenses terminated so long as such parties remain in full compliance.
- X
- X 5. If you wish to incorporate parts of this program into other free
- programs whose distribution conditions are different, write to the Free
- Software Foundation at 675 Mass Ave, Cambridge, MA 02139. We have not yet
- worked out a simple rule that can be stated here, but we will often permit
- this. We will be guided by the two goals of preserving the free status of
- all derivatives of our free software and of promoting the sharing and reuse of
- software.
- X
- X
- In other words, you are welcome to use, share and improve this program.
- You are forbidden to forbid anyone else to use, share and improve
- what you give them. Help stamp out software-hoarding! */
- X
- X
- /* To test, compile with -Dtest.
- X This Dtestable feature turns this into a self-contained program
- X which reads a pattern, describes how it compiles,
- X then reads a string and searches for it. */
- X
- /*
- X * Modification: alloca.h included for sparc machines
- X * By : Po Cheung, po@cerc.utexas.edu
- X * Date : July 27, 1990
- X */
- #ifdef sparc
- #include <alloca.h>
- #endif
- #define FAILURE_STACK 20000 /* max failure stack size */
- X
- #ifdef __GNUC__
- #define alloca __builtin_alloca
- #endif
- X
- #ifdef emacs
- X
- /* The `emacs' switch turns on certain special matching commands
- X that make sense only in emacs. */
- X
- #include "config.h"
- #include "lisp.h"
- #include "buffer.h"
- #include "syntax.h"
- X
- #else /* not emacs */
- X
- /*
- X * Define the syntax stuff, so we can do the \<...\> things.
- X */
- X
- #ifndef Sword /* must be non-zero in some of the tests below... */
- #define Sword 1
- #endif
- X
- #define SYNTAX(c) re_syntax_table[c]
- X
- #ifdef SYNTAX_TABLE
- X
- char *re_syntax_table;
- X
- #else
- X
- static char re_syntax_table[256];
- X
- static void
- init_syntax_once ()
- {
- X register int c;
- X static int done = 0;
- X
- X if (done)
- X return;
- X
- X bzero (re_syntax_table, sizeof re_syntax_table);
- X
- X for (c = 'a'; c <= 'z'; c++)
- X re_syntax_table[c] = Sword;
- X
- X for (c = 'A'; c <= 'Z'; c++)
- X re_syntax_table[c] = Sword;
- X
- X for (c = '0'; c <= '9'; c++)
- X re_syntax_table[c] = Sword;
- X
- X done = 1;
- }
- X
- #endif /* SYNTAX_TABLE */
- #endif /* not emacs */
- X
- #include "regex.h"
- X
- /* Number of failure points to allocate space for initially,
- X when matching. If this number is exceeded, more space is allocated,
- X so it is not a hard limit. */
- X
- #ifndef NFAILURES
- #define NFAILURES 80
- #endif NFAILURES
- X
- /* width of a byte in bits */
- X
- #define BYTEWIDTH 8
- X
- #ifndef SIGN_EXTEND_CHAR
- #define SIGN_EXTEND_CHAR(x) (x)
- #endif
- X
- /* compile_pattern takes a regular-expression descriptor string in the
- X user's format and converts it into a buffer full of byte commands
- X for matching.
- X
- X pattern is the address of the pattern string
- X size is the length of it.
- X bufp is a struct re_pattern_buffer * which points to the info
- X on where to store the byte commands.
- X This structure contains a char * which points to the
- X actual space, which should have been obtained with malloc.
- X compile_pattern may use realloc to grow the buffer space.
- X
- X The number of bytes of commands can be found out by looking in
- X the struct re_pattern_buffer that bufp pointed to,
- X after compile_pattern returns.
- */
- X
- #define PATPUSH(ch) (*b++ = (char) (ch))
- X
- #define PATFETCH(c) \
- X {if (p == pend) goto end_of_pattern; \
- X c = * (unsigned char *) p++; \
- X if (translate) c = translate[c]; }
- X
- #define PATFETCH_RAW(c) \
- X {if (p == pend) goto end_of_pattern; \
- X c = * (unsigned char *) p++; }
- X
- #define PATUNFETCH p--
- X
- #define EXTEND_BUFFER \
- X { char *old_buffer = bufp->buffer; \
- X if (bufp->allocated == (1<<16)) goto too_big; \
- X bufp->allocated *= 2; \
- X if (bufp->allocated > (1<<16)) bufp->allocated = (1<<16); \
- X if (!(bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated))) \
- X goto memory_exhausted; \
- X c = bufp->buffer - old_buffer; \
- X b += c; \
- X if (fixup_jump) \
- X fixup_jump += c; \
- X if (laststart) \
- X laststart += c; \
- X begalt += c; \
- X if (pending_exact) \
- X pending_exact += c; \
- X }
- X
- static int store_jump (), insert_jump ();
- X
- char *
- re_compile_pattern (pattern, size, bufp)
- X char *pattern;
- X int size;
- X struct re_pattern_buffer *bufp;
- {
- X register char *b = bufp->buffer;
- X register char *p = pattern;
- X char *pend = pattern + size;
- X register unsigned c, c1;
- X char *p1;
- X unsigned char *translate = (unsigned char *) bufp->translate;
- X
- X /* address of the count-byte of the most recently inserted "exactn" command.
- X This makes it possible to tell whether a new exact-match character
- X can be added to that command or requires a new "exactn" command. */
- X
- X char *pending_exact = 0;
- X
- X /* address of the place where a forward-jump should go
- X to the end of the containing expression.
- X Each alternative of an "or", except the last, ends with a forward-jump
- X of this sort. */
- X
- X char *fixup_jump = 0;
- X
- X /* address of start of the most recently finished expression.
- X This tells postfix * where to find the start of its operand. */
- X
- X char *laststart = 0;
- X
- X /* In processing a repeat, 1 means zero matches is allowed */
- X
- X char zero_times_ok;
- X
- X /* In processing a repeat, 1 means many matches is allowed */
- X
- X char many_times_ok;
- X
- X /* address of beginning of regexp, or inside of last \( */
- X
- X char *begalt = b;
- X
- X /* Stack of information saved by \( and restored by \).
- X Four stack elements are pushed by each \(:
- X First, the value of b.
- X Second, the value of fixup_jump.
- X Third, the value of regnum.
- X Fourth, the value of begalt. */
- X
- X int stackb[40];
- X int *stackp = stackb;
- X int *stacke = stackb + 40;
- X int *stackt;
- X
- X /* Counts \('s as they are encountered. Remembered for the matching \),
- X where it becomes the "register number" to put in the stop_memory command */
- X
- X int regnum = 1;
- X
- X bufp->fastmap_accurate = 0;
- X
- #ifndef emacs
- #ifndef SYNTAX_TABLE
- X /*
- X * Initialize the syntax table.
- X */
- X init_syntax_once();
- #endif
- #endif
- X
- X if (bufp->allocated == 0)
- X {
- X bufp->allocated = 28;
- X if (bufp->buffer)
- X /* EXTEND_BUFFER loses when bufp->allocated is 0 */
- X bufp->buffer = (char *) realloc (bufp->buffer, 28);
- X else
- X /* Caller did not allocate a buffer. Do it for him */
- X bufp->buffer = (char *) malloc (28);
- X if (!bufp->buffer) goto memory_exhausted;
- X begalt = b = bufp->buffer;
- X }
- X
- X while (p != pend)
- X {
- X if (b - bufp->buffer > bufp->allocated - 10)
- X /* Note that EXTEND_BUFFER clobbers c */
- X EXTEND_BUFFER;
- X
- X PATFETCH (c);
- X
- X switch (c)
- X {
- X case '$':
- X /* $ means succeed if at end of line, but only in special contexts.
- X If randonly in the middle of a pattern, it is a normal character. */
- X if (p == pend || (*p == '\\' && (p[1] == ')' || p[1] == '|')))
- X {
- X PATPUSH (endline);
- X break;
- X }
- X goto normal_char;
- X
- X case '^':
- X /* ^ means succeed if at beg of line, but only if no preceding pattern. */
- X if (laststart) goto normal_char;
- X PATPUSH (begline);
- X break;
- X
- X case '*':
- X case '+':
- X case '?':
- X /* If there is no previous pattern, char not special. */
- X if (!laststart)
- X goto normal_char;
- X /* If there is a sequence of repetition chars,
- X collapse it down to equivalent to just one. */
- X zero_times_ok = 0;
- X many_times_ok = 0;
- X while (1)
- X {
- X zero_times_ok |= c != '+';
- X many_times_ok |= c != '?';
- X if (p == pend)
- X break;
- X PATFETCH (c);
- X if (!(c == '*' || c == '+' || c == '?'))
- X {
- X PATUNFETCH;
- X break;
- X }
- X }
- X
- X /* Now we know whether 0 matches is allowed,
- X and whether 2 or more matches is allowed. */
- X if (many_times_ok)
- X {
- X /* If more than one repetition is allowed,
- X put in a backward jump at the end. */
- X store_jump (b, maybe_finalize_jump, laststart - 3);
- X b += 3;
- X }
- X insert_jump (on_failure_jump, laststart, b + 3, b);
- X pending_exact = 0;
- X b += 3;
- X if (!zero_times_ok)
- X {
- X /* At least one repetition required: insert before the loop
- X a skip over the initial on-failure-jump instruction */
- X insert_jump (dummy_failure_jump, laststart, laststart + 6, b);
- X b += 3;
- X }
- X break;
- X
- X case '.':
- X laststart = b;
- X PATPUSH (anychar);
- X break;
- X
- X case '[':
- X if (b - bufp->buffer
- X > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH)
- X /* Note that EXTEND_BUFFER clobbers c */
- X EXTEND_BUFFER;
- X
- X laststart = b;
- X if (*p == '^')
- X PATPUSH (charset_not), p++;
- X else
- X PATPUSH (charset);
- X p1 = p;
- X
- X PATPUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
- X /* Clear the whole map */
- X bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
- X /* Read in characters and ranges, setting map bits */
- X while (1)
- X {
- X PATFETCH (c);
- X if (c == ']' && p != p1 + 1) break;
- X if (*p == '-')
- X {
- X PATFETCH (c1);
- X PATFETCH (c1);
- X while (c <= c1)
- X b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH), c++;
- X }
- X else
- X {
- X b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH);
- X }
- X }
- X /* Discard any bitmap bytes that are all 0 at the end of the map.
- X Decrement the map-length byte too. */
- X while (b[-1] > 0 && b[b[-1] - 1] == 0)
- X b[-1]--;
- X b += b[-1];
- X break;
- X
- X case '\\':
- X if (p == pend) goto invalid_pattern;
- X PATFETCH_RAW (c);
- X switch (c)
- X {
- X case '(':
- X if (stackp == stacke) goto nesting_too_deep;
- X if (regnum < RE_NREGS)
- X {
- X PATPUSH (start_memory);
- X PATPUSH (regnum);
- X }
- X *stackp++ = b - bufp->buffer;
- X *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0;
- X *stackp++ = regnum++;
- X *stackp++ = begalt - bufp->buffer;
- X fixup_jump = 0;
- X laststart = 0;
- X begalt = b;
- X break;
- X
- X case ')':
- X if (stackp == stackb) goto unmatched_close;
- X begalt = *--stackp + bufp->buffer;
- X if (fixup_jump)
- X store_jump (fixup_jump, jump, b);
- X if (stackp[-1] < RE_NREGS)
- X {
- X PATPUSH (stop_memory);
- X PATPUSH (stackp[-1]);
- X }
- X stackp -= 2;
- X fixup_jump = 0;
- X if (*stackp)
- X fixup_jump = *stackp + bufp->buffer - 1;
- X laststart = *--stackp + bufp->buffer;
- X break;
- X
- X case '|':
- X insert_jump (on_failure_jump, begalt, b + 6, b);
- X pending_exact = 0;
- X b += 3;
- X if (fixup_jump)
- X store_jump (fixup_jump, jump, b);
- X fixup_jump = b;
- X b += 3;
- X laststart = 0;
- X begalt = b;
- X break;
- X
- #ifdef emacs
- X case '=':
- X PATPUSH (at_dot);
- X break;
- X
- X case 's':
- X laststart = b;
- X PATPUSH (syntaxspec);
- X PATFETCH (c);
- X PATPUSH (syntax_spec_code[c]);
- X break;
- X
- X case 'S':
- X laststart = b;
- X PATPUSH (notsyntaxspec);
- X PATFETCH (c);
- X PATPUSH (syntax_spec_code[c]);
- X break;
- #endif emacs
- X
- X case 'w':
- X laststart = b;
- X PATPUSH (wordchar);
- X break;
- X
- X case 'W':
- X laststart = b;
- X PATPUSH (notwordchar);
- X break;
- X
- X case '<':
- X PATPUSH (wordbeg);
- X break;
- X
- X case '>':
- X PATPUSH (wordend);
- X break;
- X
- X case 'b':
- X PATPUSH (wordbound);
- X break;
- X
- X case 'B':
- X PATPUSH (notwordbound);
- X break;
- X
- X case '`':
- X PATPUSH (begbuf);
- X break;
- X
- X case '\'':
- X PATPUSH (endbuf);
- X break;
- X
- X case '1':
- X case '2':
- X case '3':
- X case '4':
- X case '5':
- X case '6':
- X case '7':
- X case '8':
- X case '9':
- X c1 = c - '0';
- X if (c1 >= regnum)
- X goto normal_char;
- X for (stackt = stackp - 2; stackt > stackb; stackt -= 4)
- X if (*stackt == c1)
- X goto normal_char;
- X laststart = b;
- X PATPUSH (duplicate);
- X PATPUSH (c1);
- X break;
- X default:
- X /* You might think it wuld be useful for \ to mean
- X not to translate; but if we don't translate it
- X it will never match anything. */
- X if (translate) c = translate[c];
- X goto normal_char;
- X }
- X break;
- X
- X default:
- X normal_char:
- X if (!pending_exact || pending_exact + *pending_exact + 1 != b
- X || *pending_exact == 0177 || *p == '*' || *p == '^'
- X || *p == '+' || *p == '?')
- X {
- X laststart = b;
- X PATPUSH (exactn);
- X pending_exact = b;
- X PATPUSH (0);
- X }
- X PATPUSH (c);
- X (*pending_exact)++;
- X }
- X }
- X
- X if (fixup_jump)
- X store_jump (fixup_jump, jump, b);
- X
- X if (stackp != stackb) goto unmatched_open;
- X
- X bufp->used = b - bufp->buffer;
- X return 0;
- X
- X invalid_pattern:
- X return "Invalid regular expression";
- X
- X unmatched_open:
- X return "Unmatched \\(";
- X
- X unmatched_close:
- X return "Unmatched \\)";
- X
- X end_of_pattern:
- X return "Premature end of regular expression";
- X
- X nesting_too_deep:
- X return "Nesting too deep";
- X
- X too_big:
- X return "Regular expression too big";
- X
- X memory_exhausted:
- X return "Memory exhausted";
- }
- X
- /* Store where `from' points a jump operation to jump to where `to' points.
- X `opcode' is the opcode to store. */
- X
- static int
- store_jump (from, opcode, to)
- X char *from, *to;
- X char opcode;
- {
- X from[0] = opcode;
- X from[1] = (to - (from + 3)) & 0377;
- X from[2] = (to - (from + 3)) >> 8;
- }
- X
- /* Open up space at char FROM, and insert there a jump to TO.
- X CURRENT_END gives te end of the storage no in use,
- X so we know how much data to copy up.
- X OP is the opcode of the jump to insert.
- X
- X If you call this function, you must zero out pending_exact. */
- X
- static int
- insert_jump (op, from, to, current_end)
- X char op;
- X char *from, *to, *current_end;
- {
- X register char *pto = current_end + 3;
- X register char *pfrom = current_end;
- X while (pfrom != from)
- X *--pto = *--pfrom;
- X store_jump (from, op, to);
- }
- X
- /* Given a pattern, compute a fastmap from it.
- X The fastmap records which of the (1 << BYTEWIDTH) possible characters
- X can start a string that matches the pattern.
- X This fastmap is used by re_search to skip quickly over totally
- X implausible text.
- X
- X The caller must supply the address of a (1 << BYTEWIDTH)-byte data area
- X as bufp->fastmap.
- X The other components of bufp describe the pattern to be used. */
- X
- void
- re_compile_fastmap (bufp)
- X struct re_pattern_buffer *bufp;
- {
- X unsigned char *pattern = (unsigned char *) bufp->buffer;
- X int size = bufp->used;
- X register char *fastmap = bufp->fastmap;
- X register unsigned char *p = pattern;
- X register unsigned char *pend = pattern + size;
- X register int j, k;
- X unsigned char *translate = (unsigned char *) bufp->translate;
- X
- X unsigned char *stackb[NFAILURES];
- X unsigned char **stackp = stackb;
- X
- X bzero (fastmap, (1 << BYTEWIDTH));
- X bufp->fastmap_accurate = 1;
- X bufp->can_be_null = 0;
- X
- X while (p)
- X {
- X if (p == pend)
- X {
- X bufp->can_be_null = 1;
- X break;
- X }
- #ifdef SWITCH_ENUM_BUG
- X switch ((int) ((enum regexpcode) *p++))
- #else
- X switch ((enum regexpcode) *p++)
- #endif
- X {
- X case exactn:
- X if (translate)
- X fastmap[translate[p[1]]] = 1;
- X else
- X fastmap[p[1]] = 1;
- X break;
- X
- X case begline:
- X case before_dot:
- X case at_dot:
- X case after_dot:
- X case begbuf:
- X case endbuf:
- X case wordbound:
- X case notwordbound:
- X case wordbeg:
- X case wordend:
- X continue;
- X
- X case endline:
- X if (translate)
- X fastmap[translate['\n']] = 1;
- X else
- X fastmap['\n'] = 1;
- X if (bufp->can_be_null != 1)
- X bufp->can_be_null = 2;
- X break;
- X
- X case finalize_jump:
- X case maybe_finalize_jump:
- X case jump:
- X case dummy_failure_jump:
- X bufp->can_be_null = 1;
- X j = *p++ & 0377;
- X j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
- X p += j + 1; /* The 1 compensates for missing ++ above */
- X if (j > 0)
- X continue;
- X /* Jump backward reached implies we just went through
- X the body of a loop and matched nothing.
- X Opcode jumped to should be an on_failure_jump.
- X Just treat it like an ordinary jump.
- X For a * loop, it has pushed its failure point already;
- X if so, discard that as redundant. */
- X if ((enum regexpcode) *p != on_failure_jump)
- X continue;
- X p++;
- X j = *p++ & 0377;
- X j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
- X p += j + 1; /* The 1 compensates for missing ++ above */
- X if (stackp != stackb && *stackp == p)
- X stackp--;
- X continue;
- X
- X case on_failure_jump:
- X j = *p++ & 0377;
- X j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
- X p++;
- X *++stackp = p + j;
- X continue;
- X
- X case start_memory:
- X case stop_memory:
- X p++;
- X continue;
- X
- X case duplicate:
- X bufp->can_be_null = 1;
- X fastmap['\n'] = 1;
- X case anychar:
- X for (j = 0; j < (1 << BYTEWIDTH); j++)
- X if (j != '\n')
- X fastmap[j] = 1;
- X if (bufp->can_be_null)
- X return;
- X /* Don't return; check the alternative paths
- X so we can set can_be_null if appropriate. */
- X break;
- X
- X case wordchar:
- X for (j = 0; j < (1 << BYTEWIDTH); j++)
- X if (SYNTAX (j) == Sword)
- X fastmap[j] = 1;
- X break;
- X
- X case notwordchar:
- X for (j = 0; j < (1 << BYTEWIDTH); j++)
- X if (SYNTAX (j) != Sword)
- X fastmap[j] = 1;
- X break;
- X
- #ifdef emacs
- X case syntaxspec:
- X k = *p++;
- X for (j = 0; j < (1 << BYTEWIDTH); j++)
- X if (SYNTAX (j) == (enum syntaxcode) k)
- X fastmap[j] = 1;
- X break;
- X
- X case notsyntaxspec:
- X k = *p++;
- X for (j = 0; j < (1 << BYTEWIDTH); j++)
- X if (SYNTAX (j) != (enum syntaxcode) k)
- X fastmap[j] = 1;
- X break;
- #endif emacs
- X
- X case charset:
- X for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
- X if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
- X {
- X if (translate)
- X fastmap[translate[j]] = 1;
- X else
- X fastmap[j] = 1;
- X }
- X break;
- X
- X case charset_not:
- X /* Chars beyond end of map must be allowed */
- X for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
- X if (translate)
- X fastmap[translate[j]] = 1;
- X else
- X fastmap[j] = 1;
- X
- X for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
- X if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
- X {
- X if (translate)
- X fastmap[translate[j]] = 1;
- X else
- X fastmap[j] = 1;
- X }
- X break;
- X }
- X
- X /* Get here means we have successfully found the possible starting
- X characters of one path of the pattern. We need not follow this
- X path any farther. Instead, look at the next alternative
- X remembered in the stack. */
- X if (stackp != stackb)
- X p = *stackp--;
- X else
- X break;
- X }
- }
- X
- /* Like re_search_2, below, but only one string is specified. */
- X
- int
- re_search (pbufp, string, size, startpos, range, regs)
- X struct re_pattern_buffer *pbufp;
- X char *string;
- X int size, startpos, range;
- X struct re_registers *regs;
- {
- X return re_search_2 (pbufp, 0, 0, string, size, startpos, range, regs, size);
- }
- X
- /* Like re_match_2 but tries first a match starting at index STARTPOS,
- X then at STARTPOS + 1, and so on.
- X RANGE is the number of places to try before giving up.
- X If RANGE is negative, the starting positions tried are
- X STARTPOS, STARTPOS - 1, etc.
- X It is up to the caller to make sure that range is not so large
- X as to take the starting position outside of the input strings.
- X
- The value returned is the position at which the match was found,
- X or -1 if no match was found,
- X or -2 if error (such as failure stack overflow). */
- X
- int
- re_search_2 (pbufp, string1, size1, string2, size2, startpos, range, regs,
- X mstop)
- X struct re_pattern_buffer *pbufp;
- X char *string1, *string2;
- X int size1, size2;
- X int startpos;
- X register int range;
- X struct re_registers *regs;
- X int mstop;
- {
- X register char *fastmap = pbufp->fastmap;
- X register unsigned char *translate = (unsigned char *) pbufp->translate;
- X int total = size1 + size2;
- X int val;
- X
- X /* Update the fastmap now if not correct already */
- X if (fastmap && !pbufp->fastmap_accurate)
- X re_compile_fastmap (pbufp);
- X
- X /* Don't waste time in a long search for a pattern
- X that says it is anchored. */
- X if (pbufp->used > 0 && (enum regexpcode) pbufp->buffer[0] == begbuf
- X && range > 0)
- X {
- X if (startpos > 0)
- X return -1;
- X else
- X range = 1;
- X }
- X
- X while (1)
- X {
- X /* If a fastmap is supplied, skip quickly over characters
- X that cannot possibly be the start of a match.
- X Note, however, that if the pattern can possibly match
- X the null string, we must test it at each starting point
- X so that we take the first null string we get. */
- X
- X if (fastmap && startpos < total && pbufp->can_be_null != 1)
- X {
- X if (range > 0)
- X {
- X register int lim = 0;
- X register unsigned char *p;
- X int irange = range;
- X if (startpos < size1 && startpos + range >= size1)
- X lim = range - (size1 - startpos);
- X
- X p = ((unsigned char *)
- X &(startpos >= size1 ? string2 - size1 : string1)[startpos]);
- X
- X if (translate)
- X {
- X while (range > lim && !fastmap[translate[*p++]])
- X range--;
- X }
- X else
- X {
- X while (range > lim && !fastmap[*p++])
- X range--;
- X }
- X startpos += irange - range;
- X }
- X else
- X {
- X register unsigned char c;
- X if (startpos >= size1) c = string2[startpos - size1];
- X else c = string1[startpos];
- X c &= 0xff;
- X if (translate ? !fastmap[translate[c]] : !fastmap[c])
- X goto advance;
- X }
- X }
- X
- X if (range >= 0 && startpos == total
- X && fastmap && pbufp->can_be_null == 0)
- X return -1;
- X
- X val = re_match_2 (pbufp, string1, size1, string2, size2, startpos,
- X regs, mstop);
- X if (0 <= val)
- X {
- X if (val == -2)
- X return -2;
- X return startpos;
- X }
- X
- #ifdef C_ALLOCA
- X alloca (0);
- #endif /* C_ALLOCA */
- X
- X advance:
- X if (!range) break;
- X if (range > 0) range--, startpos++; else range++, startpos--;
- X }
- X return -1;
- }
- X
- #ifndef emacs /* emacs never uses this */
- int
- re_match (pbufp, string, size, pos, regs)
- X struct re_pattern_buffer *pbufp;
- X char *string;
- X int size, pos;
- X struct re_registers *regs;
- {
- X return re_match_2 (pbufp, 0, 0, string, size, pos, regs, size);
- }
- #endif /* emacs */
- X
- /* Maximum size of failure stack. Beyond this, overflow is an error. */
- /*
- X * Modification: failure stack size increased from 2000 to FAILURE_STACK
- X * By : Po Cheung, po@cerc.utexas.edu
- X * Date : April 15, 1990
- X */
- X
- int re_max_failures = FAILURE_STACK;
- X
- /* Match the pattern described by PBUFP
- X against data which is the virtual concatenation of STRING1 and STRING2.
- X SIZE1 and SIZE2 are the sizes of the two data strings.
- X Start the match at position POS.
- X Do not consider matching past the position MSTOP.
- X
- X If pbufp->fastmap is nonzero, then it had better be up to date.
- X
- X The reason that the data to match are specified as two components
- X which are to be regarded as concatenated
- X is so this function can be used directly on the contents of an Emacs buffer.
- X
- X -1 is returned if there is no match. -2 is returned if there is
- X an error (such as match stack overflow). Otherwise the value is the length
- X of the substring which was matched. */
- X
- int
- re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop)
- X struct re_pattern_buffer *pbufp;
- X char *string1, *string2;
- X int size1, size2;
- X int pos;
- X struct re_registers *regs;
- X int mstop;
- {
- X register char *p = pbufp->buffer;
- X register char *pend = p + pbufp->used;
- X /* End of first string */
- X char *end1;
- X /* End of second string */
- X char *end2;
- X /* Pointer just past last char to consider matching */
- X char *end_match_1, *end_match_2;
- X register char *d, *dend;
- X register int mcnt;
- X unsigned char *translate = (unsigned char *) pbufp->translate;
- X
- X /* Failure point stack. Each place that can handle a failure further
- X down the line pushes a failure point on this stack. It consists of
- X two char *'s.
- X The first one pushed is where to resume scanning the pattern;
- X the second pushed is where to resume scanning the strings.
- X If the latter is zero, the failure point is a "dummy".
- X If a failure happens and the innermost failure point is dormant,
- X it discards that failure point and tries the next one. */
- X
- X char **stackb = (char **) alloca (2 * NFAILURES * sizeof (char *));
- X char **stackp = stackb, **stacke = &stackb[2 * NFAILURES];
- X
- X /* Information on the "contents" of registers.
- X These are pointers into the input strings; they record
- X just what was matched (on this attempt) by some part of the pattern.
- X The start_memory command stores the start of a register's contents
- X and the stop_memory command stores the end.
- X
- X At that point, regstart[regnum] points to the first character in the
- X register, regend[regnum] points to the first character beyond the end
- X of the register,
- X regstart_seg1[regnum] is true iff regstart[regnum] points into string1,
- X and regend_seg1[regnum] is true iff regend[regnum] points into string1. */
- X
- X char *regstart[RE_NREGS];
- X char *regend[RE_NREGS];
- X char regstart_seg1[RE_NREGS], regend_seg1[RE_NREGS];
- X
- X /* Set up pointers to ends of strings.
- X Don't allow the second string to be empty unless both are empty. */
- X if (!size2)
- X {
- X string2 = string1;
- X size2 = size1;
- X string1 = 0;
- X size1 = 0;
- X }
- X end1 = string1 + size1;
- X end2 = string2 + size2;
- X
- X /* Compute where to stop matching, within the two strings */
- X if (mstop <= size1)
- X {
- X end_match_1 = string1 + mstop;
- X end_match_2 = string2;
- X }
- X else
- X {
- X end_match_1 = end1;
- X end_match_2 = string2 + mstop - size1;
- X }
- X
- X /* Initialize \( and \) text positions to -1
- X to mark ones that no \( or \) has been seen for. */
- X
- X for (mcnt = 0; mcnt < sizeof (regstart) / sizeof (*regstart); mcnt++)
- X regstart[mcnt] = (char *) -1;
- X
- X /* `p' scans through the pattern as `d' scans through the data.
- X `dend' is the end of the input string that `d' points within.
- X `d' is advanced into the following input string whenever necessary,
- X but this happens before fetching;
- X therefore, at the beginning of the loop,
- X `d' can be pointing at the end of a string,
- X but it cannot equal string2. */
- X
- X if (pos <= size1)
- X d = string1 + pos, dend = end_match_1;
- X else
- X d = string2 + pos - size1, dend = end_match_2;
- X
- /* Write PREFETCH; just before fetching a character with *d. */
- #define PREFETCH \
- X while (d == dend) \
- X { if (dend == end_match_2) goto fail; /* end of string2 => failure */ \
- X d = string2; /* end of string1 => advance to string2. */ \
- X dend = end_match_2; }
- X
- X /* This loop loops over pattern commands.
- X It exits by returning from the function if match is complete,
- X or it drops through if match fails at this starting point in the
- X input data. */
- X
- X while (1)
- X {
- X if (p == pend)
- X /* End of pattern means we have succeeded! */
- X {
- X /* If caller wants register contents data back, convert it to indices */
- X if (regs)
- X {
- X regs->start[0] = pos;
- X if (dend == end_match_1)
- X regs->end[0] = d - string1;
- X else
- X regs->end[0] = d - string2 + size1;
- X for (mcnt = 1; mcnt < RE_NREGS; mcnt++)
- X {
- X if (regstart[mcnt] == (char *) -1)
- X {
- X regs->start[mcnt] = -1;
- X regs->end[mcnt] = -1;
- X continue;
- X }
- X if (regstart_seg1[mcnt])
- X regs->start[mcnt] = regstart[mcnt] - string1;
- X else
- X regs->start[mcnt] = regstart[mcnt] - string2 + size1;
- X if (regend_seg1[mcnt])
- X regs->end[mcnt] = regend[mcnt] - string1;
- X else
- X regs->end[mcnt] = regend[mcnt] - string2 + size1;
- X }
- X }
- X if (dend == end_match_1)
- X return (d - string1 - pos);
- X else
- X return d - string2 + size1 - pos;
- X }
- X
- X /* Otherwise match next pattern command */
- #ifdef SWITCH_ENUM_BUG
- X switch ((int) ((enum regexpcode) *p++))
- #else
- X switch ((enum regexpcode) *p++)
- #endif
- X {
- X
- X /* \( is represented by a start_memory, \) by a stop_memory.
- X Both of those commands contain a "register number" argument.
- X The text matched within the \( and \) is recorded under that number.
- X Then, \<digit> turns into a `duplicate' command which
- X is followed by the numeric value of <digit> as the register number. */
- X
- X case start_memory:
- X regstart[*p] = d;
- X regstart_seg1[*p++] = (dend == end_match_1);
- X break;
- X
- X case stop_memory:
- X regend[*p] = d;
- X regend_seg1[*p++] = (dend == end_match_1);
- X break;
- X
- X case duplicate:
- X {
- X int regno = *p++; /* Get which register to match against */
- X register char *d2, *dend2;
- X
- X d2 = regstart[regno];
- X dend2 = (regstart_seg1[regno] == regend_seg1[regno])
- X ? regend[regno]
- X : end_match_1;
- X while (1)
- X {
- X /* Advance to next segment in register contents, if necessary */
- X while (d2 == dend2)
- X {
- X if (dend2 == end_match_2) break;
- X if (dend2 == regend[regno]) break;
- /* end of string1 => advance to string2. */
- X d2 = string2, dend2 = regend[regno];
- X }
- X /* At end of register contents => success */
- X if (d2 == dend2) break;
- X
- X /* Advance to next segment in data being matched, if necessary */
- X PREFETCH;
- X
- X /* mcnt gets # consecutive chars to compare */
- X mcnt = dend - d;
- X if (mcnt > dend2 - d2)
- X mcnt = dend2 - d2;
- X /* Compare that many; failure if mismatch, else skip them. */
- X if (translate ? bcmp_translate (d, d2, mcnt, translate) : bcmp (d, d2, mcnt))
- X goto fail;
- X d += mcnt, d2 += mcnt;
- X }
- X }
- X break;
- X
- X case anychar:
- X /* fetch a data character */
- X PREFETCH;
- X /* Match anything but a newline. */
- X if ((translate ? translate[*(unsigned char *)d++] : *d++) == '\n')
- X goto fail;
- X break;
- X
- X case charset:
- X case charset_not:
- X {
- X /* Nonzero for charset_not */
- X int not = 0;
- X register int c;
- X if (*(p - 1) == (char) charset_not)
- X not = 1;
- X
- X /* fetch a data character */
- X PREFETCH;
- X
- X if (translate)
- X c = translate [*(unsigned char *)d];
- X else
- X c = *(unsigned char *)d;
- X
- X if (c < *p * BYTEWIDTH
- X && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
- X not = !not;
- X
- X p += 1 + *p;
- X
- X if (!not) goto fail;
- X d++;
- X break;
- X }
- X
- X case begline:
- X if (d == string1 || d[-1] == '\n')
- X break;
- X goto fail;
- X
- X case endline:
- X if (d == end2
- X || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n'))
- X break;
- X goto fail;
- X
- X /* "or" constructs ("|") are handled by starting each alternative
- X with an on_failure_jump that points to the start of the next alternative.
- X Each alternative except the last ends with a jump to the joining point.
- X (Actually, each jump except for the last one really jumps
- X to the following jump, because tensioning the jumps is a hassle.) */
- X
- X /* The start of a stupid repeat has an on_failure_jump that points
- X past the end of the repeat text.
- X This makes a failure point so that, on failure to match a repetition,
- X matching restarts past as many repetitions have been found
- X with no way to fail and look for another one. */
- X
- X /* A smart repeat is similar but loops back to the on_failure_jump
- X so that each repetition makes another failure point. */
- X
- X case on_failure_jump:
- X if (stackp == stacke)
- X {
- X char **stackx;
- X if (stacke - stackb > re_max_failures)
- X return -2;
- X stackx = (char **) alloca (2 * (stacke - stackb) * sizeof (char *));
- X bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
- X stackp += stackx - stackb;
- X stacke = stackx + 2 * (stacke - stackb);
- X stackb = stackx;
- X }
- X mcnt = *p++ & 0377;
- X mcnt += SIGN_EXTEND_CHAR (*p) << 8;
- X p++;
- X *stackp++ = mcnt + p;
- X *stackp++ = d;
- X break;
- X
- X /* The end of a smart repeat has an maybe_finalize_jump back.
- X Change it either to a finalize_jump or an ordinary jump. */
- X
- X case maybe_finalize_jump:
- X mcnt = *p++ & 0377;
- X mcnt += SIGN_EXTEND_CHAR (*p) << 8;
- X p++;
- X /* Compare what follows with the begining of the repeat.
- X If we can establish that there is nothing that they would
- X both match, we can change to finalize_jump */
- X if (p == pend)
- X p[-3] = (char) finalize_jump;
- X else if (*p == (char) exactn || *p == (char) endline)
- X {
- X register int c = *p == (char) endline ? '\n' : p[2];
- X register char *p1 = p + mcnt;
- X /* p1[0] ... p1[2] are an on_failure_jump.
- X Examine what follows that */
- X if (p1[3] == (char) exactn && p1[5] != c)
- X p[-3] = (char) finalize_jump;
- X else if (p1[3] == (char) charset || p1[3] == (char) charset_not)
- X {
- X int not = p1[3] == (char) charset_not;
- X if (c < p1[4] * BYTEWIDTH
- X && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
- X not = !not;
- X /* not is 1 if c would match */
- X /* That means it is not safe to finalize */
- X if (!not)
- X p[-3] = (char) finalize_jump;
- X }
- X }
- X p -= 2;
- X if (p[-1] != (char) finalize_jump)
- X {
- X p[-1] = (char) jump;
- X goto nofinalize;
- X }
- X
- X /* The end of a stupid repeat has a finalize-jump
- X back to the start, where another failure point will be made
- X which will point after all the repetitions found so far. */
- X
- X case finalize_jump:
- X stackp -= 2;
- X
- X case jump:
- X nofinalize:
- X mcnt = *p++ & 0377;
- X mcnt += SIGN_EXTEND_CHAR (*p) << 8;
- X p += mcnt + 1; /* The 1 compensates for missing ++ above */
- X break;
- X
- X case dummy_failure_jump:
- X if (stackp == stacke)
- X {
- X char **stackx = (char **) alloca (2 * (stacke - stackb) *
- X sizeof (char *));
- X bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
- X stackp += stackx - stackb;
- X stacke = stackx + 2 * (stacke - stackb);
- X stackb = stackx;
- X }
- X *stackp++ = 0;
- X *stackp++ = 0;
- X goto nofinalize;
- X
- X case wordbound:
- X if (d == string1 /* Points to first char */
- X || d == end2 /* Points to end */
- X || (d == end1 && size2 == 0)) /* Points to end */
- X break;
- X if ((SYNTAX (((unsigned char *)d)[-1]) == Sword)
- X != (SYNTAX (d == end1 ? *(unsigned char *)string2 :
- X *(unsigned char *)d) == Sword))
- X break;
- X goto fail;
- X
- X case notwordbound:
- X if (d == string1 /* Points to first char */
- X || d == end2 /* Points to end */
- X || (d == end1 && size2 == 0)) /* Points to end */
- X goto fail;
- X if ((SYNTAX (((unsigned char *)d)[-1]) == Sword)
- X != (SYNTAX (d == end1 ? *(unsigned char *)string2 :
- X *(unsigned char *)d) == Sword))
- X goto fail;
- X break;
- X
- X case wordbeg:
- X if (d == end2 /* Points to end */
- X || (d == end1 && size2 == 0) /* Points to end */
- X || SYNTAX (*(unsigned char *) (d == end1 ? string2 : d)) !=
- X Sword) /* Next char not a letter */
- X goto fail;
- X /* prev char not letter */
- SHAR_EOF
- true || echo 'restore of mxgdb/regex.c failed'
- fi
- echo 'End of part 7'
- echo 'File mxgdb/regex.c is continued in part 8'
- echo 8 > _shar_seq_.tmp
- exit 0
-