home *** CD-ROM | disk | FTP | other *** search
- /* String search routines for GNU Emacs.
- Copyright (C) 1985, 1986, 1987 Free Software Foundation, Inc.
-
- This file is part of GNU Emacs.
-
- GNU Emacs is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 1, or (at your option)
- any later version.
-
- GNU Emacs is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with GNU Emacs; see the file COPYING. If not, write to
- the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
-
-
- #include "config.h"
- #include "lisp.h"
- #include "syntax.h"
- #include "buffer.h"
- #include "commands.h"
- #include "regex.h"
-
- #define max(a, b) ((a) > (b) ? (a) : (b))
- #define min(a, b) ((a) < (b) ? (a) : (b))
-
- unsigned char downcase_table[01000] = {0}; /* folds upper to lower case */
- /* A WHEEL WILL FALL OFF IF, IN A trt, CHARACTER A */
- /* TRANSLATES INTO CHARACTER B AND CHARACTER B DOES NOT */
- /* ALSO TRANSLATE INTO CHARACTER B. */
- /* If that constraint is met, compute_trt_inverse will follow a */
- /* translation table with its inverse. The inverse of a table */
- /* follows the table at table[0400]. The form of this is that if */
- /* table[a]=b then the chain starting at table[0400+b], linked by */
- /* link(x)=table[0400+x] and ended by b must include a. */
-
- /* At present compute_trt_inverse is blinded and the inverse for this */
- /* particular table is created by a single-purpose loop. */
- /* compute_trt_inverse has been tested on the following cases: */
- /* trt[x]=x, trt[x]=(+ 3 (logand x, 0370)), trt[x]='a', and the */
- /* downcase table. */
-
- /* We compile regexps into this buffer and then use it for searching. */
-
- struct re_pattern_buffer searchbuf;
-
- extern int re_max_failures;
-
- char search_fastmap[0400];
-
- /* Last regexp we compiled */
-
- Lisp_Object last_regexp;
-
- /* Every call to re_match, etc., must pass &search_regs as the regs argument
- unless you can show it is unnecessary (i.e., if re_match is certainly going
- to be called again before region-around-match can be called). */
-
- static struct re_registers search_regs;
-
- /* error condition signalled when regexp compile_pattern fails */
-
- Lisp_Object Qinvalid_regexp;
-
- /* Compile a regexp and signal a Lisp error if anything goes wrong. */
-
- compile_pattern (pattern, bufp, translate)
- Lisp_Object pattern;
- struct re_pattern_buffer *bufp;
- char *translate;
- {
- char *val;
- Lisp_Object dummy;
-
- if (EQ (pattern, last_regexp)
- && translate == bufp->translate)
- return;
-
- last_regexp = Qnil;
- bufp->translate = translate;
- val = re_compile_pattern (XSTRING (pattern)->data,
- XSTRING (pattern)->size,
- bufp);
- if (val)
- {
- dummy = build_string (val);
- while (1)
- Fsignal (Qinvalid_regexp, Fcons (dummy, Qnil));
- }
- last_regexp = pattern;
- return;
- }
-
- /* Error condition used for failing searches */
- Lisp_Object Qsearch_failed;
-
- Lisp_Object
- signal_failure (arg)
- Lisp_Object arg;
- {
- Fsignal (Qsearch_failed, Fcons (arg, Qnil));
- return Qnil;
- }
-
- DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
- "t if text after point matches regular expression PAT.")
- (string)
- Lisp_Object string;
- {
- Lisp_Object val;
- unsigned char *p1, *p2;
- int s1, s2;
- register int i;
-
- CHECK_STRING (string, 0);
- compile_pattern (string, &searchbuf,
- !NULL (current_buffer->case_fold_search) ? (char *) downcase_table : 0);
-
- immediate_quit = 1;
- QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
-
- /* Get pointers and sizes of the two strings
- that make up the visible portion of the buffer. */
-
- p1 = BEGV_ADDR;
- s1 = GPT - BEGV;
- p2 = GAP_END_ADDR;
- s2 = ZV - GPT;
- if (s1 < 0)
- {
- p2 = p1;
- s2 = ZV - BEGV;
- s1 = 0;
- }
- if (s2 < 0)
- {
- s1 = ZV - BEGV;
- s2 = 0;
- }
-
- val = (0 <= re_match_2 (&searchbuf, p1, s1, p2, s2,
- point - BEGV, &search_regs, ZV - BEGV)
- ? Qt : Qnil);
- for (i = 0; i < RE_NREGS; i++)
- if (search_regs.start[i] >= 0)
- {
- search_regs.start[i] += BEGV;
- search_regs.end[i] += BEGV;
- }
- immediate_quit = 0;
- return val;
- }
-
- DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
- "Return index of start of first match for REGEXP in STRING, or nil.\n\
- If third arg START is non-nil, start search at that index in STRING.\n\
- For index of first char beyond the match, do (match-end 0).\n\
- match-end and match-beginning also give indices of substrings\n\
- matched by parenthesis constructs in the pattern.")
- (regexp, string, start)
- Lisp_Object regexp, string, start;
- {
- int val;
- int s;
-
- CHECK_STRING (regexp, 0);
- CHECK_STRING (string, 1);
-
- if (NULL (start))
- s = 0;
- else
- {
- int len = XSTRING (string)->size;
-
- CHECK_NUMBER (start, 2);
- s = XINT (start);
- if (s < 0 && -s <= len)
- s = len - s;
- else if (0 > s || s > len)
- args_out_of_range (string, start);
- }
-
- compile_pattern (regexp, &searchbuf,
- !NULL (current_buffer->case_fold_search) ? (char *) downcase_table : 0);
- immediate_quit = 1;
- val = re_search (&searchbuf, XSTRING (string)->data, XSTRING (string)->size,
- s, XSTRING (string)->size - s, &search_regs);
- immediate_quit = 0;
- if (val == -2)
- error ("Overflow in regular expression matching");
- if (val < 0) return Qnil;
- return make_number (val);
- }
-
- scan_buffer (target, pos, cnt, shortage)
- int *shortage, pos;
- register int cnt, target;
- {
- int lim = ((cnt > 0) ? ZV - 1 : BEGV);
- int direction = ((cnt > 0) ? 1 : -1);
- register int lim0;
- unsigned char *base;
- register unsigned char *cursor, *limit;
-
- if (shortage != 0)
- *shortage = 0;
-
- immediate_quit = 1;
-
- if (cnt > 0)
- while (pos != lim + 1)
- {
- lim0 = BufferSafeCeiling (pos);
- lim0 = min (lim, lim0);
- limit = &FETCH_CHAR (lim0) + 1;
- base = (cursor = &FETCH_CHAR (pos));
- while (1)
- {
- while (*cursor != target && ++cursor != limit)
- ;
- if (cursor != limit)
- {
- if (--cnt == 0)
- {
- immediate_quit = 0;
- return (pos + cursor - base + 1);
- }
- else
- if (++cursor == limit)
- break;
- }
- else
- break;
- }
- pos += cursor - base;
- }
- else
- {
- pos--; /* first character we scan */
- while (pos > lim - 1)
- { /* we WILL scan under pos */
- lim0 = BufferSafeFloor (pos);
- lim0 = max (lim, lim0);
- limit = &FETCH_CHAR (lim0) - 1;
- base = (cursor = &FETCH_CHAR (pos));
- cursor++;
- while (1)
- {
- while (--cursor != limit && *cursor != target)
- ;
- if (cursor != limit)
- {
- if (++cnt == 0)
- {
- immediate_quit = 0;
- return (pos + cursor - base + 1);
- }
- }
- else
- break;
- }
- pos += cursor - base;
- }
- }
- immediate_quit = 0;
- if (shortage != 0)
- *shortage = cnt * direction;
- return (pos + ((direction == 1 ? 0 : 1)));
- }
-
- int
- find_next_newline (from, cnt)
- register int from, cnt;
- {
- return (scan_buffer ('\n', from, cnt, (int *) 0));
- }
-
- DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
- "Move point forward, stopping before a char not in CHARS, or at position LIM.\n\
- CHARS is like the inside of a [...] in a regular expression\n\
- except that ] is never special and \\ quotes ^, - or \\.\n\
- Thus, with arg \"a-zA-Z\", this skips letters stopping before first nonletter.\n\
- With arg \"^a-zA-Z\", skips nonletters stopping before first letter.")
- (string, lim)
- Lisp_Object string, lim;
- {
- skip_chars (1, string, lim);
- return Qnil;
- }
-
- DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
- "Move point backward, stopping after a char not in CHARS, or at position LIM.\n\
- See skip-chars-forward for details.")
- (string, lim)
- Lisp_Object string, lim;
- {
- skip_chars (0, string, lim);
- return Qnil;
- }
-
- skip_chars (forwardp, string, lim)
- int forwardp;
- Lisp_Object string, lim;
- {
- register unsigned char *p, *pend;
- register unsigned char c;
- unsigned char fastmap[0400];
- int negate = 0;
- register int i;
-
- CHECK_STRING (string, 0);
-
- if (NULL (lim))
- XFASTINT (lim) = forwardp ? ZV : BEGV;
- else
- CHECK_NUMBER_COERCE_MARKER (lim, 1);
-
- /* In any case, don't allow scan outside bounds of buffer. */
- if (XFASTINT (lim) > ZV)
- XFASTINT (lim) = ZV;
- if (XFASTINT (lim) < BEGV)
- XFASTINT (lim) = BEGV;
-
- p = XSTRING (string)->data;
- pend = p + XSTRING (string)->size;
- bzero (fastmap, sizeof fastmap);
-
- if (p != pend && *p == '^')
- {
- negate = 1; p++;
- }
-
- /* Find the characters specified and set their elements of fastmap. */
-
- while (p != pend)
- {
- c = *p++;
- if (c == '\\')
- {
- if (p == pend) break;
- c = *p++;
- }
- if (p != pend && *p == '-')
- {
- p++;
- if (p == pend) break;
- while (c <= *p)
- {
- fastmap[c] = 1;
- c++;
- }
- p++;
- }
- else
- fastmap[c] = 1;
- }
-
- /* If ^ was the first character, complement the fastmap. */
-
- if (negate)
- for (i = 0; i < sizeof fastmap; i++)
- fastmap[i] ^= 1;
-
- immediate_quit = 1;
- if (forwardp)
- {
- while (point < XINT (lim) && fastmap[FETCH_CHAR (point)])
- SET_PT (point + 1);
- }
- else
- {
- while (point > XINT (lim) && fastmap[FETCH_CHAR (point - 1)])
- SET_PT (point - 1);
- }
- immediate_quit = 0;
- }
-
- /* Subroutines of Lisp buffer search functions. */
-
- static Lisp_Object
- search_command (string, bound, noerror, count, direction, RE)
- Lisp_Object string, bound, noerror, count;
- int direction;
- int RE;
- {
- register int np;
- int lim;
- int n = direction;
-
- if (!NULL (count))
- {
- CHECK_NUMBER (count, 3);
- n *= XINT (count);
- }
-
- CHECK_STRING (string, 0);
- if (NULL (bound))
- lim = n > 0 ? ZV : BEGV;
- else
- {
- CHECK_NUMBER_COERCE_MARKER (bound, 1);
- lim = XINT (bound);
- if (n > 0 ? lim < point : lim > point)
- error ("Invalid search bound (wrong side of point)");
- if (lim > ZV)
- lim = ZV;
- if (lim < BEGV)
- lim = BEGV;
- }
-
- np = search_buffer (string, point, lim, n, RE,
- !NULL (current_buffer->case_fold_search) ? downcase_table : 0);
- if (np <= 0)
- {
- if (NULL (noerror))
- return signal_failure (string);
- if (!EQ (noerror, Qt))
- {
- if (lim < BEGV || lim > ZV)
- abort ();
- SET_PT (lim);
- }
- return Qnil;
- }
-
- if (np < BEGV || np > ZV)
- abort ();
-
- SET_PT (np);
-
- return Qt;
- }
-
- /* search for the n'th occurrence of `string' in the current buffer,
- starting at position `from' and stopping at position `lim',
- treating `pat' as a literal string if `RE' is false or as
- a regular expression if `RE' is true.
-
- If `n' is positive, searching is forward and `lim' must be greater than `from'.
- If `n' is negative, searching is backward and `lim' must be less than `from'.
-
- Returns -x if only `n'-x occurrences found (x > 0),
- or else the position at the beginning of the `n'th occurrence (if searching backward)
- or the end (if searching forward). */
-
- /* INTERFACE CHANGE ALERT!!!! search_buffer now returns -x if only */
- /* n-x occurences are found. */
-
- search_buffer (string, pos, lim, n, RE, trt)
- Lisp_Object string;
- int pos;
- int lim;
- int n;
- int RE;
- register unsigned char *trt;
- {
- int len = XSTRING (string)->size;
- unsigned char *base_pat = XSTRING (string)->data;
- register int *BM_tab;
- int *BM_tab_base;
- register int direction = ((n > 0) ? 1 : -1);
- register int dirlen;
- int infinity, limit, k, stride_for_teases;
- register unsigned char *pat, *cursor, *p_limit;
- register int i, j;
- unsigned char *p1, *p2;
- int s1, s2;
-
-
- if (!len)
- return (0);
-
- if (RE)
- compile_pattern (string, &searchbuf, (char *) trt);
-
- if (RE /* Here we detect whether the */
- /* generality of an RE search is */
- /* really needed. */
- && *(searchbuf.buffer) == (char) exactn /* first item is "exact match" */
- && searchbuf.buffer[1] + 2 == searchbuf.used) /*first is ONLY item */
- {
- RE = 0; /* can do straight (non RE) search */
- pat = (base_pat = (unsigned char *) searchbuf.buffer + 2);
- /* trt already applied */
- len = searchbuf.used - 2;
- }
- else if (!RE)
- {
- pat = (unsigned char *) alloca (len);
-
- for (i = len; i--;) /* Copy the pattern; apply trt */
- *pat++ = (((int) trt) ? trt [*base_pat++] : *base_pat++);
- pat -= len; base_pat = pat;
- }
-
- if (RE)
- {
- immediate_quit = 1; /* Quit immediately if user types ^G,
- because letting this function finish
- can take too long. */
- QUIT; /* Do a pending quit right away,
- to avoid paradoxical behavior */
- /* Get pointers and sizes of the two strings
- that make up the visible portion of the buffer. */
-
- p1 = BEGV_ADDR;
- s1 = GPT - BEGV;
- p2 = GAP_END_ADDR;
- s2 = ZV - GPT;
- if (s1 < 0)
- {
- p2 = p1;
- s2 = ZV - BEGV;
- s1 = 0;
- }
- if (s2 < 0)
- {
- s1 = ZV - BEGV;
- s2 = 0;
- }
- while (n < 0)
- {
- int value = re_search_2 (&searchbuf, p1, s1, p2, s2,
- pos - BEGV, lim - pos, &search_regs,
- /* Don't allow match past current point */
- pos - BEGV);
- if (value == -2)
- error ("Overflow in regular expression matching");
-
- if (value >= 0)
- {
- j = BEGV;
- for (i = 0; i < RE_NREGS; i++)
- if (search_regs.start[i] >= 0)
- {
- search_regs.start[i] += j;
- search_regs.end[i] += j;
- }
- /* Set pos to the new position. */
- pos = search_regs.start[0];
- }
- else
- {
- immediate_quit = 0;
- return (n);
- }
- n++;
- }
- while (n > 0)
- {
- int value = re_search_2 (&searchbuf, p1, s1, p2, s2,
- pos - BEGV, lim - pos, &search_regs,
- lim - BEGV);
- if (value == -2)
- error ("Overflow in regular expression matching");
-
- if (value >= 0)
- {
- j = BEGV;
- for (i = 0; i < RE_NREGS; i++)
- if (search_regs.start[i] >= 0)
- {
- search_regs.start[i] += j;
- search_regs.end[i] += j;
- }
- pos = search_regs.end[0];
- }
- else
- {
- immediate_quit = 0;
- return (0 - n);
- }
- n--;
- }
- immediate_quit = 0;
- return (pos);
- }
- else /* non-RE case */
- {
- #ifdef C_ALLOCA
- int BM_tab_space[0400];
- BM_tab = &BM_tab_space[0];
- #else
- BM_tab = (int *) alloca (0400 * sizeof (int));
- #endif
- /* The general approach is that we are going to maintain that we know */
- /* the first (closest to the present position, in whatever direction */
- /* we're searching) character that could possibly be the last */
- /* (furthest from present position) character of a valid match. We */
- /* advance the state of our knowledge by looking at that character */
- /* and seeing whether it indeed matches the last character of the */
- /* pattern. If it does, we take a closer look. If it does not, we */
- /* move our pointer (to putative last characters) as far as is */
- /* logically possible. This amount of movement, which I call a */
- /* stride, will be the length of the pattern if the actual character */
- /* appears nowhere in the pattern, otherwise it will be the distance */
- /* from the last occurrence of that character to the end of the */
- /* pattern. */
- /* As a coding trick, an enormous stride is coded into the table for */
- /* characters that match the last character. This allows use of only */
- /* a single test, a test for having gone past the end of the */
- /* permissible match region, to test for both possible matches (when */
- /* the stride goes past the end immediately) and failure to */
- /* match (where you get nudged past the end one stride at a time). */
-
- /* Here we make a "mickey mouse" BM table. The stride of the search */
- /* is determined only by the last character of the putative match. */
- /* If that character does not match, we will stride the proper */
- /* distance to propose a match that superimposes it on the last */
- /* instance of a character that matches it (per trt), or misses */
- /* it entirely if there is none. */
-
- dirlen = len * direction;
- infinity = dirlen - (lim + pos + len + len) * direction;
- if (direction < 0)
- pat = (base_pat += len - 1);
- BM_tab_base = BM_tab;
- BM_tab += 0400;
- j = dirlen; /* to get it in a register */
- /* A character that does not appear in the pattern induces a */
- /* stride equal to the pattern length. */
- while (BM_tab_base != BM_tab)
- {
- *--BM_tab = j;
- *--BM_tab = j;
- *--BM_tab = j;
- *--BM_tab = j;
- }
- i = 0;
- while (i != infinity)
- {
- j = pat[i]; i += direction;
- if (i == dirlen) i = infinity;
- if ((int) trt)
- {
- k = (j = trt[j]);
- if (i == infinity)
- stride_for_teases = BM_tab[j];
- BM_tab[j] = dirlen - i;
- /* A translation table is followed by its inverse -- see */
- /* comment following downcase_table for details */
-
- while ((j = trt[0400+j]) != k)
- BM_tab[j] = dirlen - i;
- }
- else
- {
- if (i == infinity)
- stride_for_teases = BM_tab[j];
- BM_tab[j] = dirlen - i;
- }
- /* stride_for_teases tells how much to stride if we get a */
- /* match on the far character but are subsequently */
- /* disappointed, by recording what the stride would have been */
- /* for that character if the last character had been */
- /* different. */
- }
- infinity = dirlen - infinity;
- pos += dirlen - ((direction > 0) ? direction : 0);
- /* loop invariant - pos points at where last char (first char if reverse)
- of pattern would align in a possible match. */
- while (n != 0)
- {
- if ((lim - pos - (direction > 0)) * direction < 0)
- return (n * (0 - direction));
- /* First we do the part we can by pointers (maybe nothing) */
- QUIT;
- pat = base_pat;
- limit = pos - dirlen + direction;
- limit = ((direction > 0)
- ? BufferSafeCeiling (limit)
- : BufferSafeFloor (limit));
- /* LIMIT is now the last (not beyond-last!) value
- POS can take on without hitting edge of buffer or the gap. */
- limit = ((direction > 0)
- ? min (lim - 1, min (limit, pos + 20000))
- : max (lim, max (limit, pos - 20000)));
- if ((limit - pos) * direction > 20)
- {
- p_limit = &FETCH_CHAR (limit);
- p2 = (cursor = &FETCH_CHAR (pos));
- /* In this loop, pos + cursor - p2 is the surrogate for pos */
- while (1) /* use one cursor setting as long as i can */
- {
- if (direction > 0) /* worth duplicating */
- {
- /* Use signed comparison if appropriate
- to make cursor+infinity sure to be > p_limit.
- Assuming that the buffer lies in a range of addresses
- that are all "positive" (as ints) or all "negative",
- either kind of comparison will work as long
- as we don't step by infinity. So pick the kind
- that works when we do step by infinity. */
- if ((int) (p_limit + infinity) > (int) p_limit)
- while ((int) cursor <= (int) p_limit)
- cursor += BM_tab[*cursor];
- else
- while ((unsigned int) cursor <= (unsigned int) p_limit)
- cursor += BM_tab[*cursor];
- }
- else
- {
- if ((int) (p_limit + infinity) < (int) p_limit)
- while ((int) cursor >= (int) p_limit)
- cursor += BM_tab[*cursor];
- else
- while ((unsigned int) cursor >= (unsigned int) p_limit)
- cursor += BM_tab[*cursor];
- }
- /* If you are here, cursor is beyond the end of the searched region. */
- /* This can happen if you match on the far character of the pattern, */
- /* because the "stride" of that character is infinity, a number able */
- /* to throw you well beyond the end of the search. It can also */
- /* happen if you fail to match within the permitted region and would */
- /* otherwise try a character beyond that region */
- if ((cursor - p_limit) * direction <= len)
- break; /* a small overrun is genuine */
- cursor -= infinity; /* large overrun = hit */
- i = dirlen - direction;
- if ((int) trt)
- {
- while ((i -= direction) + direction != 0)
- if (pat[i] != trt[*(cursor -= direction)])
- break;
- }
- else
- {
- while ((i -= direction) + direction != 0)
- if (pat[i] != *(cursor -= direction))
- break;
- }
- cursor += dirlen - i - direction; /* fix cursor */
- if (i + direction == 0)
- {
- cursor -= direction;
- search_regs.start[0]
- = pos + cursor - p2 + ((direction > 0)
- ? 1 - len : 0);
- search_regs.end[0] = len + search_regs.start[0];
- if ((n -= direction) != 0)
- cursor += dirlen; /* to resume search */
- else
- return ((direction > 0)
- ? search_regs.end[0] : search_regs.start[0]);
- }
- else
- cursor += stride_for_teases; /* <sigh> we lose - */
- }
- pos += cursor - p2;
- }
- else
- /* Now we'll pick up a clump that has to be done the hard */
- /* way because it covers a discontinuity */
- {
- limit = ((direction > 0)
- ? BufferSafeCeiling (pos - dirlen + 1)
- : BufferSafeFloor (pos - dirlen - 1));
- limit = ((direction > 0)
- ? min (limit + len, lim - 1)
- : max (limit - len, lim));
- /* LIMIT is now the last value POS can have
- and still be valid for a possible match. */
- while (1)
- {
- /* This loop can be coded for space rather than */
- /* speed because it will usually run only once. */
- /* (the reach is at most len + 21, and typically */
- /* does not exceed len) */
- while ((limit - pos) * direction >= 0)
- pos += BM_tab[FETCH_CHAR(pos)];
- /* now run the same tests to distinguish going off the */
- /* end, a match or a phoney match. */
- if ((pos - limit) * direction <= len)
- break; /* ran off the end */
- /* Found what might be a match.
- Set POS back to last (first if reverse) char pos. */
- pos -= infinity;
- i = dirlen - direction;
- while ((i -= direction) + direction != 0)
- {
- pos -= direction;
- if (pat[i] != (((int) trt)
- ? trt[FETCH_CHAR(pos)]
- : FETCH_CHAR (pos)))
- break;
- }
- /* Above loop has moved POS part or all the way
- back to the first char pos (last char pos if reverse).
- Set it once again at the last (first if reverse) char. */
- pos += dirlen - i- direction;
- if (i + direction == 0)
- {
- pos -= direction;
- search_regs.start[0]
- = pos + ((direction > 0) ? 1 - len : 0);
- search_regs.end[0] = len + search_regs.start[0];
- if ((n -= direction) != 0)
- pos += dirlen; /* to resume search */
- else
- return ((direction > 0)
- ? search_regs.end[0] : search_regs.start[0]);
- }
- else
- pos += stride_for_teases;
- }
- }
- /* We have done one clump. Can we continue? */
- if ((lim - pos) * direction < 0)
- return ((0 - n) * direction);
- }
- return pos;
- }
- }
-
- /* Given a string of words separated by word delimiters,
- compute a regexp that matches those exact words
- separated by arbitrary punctuation. */
-
- static Lisp_Object
- wordify (string)
- Lisp_Object string;
- {
- register unsigned char *p, *o;
- register int i, len, punct_count = 0, word_count = 0;
- Lisp_Object val;
-
- CHECK_STRING (string, 0);
- p = XSTRING (string)->data;
- len = XSTRING (string)->size;
-
- for (i = 0; i < len; i++)
- if (SYNTAX (p[i]) != Sword)
- {
- punct_count++;
- if (i > 0 && SYNTAX (p[i-1]) == Sword) word_count++;
- }
- if (SYNTAX (p[len-1]) == Sword) word_count++;
- if (!word_count) return build_string ("");
-
- val = make_string (p, len - punct_count + 5 * (word_count - 1) + 4);
-
- o = XSTRING (val)->data;
- *o++ = '\\';
- *o++ = 'b';
-
- for (i = 0; i < len; i++)
- if (SYNTAX (p[i]) == Sword)
- *o++ = p[i];
- else if (i > 0 && SYNTAX (p[i-1]) == Sword && --word_count)
- {
- *o++ = '\\';
- *o++ = 'W';
- *o++ = '\\';
- *o++ = 'W';
- *o++ = '*';
- }
-
- *o++ = '\\';
- *o++ = 'b';
-
- return val;
- }
-
- DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
- "sSearch backward: ",
- "Search backward from point for STRING.\n\
- Set point to the beginning of the occurrence found, and return t.\n\
- An optional second argument bounds the search; it is a buffer position.\n\
- The match found must not extend before that position.\n\
- Optional third argument, if t, means if fail just return nil (no error).\n\
- If not nil and not t, position at limit of search and return nil.\n\
- Optional fourth argument is repeat count--search for successive occurrences.\n\
- See also the functions match-beginning and match-end and replace-match.")
- (string, bound, noerror, count)
- Lisp_Object string, bound, noerror, count;
- {
- return search_command (string, bound, noerror, count, -1, 0);
- }
-
- DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "sSearch: ",
- "Search forward from point for STRING.\n\
- Set point to the end of the occurrence found, and return t.\n\
- An optional second argument bounds the search; it is a buffer position.\n\
- The match found must not extend after that position.\n\
- Optional third argument, if t, means if fail just return nil (no error).\n\
- If not nil and not t, move to limit of search and return nil.\n\
- Optional fourth argument is repeat count--search for successive occurrences.\n\
- See also the functions match-beginning and match-end and replace-match.")
- (string, bound, noerror, count)
- Lisp_Object string, bound, noerror, count;
- {
- return search_command (string, bound, noerror, count, 1, 0);
- }
-
- DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
- "sWord search backward: ",
- "Search backward from point for STRING, ignoring differences in punctuation.\n\
- Set point to the beginning of the occurrence found, and return t.\n\
- An optional second argument bounds the search; it is a buffer position.\n\
- The match found must not extend before that position.\n\
- Optional third argument, if t, means if fail just return nil (no error).\n\
- If not nil and not t, move to limit of search and return nil.\n\
- Optional fourth argument is repeat count--search for successive occurrences.")
- (string, bound, noerror, count)
- Lisp_Object string, bound, noerror, count;
- {
- return search_command (wordify (string), bound, noerror, count, -1, 1);
- }
-
- DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
- "sWord search: ",
- "Search forward from point for STRING, ignoring differences in punctuation.\n\
- Set point to the end of the occurrence found, and return t.\n\
- An optional second argument bounds the search; it is a buffer position.\n\
- The match found must not extend after that position.\n\
- Optional third argument, if t, means if fail just return nil (no error).\n\
- If not nil and not t, move to limit of search and return nil.\n\
- Optional fourth argument is repeat count--search for successive occurrences.")
- (string, bound, noerror, count)
- Lisp_Object string, bound, noerror, count;
- {
- return search_command (wordify (string), bound, noerror, count, 1, 1);
- }
-
- DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
- "sRE search backward: ",
- "Search backward from point for match for regular expression REGEXP.\n\
- Set point to the beginning of the match, and return t.\n\
- The match found is the one starting last in the buffer\n\
- and yet ending before the place the origin of the search.\n\
- An optional second argument bounds the search; it is a buffer position.\n\
- The match found must start at or after that position.\n\
- Optional third argument, if t, means if fail just return nil (no error).\n\
- If not nil and not t, move to limit of search and return nil.\n\
- Optional fourth argument is repeat count--search for successive occurrences.\n\
- See also the functions match-beginning and match-end and replace-match.")
- (string, bound, noerror, count)
- Lisp_Object string, bound, noerror, count;
- {
- return search_command (string, bound, noerror, count, -1, 1);
- }
-
- DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
- "sRE search: ",
- "Search forward from point for regular expression REGEXP.\n\
- Set point to the end of the occurrence found, and return t.\n\
- An optional second argument bounds the search; it is a buffer position.\n\
- The match found must not extend after that position.\n\
- Optional third argument, if t, means if fail just return nil (no error).\n\
- If not nil and not t, move to limit of search and return nil.\n\
- Optional fourth argument is repeat count--search for successive occurrences.\n\
- See also the functions match-beginning and match-end and replace-match.")
- (string, bound, noerror, count)
- Lisp_Object string, bound, noerror, count;
- {
- return search_command (string, bound, noerror, count, 1, 1);
- }
-
- DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 3, 0,
- "Replace text matched by last search with NEWTEXT.\n\
- If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
- Otherwise convert to all caps or cap initials, like replaced text.\n\
- If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
- Otherwise treat \\ as special:\n\
- \\& in NEWTEXT means substitute original matched text,\n\
- \\N means substitute match for \\(...\\) number N,\n\
- \\\\ means insert one \\.\n\
- Leaves point at end of replacement text.")
- (string, fixedcase, literal)
- Lisp_Object string, fixedcase, literal;
- {
- enum { nochange, all_caps, cap_initial } case_action;
- register int pos, last;
- int some_multiletter_word;
- int some_letter = 0;
- register int c, prevc;
- int inslen;
-
- CHECK_STRING (string, 0);
-
- case_action = nochange; /* We tried an initialization */
- /* but some C compilers blew it */
- if (search_regs.start[0] < BEGV
- || search_regs.start[0] > search_regs.end[0]
- || search_regs.end[0] > ZV)
- args_out_of_range(make_number (search_regs.start[0]),
- make_number (search_regs.end[0]));
-
- if (NULL (fixedcase))
- {
- /* Decide how to casify by examining the matched text. */
-
- last = search_regs.end[0];
- prevc = '\n';
- case_action = all_caps;
-
- /* some_multiletter_word is set nonzero if any original word
- is more than one letter long. */
- some_multiletter_word = 0;
-
- for (pos = search_regs.start[0]; pos < last; pos++)
- {
- c = FETCH_CHAR (pos);
- if (LOWERCASEP (c))
- {
- /* Cannot be all caps if any original char is lower case */
-
- case_action = cap_initial;
- if (SYNTAX (prevc) != Sword)
- {
- /* Cannot even be cap initials
- if some original initial is lower case */
- case_action = nochange;
- break;
- }
- else
- some_multiletter_word = 1;
- }
- else if (!NOCASEP (c))
- {
- some_letter = 1;
- if (!some_multiletter_word && SYNTAX (prevc) == Sword)
- some_multiletter_word = 1;
- }
-
- prevc = c;
- }
-
- /* Do not make new text all caps
- if the original text contained only single letter words. */
- if (case_action == all_caps && !some_multiletter_word)
- case_action = cap_initial;
-
- if (!some_letter) case_action = nochange;
- }
-
- SET_PT (search_regs.end[0]);
- if (!NULL (literal))
- Finsert (1, &string);
- else
- {
- struct gcpro gcpro1;
- GCPRO1 (string);
- for (pos = 0; pos < XSTRING (string)->size; pos++)
- {
- c = XSTRING (string)->data[pos];
- if (c == '\\')
- {
- c = XSTRING (string)->data[++pos];
- if (c == '&')
- Finsert_buffer_substring (Fcurrent_buffer (),
- make_number (search_regs.start[0]),
- make_number (search_regs.end[0]));
- else if (c >= '1' && c <= RE_NREGS + '0')
- {
- if (search_regs.start[c - '0'] >= 1)
- Finsert_buffer_substring (Fcurrent_buffer (),
- make_number (search_regs.start[c - '0']),
- make_number (search_regs.end[c - '0']));
- }
- else
- insert_char (c);
- }
- else
- insert_char (c);
- }
- UNGCPRO;
- }
-
- inslen = point - (search_regs.end[0]);
- del_range (search_regs.start[0], search_regs.end[0]);
-
- if (case_action == all_caps)
- Fupcase_region (make_number (point - inslen), make_number (point));
- else if (case_action == cap_initial)
- upcase_initials_region (make_number (point - inslen), make_number (point));
- return Qnil;
- }
-
- static Lisp_Object
- match_limit (num, beginningp)
- Lisp_Object num;
- int beginningp;
- {
- register int n;
-
- CHECK_NUMBER (num, 0);
- n = XINT (num);
- if (n < 0 || n >= RE_NREGS)
- args_out_of_range (num, make_number (RE_NREGS));
- if (search_regs.start[n] < 0)
- return Qnil;
- return (make_number ((beginningp) ? search_regs.start[n]
- : search_regs.end[n]));
- }
-
- DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
- "Return the character number of start of text matched by last search.\n\
- ARG, a number, specifies which parenthesized expression in the last regexp.\n\
- Value is nil if ARGth pair didn't match, or there were less than ARG pairs.\n\
- Zero means the entire text matched by the whole regexp or whole string.")
- (num)
- Lisp_Object num;
- {
- return match_limit (num, 1);
- }
-
- DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
- "Return the character number of end of text matched by last search.\n\
- ARG, a number, specifies which parenthesized expression in the last regexp.\n\
- Value is nil if ARGth pair didn't match, or there were less than ARG pairs.\n\
- Zero means the entire text matched by the whole regexp or whole string.")
- (num)
- Lisp_Object num;
- {
- return match_limit (num, 0);
- }
-
- DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 0, 0,
- "Return list containing all info on what the last search matched.\n\
- Element 2N is (match-beginning N); element 2N + 1 is (match-end N).\n\
- All the elements are normally markers, or nil if the Nth pair didn't match.\n\
- 0 is also possible, when matching was done with `string-match',\n\
- if a match began at index 0 in the string.")
- ()
- {
- Lisp_Object data[2 * RE_NREGS];
- int i, len;
-
- len = -1;
- for (i = 0; i < RE_NREGS; i++)
- {
- int start = search_regs.start[i];
- if (start >= 0)
- {
- /* Use an integer if the value is out of range for the
- size of the current buffer. */
- if (start < BEG || start > Z)
- XFASTINT (data[2 * i]) = start;
- else
- {
- data[2 * i] = Fmake_marker ();
- Fset_marker (data[2 * i], make_number (start), Qnil);
- }
-
- if (search_regs.end[i] < BEG || search_regs.end[i] > Z)
- XFASTINT (data[2 * i + 1]) = search_regs.end[i];
- else
- {
- data[2 * i + 1] = Fmake_marker ();
- Fset_marker (data[2 * i + 1],
- make_number (search_regs.end[i]), Qnil);
- }
- len = i;
- }
- else
- data[2 * i] = data [2 * i + 1] = Qnil;
- }
- return Flist (2 * len + 2, data);
- }
-
-
- DEFUN ("store-match-data", Fstore_match_data, Sstore_match_data, 1, 1, 0,
- "Set internal data on last search match from elements of LIST.\n\
- LIST should have been created by calling match-data previously.")
- (list)
- register Lisp_Object list;
- {
- register int i;
- register Lisp_Object marker;
-
- if (!CONSP (list) && !NULL (list))
- list = wrong_type_argument (Qconsp, list, 0);
-
- for (i = 0; i < RE_NREGS; i++)
- {
- marker = Fcar (list);
- if (NULL (marker))
- {
- search_regs.start[i] = -1;
- list = Fcdr (list);
- }
- else
- {
- if (XTYPE (marker) == Lisp_Marker
- && XMARKER (marker)->buffer == 0)
- XFASTINT (marker) = 0;
-
- CHECK_NUMBER_COERCE_MARKER (marker, 0);
- search_regs.start[i] = XINT (marker);
- list = Fcdr (list);
-
- marker = Fcar (list);
- if (XTYPE (marker) == Lisp_Marker
- && XMARKER (marker)->buffer == 0)
- XFASTINT (marker) = 0;
-
- CHECK_NUMBER_COERCE_MARKER (marker, 0);
- search_regs.end[i] = XINT (marker);
- }
- list = Fcdr (list);
- }
-
- return Qnil;
- }
-
- /* Quote a string to inactivate reg-expr chars */
-
- DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
- "Return a regexp string which matches exactly STRING and nothing else.")
- (str)
- Lisp_Object str;
- {
- register unsigned char *in, *out, *end;
- register unsigned char *temp;
-
- CHECK_STRING (str, 0);
-
- temp = (unsigned char *) alloca (XSTRING (str)->size * 2);
-
- /* Now copy the data into the new string, inserting escapes. */
-
- in = XSTRING (str)->data;
- end = in + XSTRING (str)->size;
- out = temp;
-
- for (; in != end; in++)
- {
- if (*in == '[' || *in == ']'
- || *in == '*' || *in == '.' || *in == '\\'
- || *in == '?' || *in == '+'
- || *in == '^' || *in == '$')
- *out++ = '\\';
- *out++ = *in;
- }
-
- return make_string (temp, out - temp);
- }
-
- /* This code should be unzapped when there comes to be multiple */
- /* translation tables. It has been certified on various cases. */
- /*
- void
- compute_trt_inverse (trt)
- register unsigned char *trt;
- {
- register int i = 0400;
- register unsigned char c, q;
-
- while (i--)
- trt[0400+i] = i;
- i = 0400;
- while (i--)
- {
- if ((q = trt[i]) != (unsigned char) i)
- {
- c = trt[q + 0400];
- trt[q + 0400] = i;
- trt[0400 + i] = c;
- }
- }
- }
- */
-
- syms_of_search ()
- {
- register int i;
-
- /* Avoid running out of regexp stack quite so soon. */
- re_max_failures = 10000;
-
- for (i = 0; i < 0400; i++)
- {
- downcase_table[i] = (i >= 'A' && i <= 'Z') ? i + 040 : i;
- /* We do this instead of using compute_trt_inverse to save space. */
- /* Does it? */
- downcase_table[0400+i]
- = ((i >= 'A' && i <= 'Z')
- ? i + ('a' - 'A')
- : ((i >= 'a' && i <= 'z')
- ? i + ('A' - 'a')
- : i));
- }
- /* Use this instead when there come to be multiple translation tables.
- compute_trt_inverse (downcase_table); */
-
- searchbuf.allocated = 100;
- searchbuf.buffer = (char *) malloc (searchbuf.allocated);
- searchbuf.fastmap = search_fastmap;
-
- Qsearch_failed = intern ("search-failed");
- staticpro (&Qsearch_failed);
- Qinvalid_regexp = intern ("invalid-regexp");
- staticpro (&Qinvalid_regexp);
-
- Fput (Qsearch_failed, Qerror_conditions,
- Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
- Fput (Qsearch_failed, Qerror_message,
- build_string ("Search failed"));
-
- Fput (Qinvalid_regexp, Qerror_conditions,
- Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
- Fput (Qinvalid_regexp, Qerror_message,
- build_string ("Invalid regexp"));
-
- last_regexp = Qnil;
- staticpro (&last_regexp);
-
- defsubr (&Sstring_match);
- defsubr (&Slooking_at);
- defsubr (&Sskip_chars_forward);
- defsubr (&Sskip_chars_backward);
- defsubr (&Ssearch_forward);
- defsubr (&Ssearch_backward);
- defsubr (&Sword_search_forward);
- defsubr (&Sword_search_backward);
- defsubr (&Sre_search_forward);
- defsubr (&Sre_search_backward);
- defsubr (&Sreplace_match);
- defsubr (&Smatch_beginning);
- defsubr (&Smatch_end);
- defsubr (&Smatch_data);
- defsubr (&Sstore_match_data);
- defsubr (&Sregexp_quote);
- }
-