home *** CD-ROM | disk | FTP | other *** search
- /* grep - print lines matching an extended regular expression
- Copyright (C) 1988 Free Software Foundation, Inc.
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
-
- /* Written June, 1988 by Mike Haertel
- BMG speedups added July, 1988 by James A. Woods and Arthur David Olson */
-
- #include <stdio.h>
- #include <io.h>
- #undef eof
-
- #if defined(USG) || defined(STDC_HEADERS)
- #include <string.h>
- #ifndef bcopy
- #define bcopy(s,d,n) memcpy((d),(s),(n))
- #endif
- #ifndef index
- #define index strchr
- #endif
- #else
- #include <strings.h>
- #endif
-
- #ifdef HAVE_UNISTD_H
- #include <unistd.h>
- #endif
-
- #ifndef STDC_HEADERS
- extern char *getenv();
- #endif
- extern int errno;
-
- extern char *sys_errlist[];
-
- #include "dfa.h"
- #include "regex.h"
- #include "getopt.h"
-
- /* Used by -w */
- #define WCHAR(C) (ISALNUM(C) || (C) == '_')
-
- #define MAX(a, b) ((a) > (b) ? (a) : (b))
-
- /* Exit status codes. */
- #define MATCHES_FOUND 0 /* Exit 0 if no errors and matches found. */
- #define NO_MATCHES_FOUND 1 /* Exit 1 if no matches were found. */
- #define ERROR 2 /* Exit 2 if some error occurred. */
-
- /* Error is set true if something awful happened. */
- static int error;
-
- /* The program name for error messages. */
- static char *prog;
-
- /* We do all our own buffering by hand for efficiency. */
- static char *buffer; /* The buffer itself, grown as needed. */
- static bufbytes; /* Number of bytes in the buffer. */
- static size_t bufalloc; /* Number of bytes allocated to the buffer. */
- static bufprev; /* Number of bytes that have been forgotten.
- This is used to get byte offsets from the
- beginning of the file. */
- static bufread; /* Number of bytes to get with each read(). */
-
- static void
- initialize_buffer()
- {
- bufread = 8192;
- bufalloc = bufread + bufread / 2;
- buffer = malloc(bufalloc);
- if (! buffer)
- {
- fprintf(stderr, "%s: Memory exhausted (%s)\n", prog,
- sys_errlist[errno]);
- exit(ERROR);
- }
- }
-
- /* The current input file. */
- static fd;
- static char *filename;
- static eof;
-
- /* Fill the buffer retaining the last n bytes at the beginning of the
- newly filled buffer (for backward context). Returns the number of new
- bytes read from disk. */
- static int
- fill_buffer_retaining(n)
- int n;
- {
- char *p, *q;
- int i;
-
- /* See if we need to grow the buffer. */
- if (bufalloc - n <= bufread)
- {
- while (bufalloc - n <= bufread)
- {
- bufalloc *= 2;
- bufread *= 2;
- }
- buffer = realloc(buffer, bufalloc);
- if (! buffer)
- {
- fprintf(stderr, "%s: Memory exhausted (%s)\n", prog,
- sys_errlist[errno]);
- exit(ERROR);
- }
- }
-
- bufprev += bufbytes - n;
-
- /* Shift stuff down. */
- for (i = n, p = buffer, q = p + bufbytes - n; i--; )
- *p++ = *q++;
- bufbytes = n;
-
- if (eof)
- return 0;
-
- /* Read in new stuff. */
- i = read(fd, buffer + bufbytes, bufread);
- if (i < 0)
- {
- fprintf(stderr, "%s: read on %s failed (%s)\n", prog,
- filename ? filename : "<stdin>", sys_errlist[errno]);
- error = 1;
- }
-
- /* Kludge to pretend every nonempty file ends with a newline. */
- if (i == 0 && bufbytes > 0 && buffer[bufbytes - 1] != '\n')
- {
- eof = i = 1;
- buffer[bufbytes] = '\n';
- }
-
- bufbytes += i;
- return i;
- }
-
- /* Various flags set according to the argument switches. */
- static trailing_context; /* Lines of context to show after matches. */
- static leading_context; /* Lines of context to show before matches. */
- static byte_count; /* Precede output lines the byte count of the
- first character on the line. */
- static no_filenames; /* Do not display filenames. */
- static line_numbers; /* Precede output lines with line numbers. */
- static silent; /* Produce no output at all. This switch
- is bogus, ever hear of /dev/null? */
- static int whole_word; /* Match only whole words. Note that if
- backreferences are used this depends on
- the regex routines getting leftmost-longest
- right, which they don't right now if |
- is also used. */
- static int whole_line; /* Match only whole lines. Backreference
- caveat applies here too. */
- static nonmatching_lines; /* Print lines that don't match the regexp. */
-
- static bmgexec; /* Invoke Boyer-Moore-Gosper routines */
-
- /* The compiled regular expression lives here. */
- static struct regexp reg;
-
- /* The compiled regular expression for the backtracking matcher lives here. */
- static struct re_pattern_buffer regex;
-
- /* Pointer in the buffer after the last character printed. */
- static char *printed_limit;
-
- /* True when printed_limit has been artifically advanced without printing
- anything. */
- static int printed_limit_fake;
-
- /* Print a line at the given line number, returning the number of
- characters actually printed. Matching is true if the line is to
- be considered a "matching line". This is only meaningful if
- surrounding context is turned on. */
- static int
- print_line(p, number, matching)
- char *p;
- int number;
- int matching;
- {
- int count = 0;
-
- if (silent)
- {
- do
- ++count;
- while (*p++ != '\n');
- printed_limit_fake = 0;
- printed_limit = p;
- return count;
- }
-
- if (filename && !no_filenames)
- printf("%s%c", filename, matching ? ':' : '-');
- if (byte_count)
- printf("%d%c", p - buffer + bufprev, matching ? ':' : '-');
- if (line_numbers)
- printf("%d%c", number, matching ? ':' : '-');
- do
- {
- ++count;
- putchar(*p);
- }
- while (*p++ != '\n');
- printed_limit_fake = 0;
- printed_limit = p;
- return count;
- }
-
- /* Print matching or nonmatching lines from the current file. Returns a
- count of matching or nonmatching lines. */
- static int
- grep()
- {
- int retain = 0; /* Number of bytes to retain on next call
- to fill_buffer_retaining(). */
- char *search_limit; /* Pointer to the character after the last
- newline in the buffer. */
- char saved_char; /* Character after the last newline. */
- char *resume; /* Pointer to where to resume search. */
- int resume_index = 0; /* Count of characters to ignore after
- refilling the buffer. */
- int line_count = 1; /* Line number. */
- int try_backref; /* Set to true if we need to verify the
- match with a backtracking matcher. */
- int initial_line_count; /* Line count at beginning of last search. */
- char *match; /* Pointer to the first character after the
- string matching the regexp. */
- int match_count = 0; /* Count of matching lines. */
- char *matching_line; /* Pointer to first character of the matching
- line, or of the first line of context to
- print if context is turned on. */
- char *real_matching_line; /* Pointer to the first character of the
- real matching line. */
- char *next_line; /* Pointer to first character of the line
- following the matching line. */
- char *last_match_limit; /* Pointer after last matched line. */
- int pending_lines = 0; /* Lines of context left over from last match
- that we have to print. */
- static first_match = 1; /* True when nothing has been printed. */
- int i;
- char *tmp;
- char *execute();
-
- printed_limit_fake = 0;
-
- while (fill_buffer_retaining(retain) > 0)
- {
- /* Find the last newline in the buffer. */
- search_limit = buffer + bufbytes;
- while (search_limit > buffer && search_limit[-1] != '\n')
- --search_limit;
- if (search_limit == buffer)
- {
- retain = bufbytes;
- continue;
- }
-
- /* Save the character after the last newline so regexecute can write
- its own sentinel newline. */
- saved_char = *search_limit;
-
- /* Search the buffer for a match. */
- printed_limit = buffer;
- resume = buffer + resume_index;
- last_match_limit = resume;
- initial_line_count = line_count;
-
-
- /* In retrospect, I have to say that the following code sucks.
- For an example of how to do this right, see the fgrep
- driver program that I wrote around a year later. I'm
- too lazy to retrofit that to egrep right now (the
- pattern matchers have different needs). */
-
-
- while (match = execute(®, resume, search_limit, 0, &line_count, &try_backref))
- {
- /* Find the beginning of the matching line. */
- matching_line = match;
- while (matching_line > resume && matching_line[-1] != '\n')
- --matching_line;
- real_matching_line = matching_line;
-
- /* Find the beginning of the next line. */
- next_line = match;
- while (next_line < search_limit && *next_line++ != '\n')
- ;
-
- /* If a potential backreference is indicated, try it out with
- a backtracking matcher to make sure the line is a match.
- This is hairy because we need to handle whole_line and
- whole_word matches specially. The method was stolen from
- GNU fgrep. */
- if (try_backref)
- {
- struct re_registers regs;
- int beg, len, maxlen, ret;
-
- beg = 0;
- for (maxlen = next_line - matching_line - 1; beg <= maxlen; ++beg)
- {
- /* See if the matching line matches when backreferences
- are considered... */
- ret = re_search (®ex, matching_line, maxlen,
- beg, maxlen - beg, ®s);
- if (ret == -1)
- goto fail;
- beg = ret;
- len = regs.end[0] - beg;
- /* Ok, now check if it subsumed the whole line if -x */
- if (whole_line && (beg != 0 || len != maxlen))
- goto fail;
- /* If -w then check if the match aligns with word
- boundaries. We have to do this iteratively, because
- (a) The line may contain more than one occurence
- of the pattern, and;
- (b) Several alternatives in the pattern might
- be valid at a given point, and we may need to
- consider a shorter one in order to align with
- word boundaries. */
- else if (whole_word)
- while (len > 0)
- {
- /* If it's preceeded by a word constituent, then no go. */
- if (beg > 0
- && WCHAR((unsigned char) matching_line[beg - 1]))
- break;
- /* If it's followed by a word constituent, look for
- a shorter match. */
- else if (beg + len < maxlen
- && WCHAR((unsigned char) matching_line[beg + len]))
- /* This is sheer incest. */
- len = re_match_2 (®ex, (unsigned char *) 0, 0,
- matching_line, maxlen,
- beg, ®s, beg + len - 1);
- else
- goto succeed;
- }
- else
- goto succeed;
- }
- fail:
- resume = next_line;
- if (resume == search_limit)
- break;
- else
- continue;
- }
-
- succeed:
- /* Print out the matching or nonmatching lines as necessary. */
- if (! nonmatching_lines)
- {
- /* Not -v, so nothing hairy... */
- ++match_count;
-
- /* Print leftover trailing context from last time around. */
- while (pending_lines && last_match_limit < matching_line)
- {
- last_match_limit += print_line(last_match_limit,
- initial_line_count++,
- 0);
- --pending_lines;
- }
-
- /* Back up over leading context if necessary. */
- for (i = leading_context;
- i > 0 && matching_line > printed_limit;
- --i)
- {
- while (matching_line > printed_limit
- && (--matching_line)[-1] != '\n')
- ;
- --line_count;
- }
-
- /* If context is enabled, we may have to print a separator. */
- if ((leading_context || trailing_context) && !silent
- && !first_match && (printed_limit_fake
- || matching_line > printed_limit))
- printf("----------\n");
- first_match = 0;
-
- /* Print the matching line and its leading context. */
- while (matching_line < real_matching_line)
- matching_line += print_line(matching_line, line_count++, 0);
- matching_line += print_line(matching_line, line_count++, 1);
-
- /* If there's trailing context, leave some lines pending until
- next time. */
- pending_lines = trailing_context;
- }
- else if (matching_line == last_match_limit)
- {
- /* In the -v case, this is where we deal with leftover
- trailing context from last time... */
- if (pending_lines > 0)
- {
- --pending_lines;
- print_line(matching_line, line_count, 0);
- }
- ++line_count;
- }
- else if (matching_line > last_match_limit)
- {
- char *start = last_match_limit;
-
- /* Back up over leading context if necessary. */
- for (i = leading_context; start > printed_limit && i; --i)
- {
- while (start > printed_limit && (--start)[-1] != '\n')
- ;
- --initial_line_count;
- }
-
- /* If context is enabled, we may have to print a separator. */
- if ((leading_context || trailing_context) && !silent
- && !first_match && (printed_limit_fake
- || start > printed_limit))
- printf("----------\n");
- first_match = 0;
-
- /* Print out the presumably matching leading context. */
- while (start < last_match_limit)
- start += print_line(start, initial_line_count++, 0);
-
- /* Print out the nonmatching lines prior to the matching line. */
- while (start < matching_line)
- {
- /* This counts as a "matching line" in -v. */
- ++match_count;
- start += print_line(start, initial_line_count++, 1);
- }
-
- /* Deal with trailing context. In -v what this means is
- we print the current (matching) line, marked as a non
- matching line. */
- if (trailing_context)
- {
- print_line(matching_line, line_count, 0);
- pending_lines = trailing_context - 1;
- }
-
- /* Count the current line. */
- ++line_count;
- }
- else
- /* Let us pray this never happens... */
- abort();
-
- /* Resume searching at the beginning of the next line. */
- initial_line_count = line_count;
- resume = next_line;
- last_match_limit = next_line;
-
- if (resume == search_limit)
- break;
- }
-
- /* Restore the saved character. */
- *search_limit = saved_char;
-
- if (! nonmatching_lines)
- {
- while (last_match_limit < search_limit && pending_lines)
- {
- last_match_limit += print_line(last_match_limit,
- initial_line_count++,
- 0);
- --pending_lines;
- }
- }
- else if (search_limit > last_match_limit)
- {
- char *start = last_match_limit;
-
- /* Back up over leading context if necessary. */
- for (i = leading_context; start > printed_limit && i; --i)
- {
- while (start > printed_limit && (--start)[-1] != '\n')
- ;
- --initial_line_count;
- }
-
- /* If context is enabled, we may have to print a separator. */
- if ((leading_context || trailing_context) && !silent
- && !first_match && (printed_limit_fake
- || start > printed_limit))
- printf("----------\n");
- first_match = 0;
-
- /* Print out all the nonmatching lines up to the search limit. */
- while (start < last_match_limit)
- start += print_line(start, initial_line_count++, 0);
- while (start < search_limit)
- {
- ++match_count;
- start += print_line(start, initial_line_count++, 1);
- }
-
- pending_lines = trailing_context;
- resume_index = 0;
- retain = bufbytes - (search_limit - buffer);
- continue;
- }
-
- /* Save the trailing end of the buffer for possible use as leading
- context in the future. */
- i = leading_context;
- tmp = search_limit;
- while (tmp > printed_limit && i--)
- while (tmp > printed_limit && (--tmp)[-1] != '\n')
- ;
- resume_index = search_limit - tmp;
- retain = bufbytes - (tmp - buffer);
- if (tmp > printed_limit)
- printed_limit_fake = 1;
- }
-
- return match_count;
- }
-
- void
- usage_and_die()
- {
- fprintf(stderr, "\
- Usage: %s [-CVbchilnsvwx] [-num] [-A num] [-B num] [-f file]\n\
- [-e] expr [file...]\n", prog);
- exit(ERROR);
- }
-
- static char version[] = "GNU e?grep, version 1.6";
-
- int
- main(argc, argv)
- int argc;
- char **argv;
- {
- int c;
- int ignore_case = 0; /* Compile the regexp to ignore case. */
- char *the_regexp = 0; /* The regular expression. */
- int regexp_len; /* Length of the regular expression. */
- char *regexp_file = 0; /* File containing parallel regexps. */
- int count_lines = 0; /* Display only a count of matching lines. */
- int list_files = 0; /* Display only the names of matching files. */
- int line_count = 0; /* Count of matching lines for a file. */
- int matches_found = 0; /* True if matches were found. */
- char *regex_errmesg; /* Error message from regex routines. */
- char translate[_NOTCHAR]; /* Translate table for case conversion
- (needed by the backtracking matcher). */
-
- if (prog = index(argv[0], '/'))
- ++prog;
- else
- prog = argv[0];
-
- opterr = 0;
- while ((c = getopt(argc, argv, "0123456789A:B:CVbce:f:hilnsvwx")) != EOF)
- switch (c)
- {
- case '?':
- usage_and_die();
- break;
-
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- trailing_context = 10 * trailing_context + c - '0';
- leading_context = 10 * leading_context + c - '0';
- break;
-
- case 'A':
- if (! sscanf(optarg, "%d", &trailing_context)
- || trailing_context < 0)
- usage_and_die();
- break;
-
- case 'B':
- if (! sscanf(optarg, "%d", &leading_context)
- || leading_context < 0)
- usage_and_die();
- break;
-
- case 'C':
- trailing_context = leading_context = 2;
- break;
-
- case 'V':
- fprintf(stderr, "%s\n", version);
- break;
-
- case 'b':
- byte_count = 1;
- break;
-
- case 'c':
- count_lines = 1;
- silent = 1;
- break;
-
- case 'e':
- /* It doesn't make sense to mix -f and -e. */
- if (regexp_file)
- usage_and_die();
- the_regexp = optarg;
- break;
-
- case 'f':
- /* It doesn't make sense to mix -f and -e. */
- if (the_regexp)
- usage_and_die();
- regexp_file = optarg;
- break;
-
- case 'h':
- no_filenames = 1;
- break;
-
- case 'i':
- ignore_case = 1;
- for (c = 0; c < _NOTCHAR; ++c)
- if (isupper(c))
- translate[c] = tolower(c);
- else
- translate[c] = c;
- regex.translate = translate;
- break;
-
- case 'l':
- list_files = 1;
- silent = 1;
- break;
-
- case 'n':
- line_numbers = 1;
- break;
-
- case 's':
- silent = 1;
- break;
-
- case 'v':
- nonmatching_lines = 1;
- break;
-
- case 'w':
- whole_word = 1;
- break;
-
- case 'x':
- whole_line = 1;
- break;
-
- default:
- /* This can't happen. */
- fprintf(stderr, "%s: getopt(3) let one by!\n", prog);
- usage_and_die();
- break;
- }
-
- /* Set the syntax depending on whether we are EGREP or not. */
- #ifdef EGREP
- regsyntax(RE_SYNTAX_EGREP, ignore_case);
- re_set_syntax(RE_SYNTAX_EGREP);
- #else
- regsyntax(RE_SYNTAX_GREP, ignore_case);
- re_set_syntax(RE_SYNTAX_GREP);
- #endif
-
- /* Compile the regexp according to all the options. */
- if (regexp_file)
- {
- FILE *fp = fopen(regexp_file, "r");
- int len = 256;
- int i = 0;
-
- if (! fp)
- {
- fprintf(stderr, "%s: %s: %s\n", prog, regexp_file,
- sys_errlist[errno]);
- exit(ERROR);
- }
-
- the_regexp = malloc(len);
- while ((c = getc(fp)) != EOF)
- {
- the_regexp[i++] = c;
- if (i == len)
- the_regexp = realloc(the_regexp, len *= 2);
- }
- fclose(fp);
- /* Nuke the concluding newline so we won't match the empty string. */
- if (i > 0 && the_regexp[i - 1] == '\n')
- --i;
- regexp_len = i;
- }
- else if (! the_regexp)
- {
- if (optind >= argc)
- usage_and_die();
- the_regexp = argv[optind++];
- regexp_len = strlen(the_regexp);
- }
- else
- regexp_len = strlen(the_regexp);
-
- if (whole_word || whole_line)
- {
- /* In the whole-word case, we use the pattern:
- (^|[^A-Za-z_])(userpattern)([^A-Za-z_]|$).
- In the whole-line case, we use the pattern:
- ^(userpattern)$.
- BUG: Using [A-Za-z_] is locale-dependent! */
-
- char *n = malloc(regexp_len + 50);
- int i = 0;
-
- #ifdef EGREP
- if (whole_word)
- strcpy(n, "(^|[^A-Za-z_])(");
- else
- strcpy(n, "^(");
- #else
- /* Todo: Make *sure* this is the right syntax. Down with grep! */
- if (whole_word)
- strcpy(n, "\\(^\\|[^A-Za-z_]\\)\\(");
- else
- strcpy(n, "^\\(");
- #endif
- i = strlen(n);
- bcopy(the_regexp, n + i, regexp_len);
- i += regexp_len;
- #ifdef EGREP
- if (whole_word)
- strcpy(n + i, ")([^A-Za-z_]|$)");
- else
- strcpy(n + i, ")$");
- #else
- if (whole_word)
- strcpy(n + i, "\\)\\([^A-Za-z_]\\|$\\)");
- else
- strcpy(n + i, "\\)$");
- #endif
- i += strlen(n + i);
- regcompile(n, i, ®, 1);
- }
- else
- regcompile(the_regexp, regexp_len, ®, 1);
-
-
- if (regex_errmesg = re_compile_pattern(the_regexp, regexp_len, ®ex))
- regerror(regex_errmesg);
-
- /*
- Find the longest metacharacter-free string which must occur in the
- regexpr, before short-circuiting regexecute() with Boyer-Moore-Gosper.
- (Conjecture: The problem in general is NP-complete.) If there is no
- such string (like for many alternations), then default to full automaton
- search. regmust() code and heuristics [see dfa.c] courtesy
- Arthur David Olson.
- */
- if (line_numbers == 0 && nonmatching_lines == 0)
- {
- if (reg.mustn == 0 || reg.mustn == MUST_MAX ||
- index(reg.must, '\0') != reg.must + reg.mustn)
- bmgexec = 0;
- else
- {
- reg.must[reg.mustn] = '\0';
- if (getenv("MUSTDEBUG") != NULL)
- (void) printf("must have: \"%s\"\n", reg.must);
- bmg_setup(reg.must, ignore_case);
- bmgexec = 1;
- }
- }
-
- if (argc - optind < 2)
- no_filenames = 1;
-
- initialize_buffer();
-
- if (argc > optind)
- while (optind < argc)
- {
- bufprev = eof = 0;
- filename = argv[optind++];
- fd = open(filename, 0, 0);
- if (fd < 0)
- {
- fprintf(stderr, "%s: %s: %s\n", prog, filename,
- sys_errlist[errno]);
- error = 1;
- continue;
- }
- if (line_count = grep())
- matches_found = 1;
- close(fd);
- if (count_lines)
- if (!no_filenames)
- printf("%s:%d\n", filename, line_count);
- else
- printf("%d\n", line_count);
- else if (list_files && line_count)
- printf("%s\n", filename);
- }
- else
- {
- if (line_count = grep())
- matches_found = 1;
- if (count_lines)
- printf("%d\n", line_count);
- else if (list_files && line_count)
- printf("<stdin>\n");
- }
-
- if (error)
- exit(ERROR);
- if (matches_found)
- exit(MATCHES_FOUND);
- exit(NO_MATCHES_FOUND);
- return NO_MATCHES_FOUND;
- }
-
- /* Needed by the regexp routines. This could be fancier, especially when
- dealing with parallel regexps in files. */
- void
- regerror(s)
- const char *s;
- {
- fprintf(stderr, "%s: %s\n", prog, s);
- exit(ERROR);
- }
-
- /*
- bmg_setup() and bmg_search() adapted from:
- Boyer/Moore/Gosper-assisted 'egrep' search, with delta0 table as in
- original paper (CACM, October, 1977). No delta1 or delta2. According to
- experiment (Horspool, Soft. Prac. Exp., 1982), delta2 is of minimal
- practical value. However, to improve for worst case input, integrating
- the improved Galil strategies (Apostolico/Giancarlo, Siam. J. Comput.,
- February 1986) deserves consideration.
-
- James A. Woods Copyleft (C) 1986, 1988
- NASA Ames Research Center
- */
-
- char *
- execute(r, begin, end, newline, count, try_backref)
- struct regexp *r;
- char *begin;
- char *end;
- int newline;
- int *count;
- int *try_backref;
- {
- register char *p, *s;
- char *match;
- char *start = begin;
- char save; /* regexecute() sentinel */
- int len;
- char *bmg_search();
-
- if (!bmgexec) /* full automaton search */
- return(regexecute(r, begin, end, newline, count, try_backref));
- else
- {
- len = end - begin;
- while ((match = bmg_search((unsigned char *) start, len)) != NULL)
- {
- p = match; /* narrow search range to submatch line */
- while (p > begin && *p != '\n')
- p--;
- s = match;
- while (s < end && *s != '\n')
- s++;
- s++;
-
- save = *s;
- *s = '\0';
- match = regexecute(r, p, s, newline, count, try_backref);
- *s = save;
-
- if (match != NULL)
- return((char *) match);
- else
- {
- start = s;
- len = end - start;
- }
- }
- return(NULL);
- }
- }
-
- int delta0[256];
- unsigned char cmap[256]; /* (un)folded characters */
- unsigned char pattern[5000];
- int patlen;
-
- char *
- bmg_search(buffer, buflen)
- unsigned char *buffer;
- int buflen;
- {
- register unsigned char *k, *strend, *s, *buflim;
- register int t;
- int j;
-
- if (patlen > buflen)
- return NULL;
-
- buflim = buffer + buflen;
- if (buflen > patlen * 4)
- strend = buflim - patlen * 4;
- else
- strend = buffer;
-
- s = buffer;
- k = buffer + patlen - 1;
-
- for (;;)
- {
- /* The dreaded inner loop, revisited. */
- while (k < strend && (t = delta0[*k]))
- {
- k += t;
- k += delta0[*k];
- k += delta0[*k];
- }
- while (k < buflim && delta0[*k])
- ++k;
- if (k == buflim)
- break;
-
- j = patlen - 1;
- s = k;
- while (--j >= 0 && cmap[*--s] == pattern[j])
- ;
- /*
- delta-less shortcut for literati, but
- short shrift for genetic engineers.
- */
- if (j >= 0)
- k++;
- else /* submatch */
- return ((char *)k);
- }
- return(NULL);
- }
-
- int
- bmg_setup(pat, folded) /* compute "boyer-moore" delta table */
- char *pat;
- int folded;
- { /* ... HAKMEM lives ... */
- int j;
-
- patlen = strlen(pat);
-
- if (folded) /* fold case while saving pattern */
- for (j = 0; j < patlen; j++)
- pattern[j] = (isupper((int) pat[j]) ?
- (char) tolower((int) pat[j]) : pat[j]);
- else
- bcopy(pat, pattern, patlen);
-
- for (j = 0; j < 256; j++)
- {
- delta0[j] = patlen;
- cmap[j] = (char) j; /* could be done at compile time */
- }
- for (j = 0; j < patlen - 1; j++)
- delta0[pattern[j]] = patlen - j - 1;
- delta0[pattern[patlen - 1]] = 0;
-
- if (folded)
- {
- for (j = 0; j < patlen - 1; j++)
- if (islower((int) pattern[j]))
- delta0[toupper((int) pattern[j])] = patlen - j - 1;
- if (islower((int) pattern[patlen - 1]))
- delta0[toupper((int) pattern[patlen - 1])] = 0;
- for (j = 'A'; j <= 'Z'; j++)
- cmap[j] = (char) tolower((int) j);
- }
- }
-