home *** CD-ROM | disk | FTP | other *** search
- /* Permuted index, with keywords in their context.
- Copyright (C) 1990 Free Software Foundation, Inc.
- Francois Pinard <pinard@iro.umontreal.ca>, 1988.
-
- $Id: gptx.c,v 1.1 90/07/08 17:19:01 pinard Exp $
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 1, or (at your option)
- any later version.
-
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-
- /* Global defines. */
-
- /* Reallocation step when swallowing non regular files. The value is not
- the actual reallocation step, but its base two logarithm: the buffer
- grows by (1 << SWALLOW_REALLOC_LOG) bytes at a time. */
- #define SWALLOW_REALLOC_LOG 12
-
- /* Define to be the same as in "regex.c". It is stored in the syntax
- table for every character that is a word constituent. */
- #define Sword 1
-
- /* The following specifies which getopt strings are allowed in various
- modes. So called `normal' mode options are ptx compatibility mode
- options plus local extensions. When the mode is still unknown, there is
- a supplementary -p option to request ptx compatibility mode. */
-
- #define UNKNOWN_MODE_OPTIONS "b:i:fg:o:ptrw:ACF:ORS:TW:"
- #define PTX_MODE_OPTIONS "b:i:fg:o:trw:C"
- #define NORMAL_MODE_OPTIONS "b:i:fg:o:trw:ACF:ORS:TW:"
-
-
- /* Include files. */
-
- #include <stdio.h>
- #include <fcntl.h>
- #include <sys/types.h>
- #include <sys/stat.h>
-
- #ifdef USG
- #include <string.h>
- #else /* not USG */
- #include <strings.h>
- #define strchr index
- #define strrchr rindex
- #endif /* not USG */
-
- #include "bumpalloc.h"
- #include "ctype.h"
- #include "gptx.h"
- #include "regex.h"
-
-
- /* Global definitions. */
-
-
- /* Some types. */
-
- typedef const char STRING; /* to avoid writing `const' too often */
-
- enum Mode
- {
- UNKNOWN_MODE, /* operating mode not decided yet */
- PTX_MODE, /* standard ptx compatibility mode */
- NORMAL_MODE, /* normal GNU ptx operating mode */
- };
-
- enum Format
- {
- UNKNOWN_FORMAT, /* output format still unknown */
- DUMB_FORMAT, /* output for a dumb terminal */
- ROFF_FORMAT, /* output for `troff' or `nroff' */
- TEX_FORMAT, /* output for `TeX' or `LaTeX' */
- };
-
- typedef short DELTA; /* to hold displacement within one context */
-
-
- /* Program name. */
-
- char *program_name; /* name of this program, used to prefix diagnostics */
-
-
- /* Program options. */
-
- enum Mode operating_mode; /* operating mode */
- STRING *allowed_options; /* getopt option list currently allowed */
-
- int auto_reference = 0; /* references are `file_name(line_number)' */
- int input_reference = 0; /* references at beginning of input lines */
- int right_reference = 0; /* output references after right context */
- int line_width = 72; /* output line width in characters */
- int gap_size = 3; /* number of spaces between output fields */
- STRING *truncation_string = "/"; /* string used to mark line truncations */
- enum Format output_format = UNKNOWN_FORMAT; /* output format */
-
- int fold_lower_to_upper = 0; /* fold lower case to upper for sorting and matching */
- STRING *context_regex_string = NULL; /* raw regex for end of context; NULL selects a default, empty disables it */
- STRING *word_regex_string = NULL; /* raw regex for a keyword; compiled only when non-empty */
- STRING *break_file = NULL; /* name of the `Break characters' file */
- STRING *only_file = NULL; /* name of the `Only words' file */
- STRING *ignore_file = NULL; /* name of the `Ignore words' file */
-
-
- /* A BLOCK delimits a region in memory of arbitrary size, like the copy of
- a whole file. A WORD is something smaller; its length should fit in a
- short integer. A WORD_TABLE may contain several WORDs. */
-
- typedef struct
- {
- char HUGE *start; /* pointer to beginning of region */
- char HUGE *end; /* pointer to end + 1 of region */
- }
- BLOCK;
-
- typedef struct
- {
- char *start; /* pointer to beginning of region */
- short size; /* length of the region, in characters */
- }
- WORD;
-
- typedef struct
- {
- WORD *start; /* array of WORDs */
- int length; /* number of entries */
- }
- WORD_TABLE;
-
-
- /* Pattern description tables. */
-
- /* For each character, provide its folded equivalent. Filled in only
- when `fold_lower_to_upper' is set. */
- unsigned char folded_chars[1 << BYTEWIDTH];
-
- /* For each character, indicate if it is part of a word (Sword) or not
- (zero). Handed to the regex library through `re_syntax_table'. */
- char syntax_table[1 << BYTEWIDTH];
-
- /* Compiled regex for end of context. */
- struct re_pattern_buffer *context_regex;
-
- /* End of context pattern register indices. */
- struct re_registers context_regs;
-
- /* Compiled regex for a keyword. */
- struct re_pattern_buffer *word_regex;
-
- /* Keyword pattern register indices. */
- struct re_registers word_regs;
-
- /* A word characters fastmap is used only when no word regexp has been
- provided. A word is then made up of a sequence of one or more characters
- allowed by the fastmap. Contains !0 if character allowed in word. */
- char word_fastmap[1 << BYTEWIDTH];
-
- /* Maximum length of any word read. */
- int maximum_word_length;
-
- /* Maximum width of any reference used. */
- int reference_max_width;
-
-
- /* Ignore and Only word tables. ALLOC_NEW_WORD is invoked before a new
- entry is stored at index (table)->length. BUMP_ALLOC comes from
- "bumpalloc.h", which is not shown here; presumably it grows the array
- when needed -- confirm against that header. */
-
- WORD_TABLE ignore_table; /* table of words to ignore */
- WORD_TABLE only_table; /* table of words to select */
-
- #define ALLOC_NEW_WORD(table) \
- BUMP_ALLOC ((table)->start, (table)->length, 8, WORD)
-
-
- /* Source text table, and scanning macros. */
-
- int number_input_files; /* number of text input files */
- int total_line_count; /* total number of lines seen so far */
- STRING **input_file_name; /* array of text input file names */
- int *file_line_count; /* array of `total_line_count' values at end of each file */
-
- BLOCK text_buffer; /* file to study: start and end + 1 of the text */
- char HUGE *text_buffer_maxend; /* allocated end of text_buffer */
-
/* Scanning macros.  Each one moves CURSOR, which must be a modifiable
   pointer into the text, and expands to a statement lacking its final
   semicolon, so that it is used like a function call.

   SKIP_NON_WHITE is used only for getting or skipping the reference.  It
   advances CURSOR over non white space, SKIP_WHITE advances it over white
   space, both stopping at LIMIT.  SKIP_WHITE_BACKWARDS moves CURSOR back
   over white space, stopping at START.

   Characters are converted to `unsigned char' before being classified, so
   that bytes having their high bit set are never presented to isspace as
   negative values.  This agrees with the rest of this file, which only
   hands values between 0 and (1 << BYTEWIDTH) - 1 to the ctype routines.  */

#define SKIP_NON_WHITE(cursor, limit) \
  while ((cursor) < (limit) && !isspace ((unsigned char) *(cursor))) \
    (cursor)++

#define SKIP_WHITE(cursor, limit) \
  while ((cursor) < (limit) && isspace ((unsigned char) *(cursor))) \
    (cursor)++

#define SKIP_WHITE_BACKWARDS(cursor, start) \
  while ((cursor) > (start) && isspace ((unsigned char) (cursor)[-1])) \
    (cursor)--

/* SKIP_SOMETHING advances CURSOR over one `thing'.  If a word regexp was
   given, the thing is whatever `word_regex' matches at CURSOR, or a single
   character when nothing (or an empty string) matches.  Otherwise, the
   thing is a run of characters allowed by `word_fastmap', or a single
   character when the one under CURSOR is not allowed.  Note that the
   character under CURSOR is inspected before LIMIT is checked.  */

#define SKIP_SOMETHING(cursor, limit) \
  do \
    if (word_regex_string) \
      { \
        int count; \
        count = re_match (word_regex, (cursor), \
                          L_PDIFF ((limit), (cursor)), ZERO, NULL); \
        (cursor) += count <= 0 ? 1 : count; \
      } \
    else if (word_fastmap[(unsigned char) *(cursor)]) \
      while ((cursor) < (limit) \
             && word_fastmap[(unsigned char) *(cursor)]) \
        (cursor)++; \
    else \
      (cursor)++; \
  while (0)
-
-
- /* Occurrences table.
-
- The `key' field provides the central word, which is surrounded by a
- left context and a right context. Being a WORD, made of a start pointer
- and a size, it allows full 8-bit character keys, even including NULs.
- At other places in this program, the name `keyafter' refers to the
- keyword followed by its right context.
-
- The left context does not extend, towards the beginning of the file,
- further than a distance given by the `left' value. This value is
- relative to the keyword beginning, it is usually negative. This ensures
- that, except for white space, we will never have to backward scan the
- source text, when it is time to generate the final output lines.
-
- The right context, indirectly attainable through the keyword end, does
- not extend, towards the end of the file, further than a distance given by
- the `right' value. This value is relative to the keyword beginning, it is
- usually positive.
-
- When automatic references are used, the `reference' value is the overall
- line number in all input files read so far, in this case, it is of type
- (int). When input references are used, the `reference' value indicates
- the distance between the keyword beginning and the start of the reference
- field, it is of type (DELTA) and usually negative. Also, to save space,
- the `reference' field is used only if automatic references are used or if
- the source text has references, otherwise it is not even allocated. The
- variable sizeof_occurs contains the actual size of each OCCURS for this
- run, taking care of the variable size of the `reference' value.
-
- PLEASE NOTE that, for this reason, the reference field should be kept
- last in the structure. */
-
- typedef struct
- {
- WORD key; /* description of the keyword */
- DELTA left; /* distance to left context start */
- DELTA right; /* distance to right context end */
- int reference; /* reference descriptor; must stay last */
- }
- OCCURS;
-
- /* The various OCCURS tables are indexed by the language. For the time
- being, there is no such multiple language support, so index 0 is the
- only one in use. */
-
- OCCURS *occurs_table[1]; /* all words retained from the read text */
- int number_of_occurs[1]; /* number of used slots in occurs_table */
- int sizeof_occurs; /* size of each allocated OCCURS */
-
- #define ALLOC_NEW_OCCURS(language) \
- BUMP_ALLOC_VARSIZE \
- (occurs_table[language], number_of_occurs[language], \
- 9, OCCURS, sizeof_occurs)
-
-
- /* Communication among output routines. */
-
- /* Indicate if special output processing is requested for each character.
- Indexed by the character code taken as an unsigned char. */
- char edited_flag[1 << BYTEWIDTH];
-
- int half_line_width; /* half of line width, reference excluded */
- int before_max_width; /* maximum width of before field */
- int keyafter_max_width; /* maximum width of keyword-and-after field */
- int truncation_string_length; /* length of string used to flag truncation */
-
- /* When context is limited by lines, wraparound may happen on final output:
- the `head' pointer gives access to some supplementary left context which
- will be seen at the end of the output line, the `tail' pointer gives
- access to some supplementary right context which will be seen at the
- beginning of the output line. Each field is a BLOCK, that is, a start
- pointer and an end + 1 pointer. */
-
- BLOCK tail; /* tail field */
- int tail_truncation; /* flag truncation after the tail field */
-
- BLOCK before; /* before field */
- int before_truncation; /* flag truncation before the before field */
-
- BLOCK keyafter; /* keyword-and-after field */
- int keyafter_truncation; /* flag truncation after the keyafter field */
-
- BLOCK head; /* head field */
- int head_truncation; /* flag truncation before the head field */
-
- BLOCK reference; /* reference field for input reference mode */
-
-
- /* Miscellaneous routines. */
-
-
- /* Diagnose an input/output error for FILE_NAME, then exit with non-zero
- status. The perror message will be prefixed by the program name. */
-
- #ifdef __STDC__
- void volatile perror_and_exit (STRING *file_name)
- #else
- void
- volatile perror_and_exit (file_name)
- STRING *file_name;
- #endif
- {
- fprintf (stderr, "%s: ", program_name);
- perror (file_name);
- exit (1);
- }
-
-
- /* Compile the regex represented by STRING, diagnose and abort if any error.
- Returns the compiled regex structure. */
-
- #ifdef __STDC__
- struct re_pattern_buffer *alloc_and_compile_regex (STRING *string)
- #else
- struct re_pattern_buffer *
- alloc_and_compile_regex (string)
- STRING *string;
- #endif
- {
- struct re_pattern_buffer *pattern; /* newly allocated structure */
- char *message; /* error message returned by regex.c */
-
- pattern = (struct re_pattern_buffer *)
- xmalloc (sizeof (struct re_pattern_buffer));
-
- pattern->buffer = NULL;
- pattern->allocated = 0;
- pattern->translate = fold_lower_to_upper ? (char *) folded_chars : NULL;
- pattern->fastmap = (char *) xmalloc (1 << BYTEWIDTH);
-
- /* Note: regex.h and regex.c do not declare const parameters, so the
- following call generate a spurious: `warning: argument passing of
- non-const * pointer from const *'. So, for the time being, I simply
- cast it (char *) and avoid using -Wcast-qual. */
-
- message = re_compile_pattern ((char *) string, strlen (string), pattern);
- if (message)
- {
- fprintf (stderr, "* error in `%s'\n", string);
- fprintf (stderr, "* %s\n", message);
- exit (1);
- }
-
- /* Note that the fastmap should be explicitely recompiled for `re_match',
- but `re_search' is always called sooner, which automatically compiles
- the fastmap if this has not been done yet. So there is no real danger.
-
- re_compile_fastmap (pattern); */
-
- /* Do not waste extra allocated space. */
-
- if (pattern->allocated > pattern->used)
- {
- #ifdef MSDOS
- pattern->buffer
- = (char *) xrealloc (pattern->buffer, (size_t) pattern->used);
- #else /* not MSDOS */
- pattern->buffer = (char *) xrealloc (pattern->buffer, pattern->used);
- #endif /* not MSDOS */
- pattern->allocated = pattern->used;
- }
-
- return pattern;
- }
-
-
- /* This will initialize various tables for pattern match and compiles some
- regexps. */
-
- #ifdef __STDC__
- void initialize_regex (void)
- #else
- void
- initialize_regex ()
- #endif
- {
- int character; /* character value */
-
- /* Initialize the regex syntax table. */
-
- for (character = 0; character < (1 << BYTEWIDTH); character++)
- syntax_table[character] = (isalpha (character) ? Sword : 0);
-
- re_syntax_table = syntax_table;
-
- /* Initialize the case folding table. */
-
- if (fold_lower_to_upper)
- {
- for (character = 0; character < (1 << BYTEWIDTH); character++)
- folded_chars[character]
- = (syntax_table[character] == Sword && (character & 040))
- ? (character & ~040) : character;
- }
-
- /* Unless the user already provided a description of the end of line or
- end of sentence sequence, select an end of line sequence to compile.
- If the user provided an empty definition, thus disabling end of line or
- sentence feature, make it NULL to speed up tests. In ptx compatibility
- mode, use end of lines. In normal mode, use end of sentence, like GNU
- emacs'. */
-
- if (context_regex_string)
- {
- if (!*context_regex_string)
- context_regex_string = NULL;
- }
- else
- context_regex_string = ((operating_mode == PTX_MODE || input_reference)
- ? "\n"
- : "[.?!][]\"')}]*\\($\\|\t\\| \\)[ \t\n]*");
-
- if (context_regex_string)
- context_regex = alloc_and_compile_regex (context_regex_string);
-
- /* If the user has already provided a non-empty regexp to describe words,
- compile it. Else, unless this has already been done through a user
- provided Break character file, construct a fastmap of characters that
- may appear in a word. In normal mode, include only letters of the
- underlying character set. In ptx compatibility mode, include almost
- everything, even punctuations; stop only on white space. */
-
- if (word_regex_string && *word_regex_string)
- word_regex = alloc_and_compile_regex (word_regex_string);
- else if (!break_file)
- if (operating_mode == PTX_MODE)
- {
-
- /* Simulate [^ \t\n]+. */
-
- memset (word_fastmap, 1, 1 << BYTEWIDTH);
- word_fastmap[' '] = 0;
- word_fastmap['\t'] = 0;
- word_fastmap['\n'] = 0;
- }
- else
-
- /* Simulate \w+. */
-
- for (character = 0; character < (1 << BYTEWIDTH); character++)
- word_fastmap[character] = syntax_table[character] == Sword;
- }
-
-
- /* This routine will attempt to swallow a whole file name FILE_NAME into a
- contiguous region of memory and return a description of it into BLOCK.
- Standard input is assumed whenever FILE_NAME is NULL or simply "-". */
-
- #ifdef __STDC__
- void swallow_file_in_memory (STRING *file_name, BLOCK *block)
- #else
- void
- swallow_file_in_memory (file_name, block)
- STRING *file_name;
- BLOCK *block;
- #endif
- {
- int file_handle; /* file descriptor number */
- struct stat stat_block; /* stat block for file */
- LONG allocated_length; /* allocated length of memory buffer */
- LONG used_length; /* used length in memory buffer */
- LONG read_length; /* number of character gotten on last read */
-
- /* As special cases, a file name which is NULL or "-" indicates standard
- input, which is already opened. In all other cases, open the file from
- its name. */
-
- if (!file_name || strcmp (file_name, "-") == 0)
- file_handle = fileno (stdin);
- else
- if ((file_handle = open (file_name, O_RDONLY)) < 0)
- perror_and_exit (file_name);
-
- /* If the file is a plain, regular file, allocate the memory buffer all at
- once and swallow the file in one blow. In other cases, read the file
- repeatedly in smaller chunks until we have it all, reallocating memory
- once in a while, as we go. */
-
- if (fstat (file_handle, &stat_block) < 0)
- perror_and_exit (file_name);
-
- if ((stat_block.st_mode & S_IFMT) == S_IFREG)
- {
- #ifdef MSDOS
- block->start = (char HUGE *) xhalloc (stat_block.st_size);
-
- read_length = hread (file_handle, block->start, stat_block.st_size);
- if (read_length <= 0L)
- perror_and_exit (file_name);
-
- block->end = block->start + read_length;
- #else /* not MSDOS */
- block->start = (char *) xmalloc ((int) stat_block.st_size);
-
- if (read (file_handle, block->start, (int) stat_block.st_size)
- != stat_block.st_size)
- perror_and_exit (file_name);
-
- block->end = block->start + stat_block.st_size;
- #endif /* not MSDOS */
- }
- else
- {
- #ifdef MSDOS
- block->start = (char HUGE *) xhalloc (1L << SWALLOW_REALLOC_LOG);
- used_length = 0;
- allocated_length = (1 << SWALLOW_REALLOC_LOG);
-
- while ((read_length = hread (file_handle,
- block->start + used_length,
- allocated_length - used_length)) > 0L)
- #else /* not MSDOS */
- block->start = (char *) xmalloc (1 << SWALLOW_REALLOC_LOG);
- used_length = 0;
- allocated_length = (1 << SWALLOW_REALLOC_LOG);
-
- while ((read_length = read (file_handle,
- block->start + used_length,
- allocated_length - used_length)) > 0)
- #endif /* not MSDOS */
- {
- used_length += read_length;
- if (used_length == allocated_length)
- {
- #ifdef MSDOS
- block->start
- = (char HUGE *) xhrealloc (block->start, allocated_length,
- allocated_length
- + (1L << SWALLOW_REALLOC_LOG));
- allocated_length += (1L << SWALLOW_REALLOC_LOG);
- #else /* not MSDOS */
- allocated_length += (1 << SWALLOW_REALLOC_LOG);
- block->start
- = (char *) xrealloc (block->start, allocated_length);
- #endif /* not MSDOS */
- }
- }
-
- if (read_length < 0)
- perror_and_exit (file_name);
-
- block->end = block->start + used_length;
- }
-
- /* Close the file, but only if it was not the standard input. */
-
- if (file_handle != fileno (stdin))
- close (file_handle);
- }
-
- /* Sort and search routines. */
-
-
- /* Compare two words, FIRST and SECOND, and return 0 if they are identical.
- Return less than 0 if the first word goes before the second; return
- greater than 0 if the first word goes after the second.
-
- If a word is indeed a prefix of the other, the shorter should go first.
- */
-
- #ifdef __STDC__
- int compare_words (WORD *first, WORD *second)
- #else
- int
- compare_words (first, second)
- WORD *first;
- WORD *second;
- #endif
- {
- int length; /* minimum of two lengths */
- int counter; /* cursor in words */
- int value; /* value of comparison */
-
- length = first->size < second->size ? first->size : second->size;
-
- if (fold_lower_to_upper)
- {
- for (counter = 0; counter < length; counter++)
- if ((value = (folded_chars [(unsigned char) first->start[counter]]
- - folded_chars [(unsigned char) second->start[counter]]))
- != 0)
- return value;
- }
- else
- {
- for (counter = 0; counter < length; counter++)
- if ((value = ((unsigned char) first->start[counter]
- - (unsigned char) second->start[counter]))
- != 0)
- return value;
- }
-
- return first->size - second->size;
- }
-
-
- /* Decides which of two OCCURS, FIRST or SECOND, should lexicographically go
- first. In case of a tie, preserve the original order through a pointer
- comparison. */
-
- #ifdef __STDC__
- int compare_occurs (OCCURS *first, OCCURS *second)
- #else
- int
- compare_occurs (first, second)
- OCCURS *first;
- OCCURS *second;
- #endif
- {
- int value;
-
- value = compare_words (&first->key, &second->key);
-
- #ifdef MSDOS
- return
- value == 0
- ? (L_PDIFF (first->key.start, second->key.start) >= 0L ? 1 : -1)
- : value;
- #else /* not MSDOS */
- return value == 0 ? first->key.start - second->key.start : value;
- #endif /* not MSDOS */
- }
-
-
- /* Return !0 if WORD appears in TABLE. Uses a binary search. */
-
- #ifdef __STDC__
- int search_table (WORD *word, WORD_TABLE *table)
- #else
- int
- search_table (word, table)
- WORD *word;
- WORD_TABLE *table;
- #endif
- {
- int lowest; /* current lowest possible index */
- int highest; /* current highest possible index */
- int middle; /* current middle index */
- int value; /* value from last comparison */
-
- lowest = 0;
- highest = table->length - 1;
- while (lowest <= highest)
- {
- middle = (lowest + highest) / 2;
- value = compare_words (word, table->start + middle);
- if (value < 0)
- highest = middle - 1;
- else if (value > 0)
- lowest = middle + 1;
- else
- return 1;
- }
- return 0;
- }
-
-
- /* Sort the whole occurs table in memory. Presumably, `qsort' does not take
- intermediate copies or table elements, so the sort will be stabilized
- throught the comparison routine. */
-
- #ifdef __STDC__
- void sort_found_occurs (void)
- #else
- void
- sort_found_occurs ()
- #endif
- {
-
- /* Only one language for the time being. */
-
- #ifdef MSDOS
- assert ((long) number_of_occurs[0] * (long) sizeof_occurs < 0x10000L);
- #endif
-
- qsort (occurs_table[0], number_of_occurs[0],
- sizeof_occurs, compare_occurs);
- }
-
- /* Parameter files reading routines. */
-
-
- /* Read a file named FILE_NAME, containing a set of break characters. Build
- a content to the array word_fastmap in which all characters are allowed
- except those found in the file. Characters may be repeated. */
-
- #ifdef __STDC__
- void digest_break_file (STRING *file_name)
- #else
- void
- digest_break_file (file_name)
- STRING *file_name;
- #endif
- {
- BLOCK file_contents; /* to receive a copy of the file */
- char *cursor; /* cursor in file copy */
-
- swallow_file_in_memory (file_name, &file_contents);
-
- /* Make the fastmap and record the file contents in it. */
-
- memset (word_fastmap, 1, 1 << BYTEWIDTH);
- for (cursor = file_contents.start; cursor < file_contents.end; cursor++)
- word_fastmap[(unsigned char) *cursor] = 0;
-
- /* In normal mode, the only way to avoid newline as a break character is
- to write all the break characters in the file with no newline at all,
- not even at the end of the file. In ptx compatibility mode, spaces,
- tabs and newlines are always considered as break characters even if not
- included in the break file. */
-
- if (operating_mode == PTX_MODE)
- {
- word_fastmap[' '] = 0;
- word_fastmap['\t'] = 0;
- word_fastmap['\n'] = 0;
- }
-
- /* Return the space of the file, which is no more required. */
-
- free (file_contents.start);
- }
-
- /* Read a file named FILE_NAME, containing one word per line, then construct
- in TABLE a table of WORD descriptors for them. The routine swallows the
- whole file in memory; this is at the expense of space needed for
- newlines, which are useless; however, the reading is fast. */
-
- #ifdef __STDC__
- void digest_word_file (STRING *file_name, WORD_TABLE *table)
- #else
- void
- digest_word_file (file_name, table)
- STRING *file_name;
- WORD_TABLE *table;
- #endif
- {
- BLOCK file_contents; /* to receive a copy of the file */
- char *cursor; /* cursor in file copy */
- char *word_start; /* start of the current word */
-
- swallow_file_in_memory (file_name, &file_contents);
-
- table->start = NULL;
- table->length = 0;
-
- /* Read the whole file. */
-
- cursor = file_contents.start;
- while (cursor < file_contents.end)
- {
-
- /* Read one line, and save the word in contains. */
-
- word_start = cursor;
- while (cursor < file_contents.end && *cursor != '\n')
- cursor++;
-
- /* Record the word in table if it is not empty. */
-
- if (cursor > word_start)
- {
- ALLOC_NEW_WORD (table);
- table->start[table->length].start = word_start;
- table->start[table->length].size = I_PDIFF (cursor, word_start);
- table->length++;
- }
-
- /* This test allows for an incomplete line at end of file. */
-
- if (cursor < file_contents.end)
- cursor++;
- }
-
- /* Finally, sort all the words read. */
-
- #ifdef MSDOS
- assert ((long) table->length * (long) sizeof (WORD) < 0x10000L);
- #endif
-
- qsort (table->start, table->length, sizeof (WORD), compare_words);
- }
-
-
- /* Keyword recognition and selection. */
-
-
- /* For each keyword in the source text, constructs an OCCURS structure.
- The text held in `text_buffer' is cut into contexts (lines or sentences)
- by `context_regex', when one is defined. Within each context, words are
- located with `word_regex' if it was compiled, or else with
- `word_fastmap'. A word is skipped when it lies inside an input
- reference, when it is found in the Ignore table, or when it is missing
- from the Only table. Each retained word is appended to
- `occurs_table[0]'. Along the way, `maximum_word_length',
- `total_line_count' and `reference_max_width' get updated. */
-
- #ifdef __STDC__
- void find_occurs_in_text (void)
- #else
- void
- find_occurs_in_text ()
- #endif
- {
- char HUGE *cursor; /* for scanning the source text */
- char HUGE *scan; /* for scanning the source text also */
- char HUGE *line_start; /* start of the current input line */
- char HUGE *line_scan; /* newlines scanned until this point */
- int reference_length; /* length of reference in input mode */
- WORD possible_key; /* possible key, to ease searches */
- OCCURS *occurs_cursor; /* current OCCURS under construction */
-
- char HUGE *context_start; /* start of left context */
- char HUGE *context_end; /* end of right context */
- char HUGE *next_context_start;/* next start of left context */
-
- /* Tracking where lines start is helpful for reference processing. In
- auto reference mode, this allows counting lines. In input reference
- mode, this permits finding the beginning of the references.
-
- The first line begins with the file, skip immediately this very first
- reference in input reference mode, to help further rejection of any word
- found inside it. Also, unconditionally assigning these variables has
- the happy effect of shutting up lint. */
-
- line_start = text_buffer.start;
- line_scan = line_start;
- if (input_reference)
- {
- SKIP_NON_WHITE (line_scan, text_buffer.end);
- reference_length = I_PDIFF (line_scan, line_start);
- SKIP_WHITE (line_scan, text_buffer.end);
- }
-
- /* Process the whole buffer, one line or one sentence at a time. */
-
- for (cursor = text_buffer.start;
- cursor < text_buffer.end;
- cursor = next_context_start)
- {
-
- /* `context_start' gets initialized before the processing of each
- line, or once for the whole buffer if no end of line or sentence
- sequence separator. */
-
- context_start = cursor;
-
- /* If an end of line or end of sentence sequence is defined and
- non-empty, `next_context_start' will be recomputed to be the end of
- each line or sentence, before each one is processed. If no such
- sequence, then `next_context_start' is set at the end of the whole
- buffer, which is then considered to be a single line or sentence.
- This test also accounts for the case of an incomplete line or
- sentence at the end of the buffer. */
-
- if (context_regex_string
- && (re_search (context_regex, cursor,
- L_PDIFF (text_buffer.end, cursor),
- ZERO, L_PDIFF (text_buffer.end, cursor), &context_regs)
- >= 0))
- next_context_start = cursor + context_regs.end[0];
-
- else
- next_context_start = text_buffer.end;
-
- /* Include the separator into the right context, but not any suffix
- white space in this separator; this ensures it will be seen in
- output and will not take more space than necessary. */
-
- context_end = next_context_start;
- SKIP_WHITE_BACKWARDS (context_end, context_start);
-
- /* Read and process a single input line or sentence, one word at a
- time. */
-
- while (1)
- {
- if (word_regex)
-
- /* If a word regexp has been compiled, use it to skip at the
- beginning of the next word. If there is no such word, exit
- the loop. */
-
- {
- if (re_search (word_regex, cursor,
- L_PDIFF (context_end, cursor),
- ZERO, L_PDIFF (context_end, cursor), &word_regs)
- < 0)
- break;
- }
- else
-
- /* Avoid re_search and use the fastmap to skip to the beginning
- of the next word, but update word_regs.start[0] and
- word_regs.end[0] as if re_search had been called. If there
- is no more word in the buffer, exit the loop. */
-
- {
- scan = cursor;
- while (scan < context_end
- && !word_fastmap[(unsigned char) *scan])
- scan++;
-
- if (scan == context_end)
- break;
-
- word_regs.start[0] = I_PDIFF (scan, cursor);
-
- while (scan < context_end
- && word_fastmap[(unsigned char) *scan])
- scan++;
-
- word_regs.end[0] = I_PDIFF (scan, cursor);
- }
-
- /* Skip right to the beginning of the found word. */
-
- cursor += word_regs.start[0];
-
- /* Skip any zero length word. Just advance a single position,
- then go fetch the next word. */
-
- if (word_regs.end[0] == word_regs.start[0])
- {
- cursor++;
- continue;
- }
-
- /* This is a genuine, non empty word, so save it as a possible
- key. Then skip over it. Also, maintain the maximum length of
- all words read so far. It is mandatory to take the maximum
- length of all words in the file, without considering if they
- are actually kept or rejected, because backward jumps at output
- generation time may fall in *any* word. */
-
- possible_key.start = cursor;
- possible_key.size = word_regs.end[0] - word_regs.start[0];
- cursor += possible_key.size;
-
- if (possible_key.size > maximum_word_length)
- maximum_word_length = possible_key.size;
-
- /* In input reference mode, update `line_start' from its previous
- value. Count the lines just in case auto reference mode is
- also selected. If it happens that the word just matched is
- indeed part of a reference, just ignore it: this shows by
- `line_scan' having gone beyond the beginning of the word while
- skipping over the reference. */
-
- if (input_reference)
- {
- while (line_scan < possible_key.start)
- if (*line_scan == '\n')
- {
- total_line_count++;
- line_scan++;
- line_start = line_scan;
- SKIP_NON_WHITE (line_scan, text_buffer.end);
-
- reference_length = I_PDIFF (line_scan, line_start);
- }
- else
- line_scan++;
- if (line_scan > possible_key.start)
- continue;
- }
-
- /* Ignore the word if an `Ignore words' table exists and if it is
- part of it. Also ignore the word if an `Only words' table exists
- and if it is *not* part of it.
-
- It is allowed that both tables be used at once, even if this
- may look strange for now. Just ignore a word that would appear
- in both. If regexps are eventually implemented for these
- tables, the Ignore table could then reject words that would
- have been previously accepted by the Only table. */
-
- if (ignore_file && search_table (&possible_key, &ignore_table))
- continue;
- if (only_file && !search_table (&possible_key, &only_table))
- continue;
-
- /* A non-empty word has been found. First of all, ensure
- proper allocation of the next OCCURS, and make a pointer to
- where it will be constructed. The address is computed in
- bytes, because each entry takes `sizeof_occurs' bytes, which
- may differ from sizeof (OCCURS). */
-
- ALLOC_NEW_OCCURS (0);
- occurs_cursor = (OCCURS *)
- ((char *) occurs_table[0] + sizeof_occurs * number_of_occurs[0]);
-
- /* Define the reference field, if any. */
-
- if (auto_reference)
- {
-
- /* While auto referencing, update `line_start' from its
- previous value, counting lines as we go. If input
- referencing at the same time, `line_start' has been
- advanced earlier, and the following loop is never really
- executed. */
-
- while (line_scan < possible_key.start)
- if (*line_scan == '\n')
- {
- total_line_count++;
- line_scan++;
- line_start = line_scan;
- SKIP_NON_WHITE (line_scan, text_buffer.end);
- }
- else
- line_scan++;
-
- occurs_cursor->reference = total_line_count;
- }
- else if (input_reference)
- {
-
- /* If only input referencing, `line_start' has been computed
- earlier to detect the case the word matched would be part
- of the reference. The reference position is simply the
- value of `line_start', recorded relative to the keyword. */
-
- occurs_cursor->reference
- = (DELTA) I_PDIFF (line_start, possible_key.start);
- if (reference_length > reference_max_width)
- reference_max_width = reference_length;
- }
-
- /* Exclude the reference from the context in simple cases, that
- is, when the context starts exactly where the line starts. */
-
- if (input_reference && line_start == context_start)
- {
- SKIP_NON_WHITE (context_start, context_end);
- SKIP_WHITE (context_start, context_end);
- }
-
- /* Completes the OCCURS structure. Both distances are relative
- to the beginning of the keyword. */
-
- occurs_cursor->key = possible_key;
- occurs_cursor->left = I_PDIFF (context_start, possible_key.start);
- occurs_cursor->right = I_PDIFF (context_end, possible_key.start);
-
- #ifdef MSDOS
- assert (number_of_occurs[0] < 32767);
- #endif /* MSDOS */
- number_of_occurs[0]++;
- }
- }
- }
-
- /* Formatting and actual output - service routines. */
-
-
/* Write NUMBER blanks to standard output.  Nothing is written when NUMBER
   is zero or negative.  */

#ifdef __STDC__
void print_spaces (int number)
#else
void
print_spaces (number)
     int number;		/* how many blanks to emit */
#endif
{
  int remaining = number;

  while (remaining > 0)
    {
      putchar (' ');
      remaining--;
    }
}
-
-
- /* Prints the field provided by FIELD. */
-
- #ifdef __STDC__
- void print_field (BLOCK field)
- #else
- void
- print_field (field)
- BLOCK field;
- #endif
- {
- char HUGE *cursor; /* Cursor in field to print */
- int character; /* Current character */
- int base; /* Base character, without diacritic */
- int diacritic; /* Diacritic code for the character */
-
- /* Whitespace is not really compressed. Instead, each white space
- character (tab, vt, ht etc.) is printed as one single space. */
-
- for (cursor = field.start; cursor < field.end; cursor++)
- {
- character = (unsigned char) *cursor;
- if (edited_flag[character])
- {
-
- /* First check if this is a diacriticized character. All this
- stuff should be done by "ctype.c" specific routines, at least
- because the diacritic codes are quite "ctype.c" dependent.
- I'll do it here for now, and will move it elsewhere when the
- code will have settle down a little.
-
- This works only for TeX. I do not know how diacriticized
- letters work with `roff'. Please someone explain it to me! */
-
- diacritic = todiac (character);
- if (diacritic != 0 && output_format == TEX_FORMAT)
- {
- base = tobase (character);
- switch (diacritic)
- {
-
- case 1: /* Latin diphtongues */
- switch (base)
- {
- case 'o':
- printf ("\\oe{}");
- break;
-
- case 'O':
- printf ("\\OE{}");
- break;
-
- case 'a':
- printf ("\\ae{}");
- break;
-
- case 'A':
- printf ("\\AE{}");
- break;
-
- default:
- putchar (' ');
- }
- break;
-
- case 2: /* Acute accent */
- printf ("\\'%s%c", (base == 'i' ? "\\" : ""), base);
- break;
-
- case 3: /* Grave accent */
- printf ("\\`%s%c", (base == 'i' ? "\\" : ""), base);
- break;
-
- case 4: /* Circumflex accent */
- printf ("\\^%s%c", (base == 'i' ? "\\" : ""), base);
- break;
-
- case 5: /* Diaeresis */
- printf ("\\\"%s%c", (base == 'i' ? "\\" : ""), base);
- break;
-
- case 6: /* Tilde accent */
- printf ("\\~%s%c", (base == 'i' ? "\\" : ""), base);
- break;
-
- case 7: /* Cedilla */
- printf ("\\c{%c}", base);
- break;
-
- case 8: /* Small circle beneath */
- switch (base)
- {
- case 'a':
- printf ("\\aa{}");
- break;
-
- case 'A':
- printf ("\\AA{}");
- break;
-
- default:
- putchar (' ');
- }
- break;
-
- case 9: /* Strike through */
- switch (base)
- {
- case 'o':
- printf ("\\o{}");
- break;
-
- case 'O':
- printf ("\\O{}");
- break;
-
- default:
- putchar (' ');
- }
- break;
- }
- }
- else
-
- /* This is not a diacritic character, so handle cases which are
- really specific to `roff' or TeX. All white space processing
- is done as the default case of this switch. */
-
- switch (character)
- {
- case '"':
- /* In roff output format, double any quote. */
- putchar ('"');
- putchar ('"');
- break;
-
- case '$':
- case '%':
- case '&':
- case '#':
- case '_':
- /* In TeX output format, precede these with a backslash. */
- putchar ('\\');
- putchar (character);
- break;
-
- case '{':
- case '}':
- /* In TeX output format, precede these with a backslash and
- force mathematical mode. */
- printf ("$\\%c$", character);
- break;
-
- case '\\':
- /* In TeX output mode, request production of a backslash. */
- printf ("\\backslash{}");
- break;
-
- default:
- /* Any other flagged character produces a single space. */
- putchar (' ');
- }
- }
- else
- putchar (*cursor);
- }
- }
-
-
- /* Formatting and actual output - planning routines. */
-
-
- /* From information collected from command line options and input file
- readings, compute and fix some output parameter values. */
-
- #ifdef __STDC__
- void fix_output_parameters (void)
- #else
- void
- fix_output_parameters ()
- #endif
- {
- int file_index; /* index in text input file arrays */
- int line_ordinal; /* line ordinal value for reference */
- char ordinal_string[12]; /* edited line ordinal for reference */
- int reference_width; /* width for the whole reference */
- int character; /* character ordinal */
- STRING *cursor; /* cursor in some constant strings */
-
- /* In auto reference mode, the maximum width of this field is precomputed and
- subtracted from the overall line width. Add two for the parentheses
- that surround the line number. */
-
- if (auto_reference)
- {
- reference_max_width = 0;
- for (file_index = 0; file_index < number_input_files; file_index++)
- {
- line_ordinal = file_line_count[file_index] + 1;
- if (file_index > 0)
- line_ordinal -= file_line_count[file_index - 1];
- sprintf (ordinal_string, "%d", line_ordinal);
- reference_width = strlen (ordinal_string);
- if (input_file_name[file_index])
- reference_width += strlen (input_file_name[file_index]);
- if (reference_width > reference_max_width)
- reference_max_width = reference_width;
- }
- reference_max_width += 2;
- reference.start = xmalloc (reference_max_width + 1);
- }
-
- /* If the reference appears to the left of the output line, reserve some
- space for it right away, including one gap size. */
-
- if ((auto_reference || input_reference) && !right_reference)
- line_width -= reference_max_width + gap_size;
-
- /* The output lines, minimally, will contain from left to right a left
- context, a gap, and a keyword followed by the right context with no
- special intervening gap. Half of the line width is dedicated to the
- left context and the gap, the other half is dedicated to the keyword
- and the right context; these values are computed once and for all here.
- There also are tail and head wrap around fields, used when the keywork
- is near the beginning or the end of the line, or when some long word
- cannot fit in, but leave place from wrapped around shorter words. The
- maximum width of these fields are recomputed seperately for each line,
- on a case by case basis. It is worth noting that it cannot happen that
- both the tail and head fields are used at once. */
-
- half_line_width = line_width / 2;
- before_max_width = half_line_width - gap_size;
- keyafter_max_width = half_line_width;
-
- /* If truncation_string is the empty string, make it NULL to speed up
- tests. In this case, truncation_string_length will never get used, so
- there is no need to set it. */
-
- if (truncation_string && *truncation_string)
- truncation_string_length = strlen (truncation_string);
- else
- truncation_string = NULL;
-
- /* I never figured out exactly how UNIX' ptx plan the output width of its
- various fields. The following formula does not completely imitate
- UNIX' ptx in UNIX' ptx compatibility mode, but almost. In normal mode,
- rather compute the field widths correctly. */
-
- if (operating_mode == PTX_MODE)
- keyafter_max_width -= 2 * truncation_string_length + 1;
- else
- {
-
- /* When flagging truncation at the left of the keyword, the truncation
- mark goes at the beginning of the before field, unless there is a
- head field, in which case the mark goes at the left of the head
- field. When flagging truncation at the right of the keyward, the
- mark goes at the end of the keyafter field, unless there is a tail
- field, in which case the mark goes at the end of the tail field.
- So, only eight combination cases could arise for truncation marks:
-
- . None.
- . One beginning the before field.
- . One beginning the head field.
- . One ending the keyafter field.
- . One ending the tail field.
- . One beginning the before field, another ending the keyafter field.
- . One ending the tail field, another beginning the before field.
- . One ending the keyafter field, another beginning the head field.
-
- So, there is at most two truncation marks, which could appear both
- on the left side of the center of the output line, both on the
- right side, or one on either side. */
-
- before_max_width -= 2 * truncation_string_length;
- keyafter_max_width -= 2 * truncation_string_length;
- }
-
- /* Compute which characters need special output processing. Initialize by
- flagging any white space character. Complete the special character
- flagging according to selected output format. */
-
- for (character = 0; character < (1 << BYTEWIDTH); character++)
- edited_flag[character] = isspace (character);
-
- switch (output_format)
- {
- case UNKNOWN_FORMAT:
- /* Should never happen. */
-
- case DUMB_FORMAT:
- break;
-
- case ROFF_FORMAT:
-
- /* `Quote' charcters should be doubled. */
-
- edited_flag['"'] = 1;
-
- /* Any character with 8th bit set will print to a single space.
- Diacriticized characters do not work for `roff', because I do not
- how to do it. Please someone tell me! */
-
- for (character = 0200; character < (1 << BYTEWIDTH); character++)
- edited_flag[character] = 1;
- break;
-
- case TEX_FORMAT:
-
- /* Various characters need special processing. */
-
- for (cursor = "$%_{}\\"; *cursor; cursor++)
- edited_flag[*cursor] = 1;
-
- /* Any character with 8th bit setwill print to a single space, unless
- it is diacriticized. */
-
- for (character = 0200; character < (1 << BYTEWIDTH); character++)
- edited_flag[character] = todiac (character) != 0;
- break;
- }
- }
-
-
- /* Compute the position and length of all the output fields, given a pointer
- to some OCCURS. */
-
- #ifdef __STDC__
- void define_all_fields (OCCURS *occurs)
- #else
- void
- define_all_fields (occurs)
- OCCURS *occurs; /* current keyword entry being processed */
- #endif
- {
- int tail_max_width; /* allowable width of tail field */
- int head_max_width; /* allowable width of head field */
- char HUGE *cursor; /* running cursor in source text */
- char HUGE *left_context_start;/* start of left context */
- char HUGE *right_context_end; /* end of right context */
- char HUGE *left_field_start; /* conservative start for `head'/`before' */
- int file_index; /* index in text input file arrays */
- STRING *file_name; /* file name for reference */
- int line_ordinal; /* line ordinal for reference */
-
- /* Define `keyafter', start of left context and end of right context.
- `keyafter' starts at the saved position for keyword and extend to the
- right from the end of the keyword, eating separators or full words, but
- not beyond maximum allowed width for `keyafter' field or limit for the
- right context. Suffix spaces will be removed afterwards. */
-
- keyafter.start = occurs->key.start;
- keyafter.end = keyafter.start + occurs->key.size;
- left_context_start = keyafter.start + occurs->left;
- right_context_end = keyafter.start + occurs->right;
-
- cursor = keyafter.end;
- while (cursor < right_context_end
- && cursor <= keyafter.start + keyafter_max_width)
- {
- keyafter.end = cursor;
- SKIP_SOMETHING (cursor, right_context_end);
- }
- if (cursor <= keyafter.start + keyafter_max_width)
- keyafter.end = cursor;
-
- keyafter_truncation = truncation_string && keyafter.end < right_context_end;
-
- SKIP_WHITE_BACKWARDS (keyafter.end, keyafter.start);
-
- /* When the left context is wide, it might take some time to catch up from
- the left context boundary to the beginning of the `head' or `before'
- fields. So, in this case, to speed the catchup, we jump back from the
- keyword, using some secure distance, possibly falling in the middle of
- a word. A secure backward jump would be at least half the maximum
- width of a line, plus the size of the longest word met in the whole
- input. We conclude this backward jump by a skip forward of at least
- one word. In this manner, we should not inadvertently accept only part
- of a word. From the reached point, when it will be time to fix the
- beginning of `head' or `before' fields, we will skip forward words or
- delimiters until we get sufficiently near. */
-
- if (-occurs->left > half_line_width + maximum_word_length)
- {
- left_field_start
- = keyafter.start - (half_line_width + maximum_word_length);
- SKIP_SOMETHING (left_field_start, keyafter.start);
- }
- else
- left_field_start = keyafter.start + occurs->left;
-
- /* `before' certainly ends at the keyword, but not including separating
- spaces. It starts after than the saved value for the left context, by
- advancing it until it falls inside the maximum allowed width for the
- before field. There will be no prefix spaces either. `before' only
- advances by skipping single separators or whole words. */
-
- before.start = left_field_start;
- before.end = keyafter.start;
- SKIP_WHITE_BACKWARDS (before.end, before.start);
-
- while (before.start + before_max_width < before.end)
- SKIP_SOMETHING (before.start, before.end);
-
- if (truncation_string)
- {
- cursor = before.start;
- SKIP_WHITE_BACKWARDS (cursor, text_buffer.start);
- before_truncation = cursor > left_context_start;
- }
- else
- before_truncation = 0;
-
- SKIP_WHITE (before.start, text_buffer.end);
-
- /* The tail could not take more columns than what has been left in the
- left context field, and a gap is mandatory. It starts after the
- right context, and does not contain prefixed spaces. It ends at
- the end of line, the end of buffer or when the tail field is full,
- whichever comes first. It cannot contain only part of a word, and
- has no suffixed spaces. */
-
- tail_max_width
- = before_max_width - I_PDIFF (before.end, before.start) - gap_size;
-
- if (tail_max_width > 0)
- {
- tail.start = keyafter.end;
- SKIP_WHITE (tail.start, text_buffer.end);
-
- tail.end = tail.start;
- cursor = tail.end;
- while (cursor < right_context_end
- && cursor < tail.start + tail_max_width)
- {
- tail.end = cursor;
- SKIP_SOMETHING (cursor, right_context_end);
- }
-
- if (cursor < tail.start + tail_max_width)
- tail.end = cursor;
-
- if (tail.end > tail.start)
- {
- keyafter_truncation = 0;
- tail_truncation = truncation_string && tail.end < right_context_end;
- }
- else
- tail_truncation = 0;
-
- SKIP_WHITE_BACKWARDS (tail.end, tail.start);
- }
- else
- {
-
- /* No place left for a tail field. */
-
- tail.start = NULL;
- tail.end = NULL;
- tail_truncation = 0;
- }
-
- /* `head' could not take more columns than what has been left in the right
- context field, and a gap is mandatory. It ends before the left
- context, and does not contain suffixed spaces. Its pointer is advanced
- until the head field has shrunk to its allowed width. It cannot
- contain only part of a word, and has no suffixed spaces. */
-
- head_max_width
- = keyafter_max_width - I_PDIFF (keyafter.end, keyafter.start) - gap_size;
-
- if (head_max_width > 0)
- {
- head.end = before.start;
- SKIP_WHITE_BACKWARDS (head.end, text_buffer.start);
-
- head.start = left_field_start;
- while (head.start + head_max_width < head.end)
- SKIP_SOMETHING (head.start, head.end);
-
- if (head.end > head.start)
- {
- before_truncation = 0;
- head_truncation = (truncation_string
- && head.start > left_context_start);
- }
- else
- head_truncation = 0;
-
- SKIP_WHITE (head.start, head.end);
- }
- else
- {
-
- /* No place left for a head field. */
-
- head.start = NULL;
- head.end = NULL;
- head_truncation = 0;
- }
-
- if (auto_reference)
- {
-
- /* Construct the reference text in preallocated space from the file
- name and the line number. Find out in which file the reference
- occured. Standard input yields an empty file name. Insure line
- numbers are one based, even if they are computed zero based. */
-
- file_index = 0;
- while (file_line_count[file_index] < occurs->reference)
- file_index++;
-
- file_name = input_file_name[file_index];
- if (!file_name)
- file_name = "";
-
- line_ordinal = occurs->reference + 1;
- if (file_index > 0)
- line_ordinal -= file_line_count[file_index - 1];
-
- sprintf (reference.start, "%s(%d)", file_name, line_ordinal);
- reference.end = reference.start + strlen (reference.start);
- }
- else if (input_reference)
- {
-
- /* Reference starts at saved position for reference and extends right
- until some white space is met. */
-
- reference.start = keyafter.start + (DELTA) occurs->reference;
- reference.end = reference.start;
- SKIP_NON_WHITE (reference.end, right_context_end);
- }
- }
-
-
- /* Formatting and actual output - control routines. */
-
-
- /* Output the current output fields as one line for `troff' or `nroff'. */
-
- #ifdef __STDC__
- void output_one_roff_line (void)
- #else
- void
- output_one_roff_line ()
- #endif
- {
- /* Output the `tail' field. */
-
- printf (".xx \"");
- print_field (tail);
- if (tail_truncation)
- printf ("%s", truncation_string);
- putchar ('"');
-
- /* Output the `before' field. */
-
- printf (" \"");
- if (before_truncation)
- printf ("%s", truncation_string);
- print_field (before);
- putchar ('"');
-
- /* Output the `keyafter' field. */
-
- printf (" \"");
- print_field (keyafter);
- if (keyafter_truncation)
- printf ("%s", truncation_string);
- putchar ('"');
-
- /* Output the `head' field. */
-
- printf (" \"");
- if (head_truncation)
- printf ("%s", truncation_string);
- print_field (head);
- putchar ('"');
-
- /* Conditionnaly output the `reference' field. */
-
- if (auto_reference || input_reference)
- {
- printf (" \"");
- print_field (reference);
- putchar ('"');
- }
-
- putchar ('\n');
- }
-
-
- /* Output the current output fields as one line for `TeX'. */
-
- #ifdef __STDC__
- void output_one_tex_line ()
- #else
- void
- output_one_tex_line ()
- #endif
- {
- BLOCK key; /* key field, isolated */
- BLOCK after; /* after field, isolated */
- char HUGE *cursor; /* running cursor in source text */
-
- printf ("\\xx ");
- printf ("{");
- print_field (tail);
- printf ("}{");
- print_field (before);
- printf ("}{");
- key.start = keyafter.start;
- after.end = keyafter.end;
- cursor = keyafter.start;
- SKIP_SOMETHING (cursor, keyafter.end);
- key.end = cursor;
- after.start = cursor;
- print_field (key);
- printf ("}{");
- print_field (after);
- printf ("}{");
- print_field (head);
- printf ("}");
- if (auto_reference || input_reference)
- {
- printf ("{");
- print_field (reference);
- printf ("}");
- }
- printf ("\n");
- }
-
-
- /* Output the current output fields as one line for a dumb terminal. */
-
- #ifdef __STDC__
- void output_one_dumb_line (void)
- #else
- void
- output_one_dumb_line ()
- #endif
- {
- if (!right_reference)
- if (auto_reference)
- {
-
- /* Output the `reference' field, in such a way that GNU emacs
- next-error will handle it. The colon is taken from the gap which
- follows. */
-
- print_field (reference);
- putchar (':');
- print_spaces (reference_max_width
- + gap_size
- - I_PDIFF (reference.end, reference.start)
- - 1);
- }
- else
- {
-
- /* Output the `reference' field and its following gap. */
-
- print_field (reference);
- print_spaces (reference_max_width
- + gap_size
- - I_PDIFF (reference.end, reference.start));
- }
-
- if (tail.start < tail.end)
- {
- /* Output the `tail' field. */
-
- print_field (tail);
- if (tail_truncation)
- printf ("%s", truncation_string);
-
- print_spaces (half_line_width - gap_size
- - I_PDIFF (before.end, before.start)
- - (before_truncation ? truncation_string_length : 0)
- - I_PDIFF (tail.end, tail.start)
- - (tail_truncation ? truncation_string_length : 0));
- }
- else
- print_spaces (half_line_width - gap_size
- - I_PDIFF (before.end, before.start)
- - (before_truncation ? truncation_string_length : 0));
-
- /* Output the `before' field. */
-
- if (before_truncation)
- printf ("%s", truncation_string);
- print_field (before);
-
- print_spaces (gap_size);
-
- /* Output the `keyafter' field. */
-
- print_field (keyafter);
- if (keyafter_truncation)
- printf ("%s", truncation_string);
-
- if (head.start < head.end)
- {
- /* Output the `head' field. */
-
- print_spaces (half_line_width
- - I_PDIFF (keyafter.end, keyafter.start)
- - (keyafter_truncation ? truncation_string_length : 0)
- - I_PDIFF (head.end, head.start)
- - (head_truncation ? truncation_string_length : 0));
- if (head_truncation)
- printf ("%s", truncation_string);
- print_field (head);
- }
- else
-
- if ((auto_reference || input_reference) && right_reference)
- print_spaces (half_line_width
- - I_PDIFF (keyafter.end, keyafter.start)
- - (keyafter_truncation ? truncation_string_length : 0));
-
- if ((auto_reference || input_reference) && right_reference)
- {
- /* Output the `reference' field. */
-
- print_spaces (gap_size);
- print_field (reference);
- }
-
- printf ("\n");
- }
-
-
- /* Scan the whole occurs table and, for each entry, output one line in the
- appropriate format. */
-
- #ifdef __STDC__
- void generate_all_output (void)
- #else
- void
- generate_all_output ()
- #endif
- {
- int occurs_index; /* index of keyword entry being processed */
- OCCURS *occurs_cursor; /* current keyword entry being processed */
-
-
- /* The following assignments are useful to provide default values in case
- line contexts or references are not used, in which case these variables
- would never be computed. */
-
- tail.start = NULL;
- tail.end = NULL;
- tail_truncation = 0;
-
- head.start = NULL;
- head.end = NULL;
- head_truncation = 0;
-
-
- /* Loop over all keyword occurrences. */
-
- occurs_cursor = occurs_table[0];
-
- for (occurs_index = 0; occurs_index < number_of_occurs[0]; occurs_index++)
- {
- /* Compute the exact size of every field and whenever truncation flags
- are present or not. */
-
- define_all_fields (occurs_cursor);
-
- /* Produce one output line according to selected format. */
-
- switch (output_format)
- {
- case UNKNOWN_FORMAT:
- /* Should never happen. */
-
- case DUMB_FORMAT:
- output_one_dumb_line ();
- break;
-
- case ROFF_FORMAT:
- output_one_roff_line ();
- break;
-
- case TEX_FORMAT:
- output_one_tex_line ();
- break;
- }
-
- /* Advance the cursor into the occurs table. */
-
- occurs_cursor = (OCCURS *) ((char *) occurs_cursor + sizeof_occurs);
- }
- }
-
- /* Option decoding and main program. */
-
-
- /* Print program identification and options, then exit. If the program is
- installed under the name `ptx', then output only options pertaining to
- ptx compatibility mode. */
-
- #ifdef __STDC__
- void usage_and_exit (void)
- #else
- void
- usage_and_exit ()
- #endif
- {
- int is_gptx; /* if program name is not ptx */
-
- is_gptx = strcmp (program_name, "ptx") != 0;
-
- print_version ();
-
- if (is_gptx)
- {
- fprintf (stderr, "usage: %s [OPTION]... [INPUT]...\n",
- program_name);
- fprintf (stderr, "or: %s -p [OPTION]... [INPUT [OUTPUT]]\n",
- program_name);
- }
- else
- fprintf (stderr, "usage: %s [OPTION]... [INPUT [OUTPUT]]\n",
- program_name);
-
- fprintf (stderr, "\
- \n\
- -b FILE word break characters in this FILE\n\
- -f fold lower case to upper case for sorting\n\
- -g NUMBER gap size in characters between output fields\n\
- -i FILE read ignore word list from FILE\n\
- -o FILE read only word list from this FILE\n");
-
- if (is_gptx)
- fprintf (stderr, "\
- -p enforce standard ptx compatibility mode\n");
-
- fprintf (stderr, "\
- -r first field of each line is a reference\n\
- -t - still unimplemented -\n\
- -w NUMBER output line width in characters, reference excluded\n");
-
- if (is_gptx)
- fprintf (stderr, "\
- -A output automatically generated references\n");
-
- fprintf (stderr, "\
- -C see Copyright and copying conditions, then exit\n");
-
- if (is_gptx)
- fprintf (stderr, "\
- -F STRING flag line truncations with STRING (default is `/')\n\
- -O generate output as roff directives\n\
- -R references after right context, not counted in -w\n\
- -S REGEXP use REGEXP to match end of lines or end of sentences\n\
- -T generate output as TeX directives\n\
- -W REGEXP use REGEXP to match each keyword\n");
-
- if (is_gptx)
- fprintf (stderr, "\
- \n\
- Note: option -p disallows options -[AFORSTW].\n");
-
- exit (-1);
- }
-
-
- /* Main program. Decode ARGC arguments passed through the ARGV array of
- strings, then launch execution. */
-
- #ifdef __STDC__
- int main (int argc, char **argv)
- #else
- int
- main (argc, argv)
- int argc;
- char **argv;
- #endif
- {
- int optchar; /* argument character */
- extern int optind; /* index of argument */
- extern char *optarg; /* value or argument */
- int file_index; /* index in text input file arrays */
-
- if ((program_name = strrchr (argv[0], '/')))
- program_name++;
- else
- program_name = argv[0];
-
- if (strcmp (program_name, "ptx") == 0)
- {
- operating_mode = PTX_MODE;
- allowed_options = PTX_MODE_OPTIONS;
- }
- else
- {
- operating_mode = UNKNOWN_MODE;
- allowed_options = UNKNOWN_MODE_OPTIONS;
- }
-
- while ((optchar = getopt (argc, argv, allowed_options)) != EOF)
- {
-
- /* If some option is used which exists in normal mode but not in ptx
- mode, then it is an extension, so disallow ptx mode option. */
-
- if (operating_mode == UNKNOWN_MODE
- && strchr (NORMAL_MODE_OPTIONS, optchar) != NULL
- && strchr (PTX_MODE_OPTIONS, optchar) == NULL)
- {
- operating_mode = NORMAL_MODE;
- allowed_options = NORMAL_MODE_OPTIONS;
- }
-
- switch (optchar)
- {
- case 'p':
- operating_mode = PTX_MODE;
- allowed_options = PTX_MODE_OPTIONS;
- break;
-
- case 'b':
- break_file = optarg;
- break;
-
- case 'f':
- fold_lower_to_upper = 1;
- break;
-
- case 'g':
- gap_size = atoi (optarg);
- break;
-
- case 'i':
- ignore_file = optarg;
- break;
-
- case 'o':
- only_file = optarg;
- break;
-
- case 'r':
- input_reference = 1;
- break;
-
- case 't':
- /* A decouvrir... */
- break;
-
- case 'w':
- line_width = atoi (optarg);
- break;
-
- case 'A':
- auto_reference = 1;
- break;
-
- case 'C':
- print_version ();
- print_copyright ();
- exit (0);
-
- case 'F':
- truncation_string = optarg;
- break;
-
- case 'O':
- output_format = ROFF_FORMAT;
- break;
-
- case 'R':
- right_reference = 1;
- break;
-
- case 'S':
- context_regex_string = optarg;
- break;
-
- case 'T':
- output_format = TEX_FORMAT;
- break;
-
- case 'W':
- word_regex_string = optarg;
- break;
-
- default:
- usage_and_exit ();
- }
- }
-
- /* Select `normal' operating mode, if options processing did not fix it
- yet. */
-
- if (operating_mode == UNKNOWN_MODE)
- operating_mode = NORMAL_MODE;
-
- /* Change the default Ignore file according to operating mode. */
-
- if (!ignore_file)
- {
- #ifdef IGNORE
- if (operating_mode == NORMAL_MODE)
- ignore_file = IGNORE;
- #endif /* IGNORE */
-
- #ifdef PIGNORE
- if (operating_mode == PTX_MODE)
- ignore_file = PIGNORE;
- #endif /* PIGNORE */
- }
-
- /* Process remaining arguments according to operating mode. */
-
- if (optind == argc)
- {
-
- /* No more argument simply means: read standard input. */
-
- input_file_name = (STRING **) xmalloc (sizeof (STRING *));
- file_line_count = (int *) xmalloc (sizeof (int));
- number_input_files = 1;
- input_file_name[0] = NULL;
- }
- else if (operating_mode == PTX_MODE)
- {
-
- /* There is one necessary input file. */
-
- number_input_files = 1;
- input_file_name = (STRING **) xmalloc (sizeof (STRING *));
- file_line_count = (int *) xmalloc (sizeof (int));
- input_file_name[0] = argv[optind++];
-
- /* Redirect standard output, only if requested. */
-
- if (optind < argc)
- {
- fclose (stdout);
- if (fopen (argv[optind], "w") == NULL)
- perror_and_exit (argv[optind]);
- optind++;
- }
-
- /* Diagnose any other argument as an error. */
-
- if (optind < argc)
- usage_and_exit ();
- }
- else
- {
- number_input_files = argc - optind;
- input_file_name
- = (STRING **) xmalloc (number_input_files * sizeof (STRING *));
- file_line_count
- = (int *) xmalloc (number_input_files * sizeof (int));
-
- for (file_index = 0; file_index < number_input_files; file_index++)
- input_file_name[file_index] = argv[optind++];
- }
-
- /* When auto referencing, insure "-" will never be printed. */
-
- if (auto_reference)
- for (file_index = 0; file_index < number_input_files; file_index++)
- if (strcmp (input_file_name[file_index], "-") == 0)
- input_file_name[file_index] = NULL;
-
- /* If the output format has not been explicitely selected, choose `roff'
- format in UNIX' ptx compatibility mode, else choose dumb terminal
- format. */
-
- if (output_format == UNKNOWN_FORMAT)
- output_format = (operating_mode == PTX_MODE
- ? ROFF_FORMAT
- : DUMB_FORMAT);
-
- /* Read `Break character' file, if any. */
-
- if (break_file)
- digest_break_file (break_file);
-
- /* Read `Ignore words' file and `Only words' files, if any. If any of
- these files is empty, reset the name of the file to NULL, to avoid
- unnecessary calls to search_table. */
-
- if (ignore_file)
- {
- digest_word_file (ignore_file, &ignore_table);
- if (ignore_table.length == 0)
- ignore_file = NULL;
- }
-
- if (only_file)
- {
- digest_word_file (only_file, &only_table);
- if (only_table.length == 0)
- only_file = NULL;
- }
-
- /* Initialize the main tables. */
-
- initialize_regex ();
-
- sizeof_occurs = sizeof (OCCURS);
- if (!auto_reference)
- {
- sizeof_occurs -= sizeof (int);
- if (input_reference)
- sizeof_occurs += sizeof (DELTA);
- }
- #ifdef OCCURS_ALIGNMENT
- sizeof_occurs = ((sizeof_occurs + OCCURS_ALIGNMENT - 1)
- & ~(OCCURS_ALIGNMENT - 1));
- #endif
-
- /* Prepare to study all the input files. */
-
- number_of_occurs[0] = 0;
- total_line_count = 0;
- maximum_word_length = 0;
- reference_max_width = 0;
-
- for (file_index = 0; file_index < number_input_files; file_index++)
- {
-
- /* Read the file in core, than study it. */
-
- swallow_file_in_memory (input_file_name[file_index], &text_buffer);
- find_occurs_in_text ();
-
- /* Maintain for each file how many lines has been read so far when its
- end is reached. Incrementing the count first is a simple kludge to
- handle a possible incomplete line at end of file. */
-
- total_line_count++;
- file_line_count[file_index] = total_line_count;
- }
-
- /* Do the output process phase. */
-
- sort_found_occurs ();
- fix_output_parameters ();
- generate_all_output ();
-
- /* All done. */
-
- exit (0);
- }
-