home *** CD-ROM | disk | FTP | other *** search
- /* $Id: wp2x.c 1.10 91/08/18 15:05:41 raymond Exp $ */
-
- /* Before compiling, read the section titled `portability concerns'. */
-
- /************************************************************************
- * $Log: wp2x.c $
- * Revision 1.10 91/08/18 15:05:41 raymond
- * Descriptor file stuff.
- *
- * Revision 1.9 91/08/06 09:08:09 raymond
- * add missing `break' in check_arity
- *
- * Revision 1.8 91/08/06 08:31:21 raymond
- * Avoid infinite loop if file is corrupted.
- * Better error-checking on configuration file (new output scheme).
- *
- * Revision 1.7 91/08/02 13:35:37 raymond
- * Epsilonically better handling of environments that didn't end properly.
- * Change return type of main() to keep gcc quiet.
- * MSC support.
- *
- * Revision 1.6 91/07/28 21:08:53 raymond
- * BeginTabs et al, FNote#, ENote#, NegateTotal, more unsupported codes
- * Improve character tokens, Header, Footer
- * Take care when people don't end lines with HRt
- * Fix major bugs in endnote processing, footnote numbering (and nobody
- * noticed!)
- * More worries about signed characters.
- *
- * Revision 1.5 91/07/23 22:59:43 raymond
- * Add COMMENT token, and some bug fixes.
- *
- * Revision 1.4 91/07/23 22:09:23 raymond
- * Concessions to slightly non-ANSI compilers. (`const', `unsigned char')
- * More patches for machines with signed characters.
- * Fix blatant bug in hex constants. (Amazed nobody noticed.)
- * New tags SetFn#, Header, Footer.
- * Warning messages for unsupported tokens.
- * Backslashes processed in character tags.
- * Fixed(?) footnotes, endnotes, page length changes.
- * Inserted missing `break's into the huge switch.
- *
- * Revision 1.3 91/07/12 15:39:44 raymond
- * Spiffy Turbo C support.
- * Some <stdlib.h>'s don't declare errno et al.
- * Command line switches `-s' and `-n' added.
- * More cute warning messages.
- * Dots periodically emitted.
- * Give the enum of token types a name, to placate QuickC.
- * Fix problems with pitch changes and signed characters.
- *
- * Revision 1.2 91/06/22 08:18:22 raymond
- * <process.h> and fputchar() aren't sufficiently portable.
- * strerror() fails to exist on some so-called ANSI platforms.
- * Removed assumption that characters are unsigned.
- * Forgot to #include <stdarg.h>
- *
- */
-
- /************************************************************************
- * PORTABILITY CONCERNS
- ************************************************************************
- *
- * If possible, compile with unsigned characters. (Though I think
- * I've taken care of all the places where I assumed characters are
- * unsigned.)
- *
- * This program assumes that your compiler is fully ANSI-conformant.
- * Depending on how non-conformant your compiler is, you may need to
- * set the following symbols at compile time:
- *
- * NO_CONST -- set this if your compiler does not know what `const' means.
- * Cdecl -- how to tag functions that are variadic.
- *
- * Cdecl is used if you need special declarations for variadic functions.
- * This is used by IBM PC compilers so that you can make the default
- * parameter passing Pascal-style or Fastcalls.
- *
- * Some very machine-dependent stuff happens when trying to open the
- * descriptor file. Please read dopen.c as well.
- */
-
- #ifdef NO_CONST /* the compiler does not know what `const' means ... */
- #define const /* ... so make the keyword vanish */
- #endif
-
- #ifndef Cdecl /* default is nothing */
- #define Cdecl /* no special tag on variadic functions or library externals */
- #endif
-
- /************************************************************************
- * This program divides naturally into two parts.
- *
- * The first part reads in the descriptor file and builds the expansions
- * for each of the identifiers listed above.
- * This is the easy part.
- *
- * The second part reads the input file and uses the expansions collected
- * in the first part to transform the file into the output.
- * This is the hard part.
- *
- ************************************************************************/
-
- /* And now, the code.
- * We start off with some obvious header files.
- */
-
- #include <stdio.h>
- #include <stdarg.h>
- #include <stdlib.h>
- #include <string.h>
- #include <ctype.h>
-
- /* Some platforms do not define these externals in stdlib.h, so they are
- * declared by hand here.
- *
- * NOTE(review): hosted C libraries normally supply errno through <errno.h>,
- * and not every library provides sys_errlist/sys_nerr.  Confirm that these
- * hand-written declarations still match the target platform.
- */
- extern int Cdecl errno;
- extern char *Cdecl sys_errlist[];
- extern int Cdecl sys_nerr;
-
- /************************************************************************/
- /* Some common idioms */
- /************************************************************************/
-
- #define do_nothing /* twiddle thumbs: empty body for loops whose condition does all the work */
-
- /************************************************************************/
- /* Blowing up */
- /************************************************************************/
-
- /* The function "error" accepts two arguments. A FILE pointer and
- * a printf-style argument list. The printf-style arguments are
- * printed to stderr. If the FILE is non-NULL, the the remaining
- * contents of the file are printed as well (to provide context), up
- * to 80 characters.
- */
-
- void Cdecl error(FILE *fp, char *fmt, ...)
- {
- int i;
- va_list ap;
-
- fputs("Error: ", stderr);
- va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap);
- fputc('\n', stderr);
-
- if (fp) {
- fprintf(stderr, "Unread text: ");
- for (i = 0; i < 80 && !feof(fp); i++) fputc(getc(fp), stderr);
- fputc('\n', stderr);
- }
- exit(1);
- }
-
- /************************************************************************/
- /* Command-line switches */
- /************************************************************************/
- int silent = 0; /* nonzero suppresses warnings, typeout text and progress dots */
- int blipinterval = 1024; /* display blips every 1K: value blipcount is reloaded with */
- int blipcount; /* counts down once per token; a dot goes to stderr when it reaches zero */
-
- /************************************************************************/
- /* Basic file manipulations */
- /************************************************************************/
-
- /* We here define a few basic functions. Let us hope that the first
- * three functions' names are self-descriptive.
- */
-
/* Skip over whitespace (as classified by isspace) on fp.
 * Returns the first character that is not whitespace, or EOF if the
 * stream is exhausted first.
 */
int next_non_whitespace(FILE *fp)
{
    int ch;

    do {
        ch = getc(fp);
    } while (ch != EOF && isspace(ch));

    return ch;
}
-
/* Skip blanks and tabs (but not newlines) on fp.
 * Returns the first character that is neither, which may be EOF.
 */
int next_non_space_or_tab(FILE *fp)
{
    int ch;

    for (;;) {
        ch = getc(fp);
        /* EOF is neither a blank nor a tab, so it ends the scan too */
        if (ch != ' ' && ch != '\t')
            return ch;
    }
}
-
/* Discard characters from fp up to and including the next newline,
 * or up to end of file if no newline remains.
 */
void eat_until_newline(FILE *fp)
{
    int ch;

    do {
        ch = getc(fp);
    } while (ch != '\n' && ch != EOF);
}
-
- /* The function parse_hex grabs a (no-more-than-two-character) hex
- * constant. Similarly, parse_octal does the same for octal constants.
- */
-
/* Read one or two hex digits from fp and return their value.
 * A missing first digit is fatal (reported through error()); a
 * non-hex character in second position is pushed back for the caller.
 */
int parse_hex(FILE *fp)
{
    int digit, result;

    digit = toupper(getc(fp));
    if (!isxdigit(digit))
        error(fp, "Expecting a hex digit");

    result = (digit > '9') ? digit - 'A' + 10 : digit - '0';

    digit = getc(fp);
    if (!isxdigit(digit)) {
        ungetc(digit, fp);      /* not ours: leave it for the caller */
        return result;
    }

    digit = toupper(digit);
    result = (result << 4) + ((digit > '9') ? digit - 'A' + 10 : digit - '0');
    return result;
}
-
/* Finish reading an octal constant of at most three digits.
 * c is the first digit, already consumed by the caller; up to two more
 * octal digits are taken from fp.  The first character that is not an
 * octal digit is pushed back.  Returns the value of the constant.
 */
int parse_octal(FILE *fp, register int c)
{
    int result = c - '0';
    int remaining;

    for (remaining = 2; remaining > 0; remaining--) {
        c = getc(fp);
        if (c < '0' || c > '7') {
            ungetc(c, fp);
            break;
        }
        result = (result << 3) + (c - '0');
    }
    return result;
}
-
-
- /************************************************************************/
- /* Storing the input strings */
- /************************************************************************/
-
- /* The input strings are allocated from a large pool we set up at
- * startup. This lets us do our thing without having to fight
- * with people like malloc and friends. This method does limit
- * our configuration file to 32K, however. We hope that this is
- * not a problem. (It also means that the program can be translated
- * to almost any other language without too much difficulty.)
- *
- * Here's how it works.
- *
- * "pool" is an array of POOL_SIZE characters. The value of POOL_SIZE
- * is flexible, but shouldn't exceed 65535, since that's the size of
- * an IBM PC segment. If your configuration file is more than 64K,
- * then there's probably something wrong.
- *
- * "pool_ptr" points to the next character in "pool" that hasn't been
- * used for anything yet.
- *
- * "top_of_pool" points one character beyond the end of pool, so we can
- * see if we've run out of memory.
- *
- * When we want to put something into the pool, we simply store into "pool"
- * and increment "pool_ptr" appropriately.
- *
- * Access to these variables is done through the following functions,
- * implemented as macros.
- *
- * "anchor_string()" is called before you start throwing things into
- * the pool. It returns a pointer to the beginning of the string
- * being built up.
- *
- * "add_to_string(c)" adds the character "c" to the string being built up.
- *
- * "finish_string()" gets ready for building a new string. We check
- * that we did not overflow our pool. We pull the sneaky trick of
- * a dummy else clause so that [1] "else"s match up properly if this
- * is nested inside an "if" statement, [2] the semicolon gets eaten
- * up correctly.
- *
- * "remove_string(s)" removes all strings from the one called "s" onwards.
- *
- */
-
#define POOL_SIZE 32768U

char pool[POOL_SIZE];
char *pool_ptr = pool;
#define top_of_pool (pool + POOL_SIZE)

/* anchor_string(): start of the string that is about to be built. */
#define anchor_string() pool_ptr

/* add_to_string(c): append the character c to the string being built.
 * The bounds test is made BEFORE the store, so a descriptor file that is
 * too large ends in a clean "string pool overflow." error instead of
 * writing past the end of pool[].  The macro is a void expression; use it
 * as a statement.
 */
#define add_to_string(c) \
    (pool_ptr < top_of_pool \
        ? (void) (*pool_ptr++ = (c)) \
        : error(NULL, "string pool overflow."))

/* finish_string(): close off the current string.  A pool filled to the
 * very last byte is still reported as an overflow.  The dangling `else'
 * makes the macro behave as a single statement and eats the caller's
 * semicolon.
 */
#define finish_string() \
    if (pool_ptr >= top_of_pool) error(NULL, "string pool overflow."); \
    else do_nothing

/* remove_string(s): release the string s and everything allocated after it. */
#define remove_string(s) (pool_ptr = (s))
-
- /************************************************************************/
- /* Remembering the expansions */
- /************************************************************************/
-
- /* The array "expansion" contains the expansions for everything.
- * Everything is initialized to NULL.
- *
- * We set up things as follows:
- * expansion[0..255] contain the expansions for the possible characters.
- * expansion[256...] contain the expansions for the special codes.
- *
- * Make sure this table is kept in parallel with the names[] array
- *
- *
- */
-
- /* name value When is it expanded? */
- /* ---- --- -------------------- */
- enum token_type { /* Some compilers do not like unnamed enums */
- typeout = 256, /* Typed out as soon as it is encountered */
- BEGIN , /* Before the first character of the file */
- END , /* After the last character of the file */
- Comment , /* For wp2x-generated comments */
- eComment , /* End of a wp2x-generated comment */
-
- PageNo , /* Current page number */
- RomanPage , /* Set page number (to roman numerals) */
- ArabicPage , /* Set page number (to arabic) */
-
- HSpace , /* unbreakable space (`Hard space') */
-
- Tab , /* Tab character */
- BeginTabs , /* Begin tab settings */
-
- /* DO NOT CHANGE THE RELATIVE ORDER OF THESE FOUR TOKENS: process_tab_attribute() selects one by computing SetTab + (0..3) */
- SetTab , /* Set normal tabstop at %d */
- SetTabCenter , /* Set center tabstop at %d */
- SetTabRight , /* Set right-justified tab at %d */
- SetTabDecimal, /* Set decimal tab at %d */
-
- EndTabs , /* End tab settings */
-
- HPg , /* Hard page break */
- CondEOP , /* Conditional end-of-page */
- HRt , /* Hard return */
- SRt , /* Soft return */
-
- NHyph , /* Normal hyphen */
- NHyphE , /* Normal hyphen at the end of a line */
- HHyph , /* Hard (nonbreakable) hyphen */
- DHyph , /* Discretionary hyphen */
- DHyphE , /* Discretionary hyphen at the end of a line */
- NoHyphWord , /* Do not hyphenate this word */
-
- Marg , /* Margin settings */
- TopMarg , /* Set top margin */
- PageLength , /* Set page length */
-
- SS , /* Single spacing */
- DS , /* Double spacing */
- OHS , /* 1.5 spacing (One and a Half Spacing) */
- TS , /* Triple spacing */
- LS , /* Generic line spacing */
- LPI , /* set 6 or 8 LPI */
-
- Bold , /* Begin boldface */
- eBold , /* End boldface */
- Und , /* Begin underline */
- eUnd , /* End underline */
- Red , /* Begin redline */
- eRed , /* End redline */
- Strike , /* Begin strikeout */
- eStrike , /* End strikeout */
- Rev , /* Begin reverse video */
- eRev , /* End reverse video */
-
- Over , /* Overstrike */
- eOver , /* [mythical "end overstroke" code] */
- Sup , /* Superscript */
- eSup , /* [mythical "end superscript" code] */
- Sub , /* Subscript */
- eSub , /* [mythical "end subscript" code] */
-
- UpHalfLine , /* Advance printer up 1/2 line */
- DownHalfLine , /* Advance printer down 1/2 line */
- AdvanceToHalfLine, /* Advance to absolute vertical position */
-
- Indent , /* Indented paragraph */
- DIndent , /* Left-and-right-indented paragraph */
- eIndent , /* End indented paragraph */
- MargRel , /* Margin release (unknown argument) */
-
- Center , /* Center current line */
- eCenter , /* End centering */
- CenterHere , /* Center line around current column */
- eCenterHere , /* End centering */
-
- Align , /* Begin alignment */
- eAlign , /* End alignment */
- AlignChar , /* Set alignment character */
- FlushRight , /* Begin flush right */
- eFlushRight , /* End flush right */
-
- Math , /* Begin math mode */
- eMath , /* End math mode */
- MathCalc , /* Begin math calc mode */
- MathCalcColumn, /* Math calc column */
-
- SubTtl , /* Do subtotal */
- IsSubTtl , /* Subtotal entry */
- Ttl , /* Do total */
- IsTtl , /* Total entry */
- GrandTtl , /* Do grand total */
- NegateTotal , /* Negate current total */
-
- Col , /* Begin column mode */
- eCol , /* End column mode */
-
- Fn , /* Begin footnote */
- eFn , /* End footnote */
- En , /* Begin endnote */
- eEn , /* End endnote */
- SetFnNum , /* Set footnote number */
- FNoteNum , /* Footnote number */
- ENoteNum , /* Endnote number */
- TableMarker , /* Table of contents or whatever marker */
-
- Hyph , /* Hyphenation on */
- eHyph , /* off */
- Just , /* Justification on */
- eJust , /* off */
- Wid , /* Widow/orphan protection on */
- eWid , /* off */
- HZone , /* Hyphenation zone */
- DAlign , /* Decimal alignment character */
-
- Header , /* Begin header text */
- eHeader , /* End header text */
- Footer , /* Begin footer text */
- eFooter , /* End footer text */
-
- Supp , /* Suppress formatting for one page */
- CtrPg , /* Center page vertically */
-
- SetFont , /* Change pitch or font */
- SetBin , /* Select paper bin (0, 1, ...) */
-
- PN , /* Page number position (PN+0 through PN+8) */
-
- /* Internal tokens for unsupported operations */
- UnsupportedPlaceHolder = PN + 9, /* first value after PN+0..PN+8; its names[] entry is NULL */
- SetPageNumberColumn,
- SetTabs,
- SetUnderlineMode,
- DefineColumn,
- SetFootnoteAttributes,
- SetParagraphNumberingStyle,
- NumberedParagraph,
- BeginMarkedText,
- EndMarkedText,
- DefineMarkedText,
- DefineIndexMark,
- DefineMathColumns,
- Obsolete,
- ReservedCode,
- UnknownCode,
- LastToken /* one past the last token: the number of slots in expansion[] */
- };
-
- char *expansion[LastToken]; /* indexed by character code (0..255) or by token_type value */
-
-
- /************************************************************************/
- /* Naming the identifiers */
- /************************************************************************/
- /* Extreme care must be taken to ensure that this list parallels the list
- * of token names above.
- */
-
- typedef struct identifier {
- char *name; /* spelling used in the descriptor file and in messages */
- int arity; /* how many parameters an expansion may reference: %1/%c need 1, %2 needs 2 */
- } Identifier;
-
- Identifier names[] = { /* entry k describes token (typeout + k) */
- { "typeout", 0 },
- { "BEGIN", 0 },
- { "END", 0 },
- { "Comment", 0 },
- { "comment", 0 }, /* eComment */
- { "PageNo", 0 },
- { "RomanPage", 1 },
- { "ArabicPage", 1 },
- { "HSpace", 0 },
- { "Tab", 0 },
- { "BeginTabs", 0 },
- { "SetTab", 1 },
- { "SetTabCenter", 1 },
- { "SetTabRight", 1 },
- { "SetTabDecimal", 1 },
- { "EndTabs", 0 },
- { "HPg", 0 },
- { "CondEOP", 1 },
- { "HRt", 0 },
- { "SRt", 0 },
- { "-", 0 }, /* NHyph */
- { "--", 0 }, /* NHyphE */
- { "=", 0 }, /* HHyph */
- { "\\-", 0 }, /* DHyph */
- { "\\--", 0 }, /* DHyphE */
- { "NoHyphWord", 0 },
- { "Marg", 2 },
- { "TopMarg", 1 },
- { "PageLength", 1 },
- { "SS", 0 },
- { "DS", 0 },
- { "1.5S", 0 }, /* OHS */
- { "TS", 0 },
- { "LS", 1 },
- { "LPI", 1 },
- { "Bold", 0 },
- { "bold", 0 }, /* eBold */
- { "Und", 0 },
- { "und", 0 }, /* eUnd */
- { "Red", 0 },
- { "red", 0 }, /* eRed */
- { "Strike", 0 },
- { "strike", 0 }, /* eStrike */
- { "Rev", 0 },
- { "rev", 0 }, /* eRev */
- { "Over", 0 },
- { "over", 0 }, /* eOver */
- { "Sup", 0 },
- { "sup", 0 }, /* eSup */
- { "Sub", 0 },
- { "sub", 0 }, /* eSub */
- { "UpHalfLine", 0 },
- { "DownHalfLine", 0 },
- { "AdvanceToHalfLine", 2 },
- { "Indent", 0 },
- { "DIndent", 0 },
- { "indent", 0 }, /* eIndent */
- { "MarginRelease", 1 }, /* MargRel */
- { "Center", 0 },
- { "center", 0 }, /* eCenter */
- { "CenterHere", 0 },
- { "centerhere", 0 }, /* eCenterHere */
- { "Align", 0 },
- { "align", 0 }, /* eAlign */
- { "AlignChar", 1 },
- { "FlushRight", 0 },
- { "flushright", 0 }, /* eFlushRight */
- { "Math", 0 },
- { "math", 0 }, /* eMath */
- { "MathCalc", 0 },
- { "MathCalcColumn", 0 },
- { "SubTotal", 0 }, /* SubTtl */
- { "IsSubTotal", 0 }, /* IsSubTtl */
- { "Total", 0 }, /* Ttl */
- { "IsTotal", 0 }, /* IsTtl */
- { "GrandTotal", 0 }, /* GrandTtl */
- { "NegateTotal", 0 },
- { "Col", 0 },
- { "col", 0 }, /* eCol */
- { "Fn", 0 },
- { "fn", 0 }, /* eFn */
- { "En", 0 },
- { "en", 0 }, /* eEn */
- { "SetFn#", 1 }, /* SetFnNum */
- { "FNote#", 0 }, /* FNoteNum */
- { "ENote#", 0 }, /* ENoteNum */
- { "TableMarker", 0 },
- { "Hyph", 0 },
- { "hyph", 0 }, /* eHyph */
- { "Just", 0 },
- { "just", 0 }, /* eJust */
- { "Wid", 0 },
- { "wid", 0 }, /* eWid */
- { "HZone", 2 },
- { "DAlign", 1 },
- { "Header", 0 },
- { "header", 0 }, /* eHeader */
- { "Footer", 0 },
- { "footer", 0 }, /* eFooter */
- { "Supp", 1 },
- { "CtrPg", 0 },
- { "SetFont", 2 },
- { "SetBin", 1 },
- { "PN0", 0 }, /* PN + 0 */
- { "PN1", 0 },
- { "PN2", 0 },
- { "PN3", 0 },
- { "PN4", 0 },
- { "PN5", 0 },
- { "PN6", 0 },
- { "PN7", 0 },
- { "PN8", 0 }, /* PN + 8 */
- { NULL, 0 }, /* UnsupportedPlaceHolder -- keeps match_identifier happy: its search stops here, so the entries below can never be matched from a descriptor file */
- { "set page number column", 0 }, /* SetPageNumberColumn */
- { "extended tabs", 0 }, /* SetTabs */
- { "underline mode", 0 }, /* SetUnderlineMode */
- { "define column", 0 }, /* DefineColumn */
- { "footnote attributes", 0 }, /* SetFootnoteAttributes */
- { "paragraph numbering style", 0 }, /* SetParagraphNumberingStyle */
- { "numbered paragraph", 0 }, /* NumberedParagraph */
- { "begin marked text", 0 }, /* BeginMarkedText */
- { "end marked text", 0 }, /* EndMarkedText */
- { "define marked text", 0 }, /* DefineMarkedText */
- { "define index mark", 0 }, /* DefineIndexMark */
- { "define math columns", 0 }, /* DefineMathColumns */
- { "WPCorp obsolete", 0 }, /* Obsolete */
- { "WPCorp reserved", 0 }, /* ReservedCode */
- { "WPCorp undefined", 0 }, /* UnknownCode */
- };
-
- /* The file pointer "descriptor" points to our descriptor file
- * and "input" points to our input file.
- *
- * "descriptor" is the stream read by dgetc() and the one handed to
- * error() for context while the descriptor file is being parsed;
- * "input" is the stream read by igetc().
- */
-
- FILE *descriptor, *input;
-
- /* And the function match_identifier(s) takes a string and converts
- * it to its corresponding integer. Or blows up if it couldn't
- * find one.
- */
-
- int match_identifier(const char *s)
- {
- Identifier *I;
-
- /* Maybe it is a special character */
- if (s[0] == '\'' && s[2] == '\'' && s[3] == '\0')
- return (int) (unsigned char) s[1];
-
- /* Else it must be a multi-character guy */
- for (I = names; I->name; I++)
- if (!strcmp(I->name, s)) return typeout + (I - names);
-
- /* Otherwise, I don't know what to do with it */
- error(descriptor, "Unknown identifier %s", s);
- /*NOTREACHED*/
- return 0;
- }
-
- /* check_arity ensures that the expansion string is valid */
- void check_arity(int ident, char *t)
- {
- char *s;
- int arity = 0;
- if (ident > typeout) arity = names[ident-typeout].arity;
- for (s = t; *s; s++) {
- if (*s != '%') continue;
- switch (*++s) {
- case '\n':
- if (s != t+1)
- error(descriptor, "%s: `%%\\n' not at start of expansion",
- names[ident-typeout].name);
- break;
- case '1':
- case 'c':
- if (arity < 1) goto bad_escape;
- break;
- case '2':
- if (arity < 2) goto bad_escape;
- break;
- case '%':
- break;
- default:
- bad_escape:
- error(descriptor, "%s: invalid escape `%%%c'", names[ident-typeout].name, *s);
- }
- }
- }
-
- /************************************************************************/
- /* Reading input from the descriptor file */
- /************************************************************************/
-
- /* The macro igetc() gets a character from the input file.
- * The macro dgetc() gets a character from the descriptor file.
- * Both expand to a plain getc() call, so they yield EOF at end of file.
- */
-
- #define igetc() getc(input)
- #define dgetc() getc(descriptor)
-
- /* expand_backslash() is called when a backslash is encountered in
- * the descriptor file. Its job is to parse a backslash-sequence.
- * The usual C-escapes (\a \b \f \n \r \t \v) are understood, as
- * well as the octal escape \000 [up to three octal digits] and
- * the hex escape \xFF [up to two hex digits].
- */
-
- int expand_backslash(void) {
- int c;
-
- switch (c = dgetc()) {
- case 'a': c = '\a'; break;
- case 'b': c = '\b'; break;
- case 'f': c = '\f'; break;
- case 'n': c = '\n'; break;
- case 'r': c = '\r'; break;
- case 't': c = '\t'; break;
- case 'v': c = '\v'; break;
- case 'x':
- case 'X': c = parse_hex(descriptor); break;
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7': c = parse_octal(descriptor, c); break;
- default: /* c = c; */ break;
- }
- return c;
- }
-
- /* The function read_identifier() attempts to match an identifier
- * in the descriptor file. It returns EOF if the end of the descriptor
- * file was reached, or the code of the identifier we found.
- * (or blows up if an error was detected.)
- * We build the identifier in "s", with the help of our
- * pool-managing functions above, then discard it, immediately,
- * since we don't use it any more.
- */
-
- int read_identifier(void)
- {
- register int c; /* A character we have read */
- char *s; /* The identifier we are building */
- int ident; /* The identifier we found */
-
- /* Skip over comments */
- while ((c = next_non_whitespace(descriptor)) == '#')
- eat_until_newline(descriptor);
-
- if (c == EOF) return EOF;
-
- /* At this point, "c" contains the first letter of a potential
- * identifier. Let's see what it could possibly be.
- */
- s = anchor_string();
- if (c == '\'') { /* a character token */
- add_to_string(c);
- if ((c = dgetc()) == '\\') c = expand_backslash();
- add_to_string(c);
- if ((c = dgetc()) != '\'')
- error(descriptor, "Invalid character identifier");
- add_to_string(c);
- c = next_non_space_or_tab(descriptor);
- } else do { /* a name token */
- add_to_string(c);
- c = next_non_space_or_tab(descriptor);
- if (c == '\\') c = expand_backslash();
- } while (c != EOF && c != '=' && c != '\n');
-
- if (c != '=') error(descriptor, "Identifier not followed by = sign");
- /* A boo-boo. Something bad happened. */
-
- add_to_string('\0'); /* Make it a standard C string. */
- finish_string();
-
- ident = match_identifier(s); /* Go find one. */
-
- remove_string(s); /* And we're done with it now. */
-
- return ident;
- }
-
- /* The function grab_expansion() reads expansion text from the
- * descriptor file and adds it to the pool, returning a pointer
- * to the string it just created.
- *
- * After anchoring a new string, we look for the opening quotation
- * mark, then start gobbling characters. Everything gets copied
- * straight into the string.
- *
- */
-
- char *grab_expansion(void)
- {
- register int c; /* Characters being read */
- char *s; /* The string we are building */
-
- s = anchor_string();
-
- if (next_non_whitespace(descriptor) != '\"')
- error(descriptor, "Quotation mark expected");
-
- /* Now read the stream until we hit another quotation mark. */
-
- while ((c = dgetc()) != EOF && c != '\"') {
- if (c == '\\') c = expand_backslash();
- add_to_string(c);
- }
- add_to_string('\0');
- finish_string();
- return s;
- }
-
- /* Ah, now with all of these beautiful functions waiting for us,
- * we can now write our first Useful Function: do_descriptor_file.
- * It reads the descriptor file and loads up the "expansion" array
- * with the text expansions we are reading from the file.
- *
- * If we grabbed the expansion of a "typeout", we type it out
- * and discard the string.
- *
- * We stop when the descriptor file runs dry.
- *
- */
-
- void do_descriptor_file(void)
- {
- register int ident;
-
- while ((ident = read_identifier()) != EOF) {
- expansion[ident] = grab_expansion();
- if (ident == typeout && !silent) {
- fputs(expansion[typeout], stderr); remove_string(expansion[typeout]);
- expansion[typeout] = NULL;
- } else check_arity(ident, expansion[ident]);
- }
- }
-
- /************************************************************************/
- /* Reading from the input file */
- /************************************************************************/
-
/* The function verify(c) consumes the next character of the input file
 * and compares it with "c".  A mismatch produces a warning on stderr
 * showing both values in hex; processing continues either way.
 */

void verify(int c)
{
    int got = igetc();

    if (got == c) return;

    fprintf(stderr, "Warning: Expected %02X but received %02X.\n", c, got);
}
-
/* The function gobble(n) simply eats "n" characters from the input
 * file.  It gives up early if the file runs out, and a count that is
 * zero or negative eats nothing, so a bad count cannot keep us spinning
 * on a truncated or corrupted file.
 */
void gobble(int n)
{
    while (n-- > 0) {
        if (igetc() == EOF) break;
    }
}
-
- int last_HRt = 0; /* most recent output was HRt; cleared by every process() call, read by leave_environment() */
-
- /* Processing a special code simply entails dumping its expansion.
- * If the expansion is NULL, then we either
- * [1] print nothing, if it is a code,
- * [2] print the character itself, if it is an ASCII character.
- *
- * In dumping its expansion, we expand the following percent-escapes:
- *
- * %\n -- newline if previous character was not a newline
- * (meaningful only as first character in sequence)
- * %1 -- first parameter, in decimal form
- * %2 -- second parameter, in decimal form
- * %c -- first parameter, in character form
- * %% -- literal percent sign
- *
- * all other %-escapes are flagged as warnings (but should never occur,
- * since they are trapped at the time the descriptor file is read.)
- */
- void process(int c, int d1, int d2)
- {
- char *s;
- static int last_newline = 0;
-
- last_HRt = 0; /* the killer switch sets this */
-
- if (expansion[c] == NULL) { /* invent a default action */
- if (c >= ' ' && c < 128) {
- putchar(c); /* regular characters emit themselves */
- last_newline = 0;
- return;
- } else if (c < 256) { /* single character */
- expansion[c] = anchor_string(); /* emits itself */
- add_to_string(c); add_to_string('\0');
- finish_string();
- if (!silent) fprintf(stderr, "Warning: No expansion for %02X (%c)\n", c, c);
- } else { /* provide null expansion */
- expansion[c] = "";
- if (!silent) {
- fprintf(stderr, "Warning: No expansion for %s\n", names[c-typeout].name);
- }
- }
- }
-
- s = expansion[c];
- if (!*s) return; /* the rest of the code assumes non-null string */
- do {
- if (*s != '%') putchar(*s++);
- else {
- s++;
- switch (*s++) {
- case '\n':
- if (!last_newline) putchar('\n'); break;
- case '1':
- printf("%d", d1); break;
- case '2':
- printf("%d", d2); break;
- case 'c':
- putchar(d1); break;
- case '%':
- putchar('%'); break;
- default:
- fprintf(stderr, "Internal error: Invalid escape, %%%c\n", s[-1]);
- break;
- }
- }
- } while (*s);
- last_newline = s[-1] == '\n';
- }
-
- #define process0(c) process(c,0,0) /* expand a code that takes no parameters */
- #define process1(c,a) process(c,a,0) /* one parameter, substituted for %1 and %c */
- #define process2(c,a,b) process(c,a,b) /* two parameters, substituted for %1/%c and %2 */
-
- void unsupported(int c)
- {
- if (!silent && !expansion[c]) {
- expansion[c] = "";
- fprintf(stderr, "Warning: `%s' code not supported\n", names[c-typeout].name);
- }
- process0(Comment); fputs(names[c-typeout].name, stdout); process0(eComment);
- }
-
/* The function gobble_until(c) discards characters from the input file
 * up to and including the first one equal to c, or up to end of file
 * if c never shows up.
 */
void gobble_until(int c)
{
    int got;

    for (;;) {
        got = igetc();
        if (got == EOF) return;
        if ((int) (unsigned char) got == c) return;
    }
}
-
- /* line_spacing(l) is called whenever we hit a line-spacing-change command.
- * The argument is the desired line spacing, multiplied by two.
- * So single spacing gets a 2, 1.5 spacing gets a 3, etc.
- */
- void line_spacing(int l)
- {
- switch (l) {
- case 2: process0(SS); break;
- case 3: process0(OHS); break;
- case 4: process0(DS); break;
- case 6: process0(TS); break;
- default: process1(LS, l); break;
- }
- }
-
- int environment_status = 0; /* cleanup at HRt */
- void leave_environment(int force_HRt) {
- if (environment_status) {
- process0(environment_status);
- environment_status = 0;
- }
- if (force_HRt && !last_HRt) process0(HRt);
- }
-
- /* The "note_status" flag has one of three values:
- * 0 if we are not inside a note
- * 1 if we are inside a footnote
- * 2 if we are inside an endnote
- *
- * The function handle_note() is called to deal with footnotes and
- * endnotes. It adjusts the note_status accordingly.
- */
-
- int note_status = 0;
-
- void handle_note(void)
- {
- if (note_status) {
- leave_environment(1); process0(note_status); note_status = 0;
- } else { /* Decide whether it is an endnote or a footnote */
- if (igetc() & 2) { process0(En); note_status = eEn; gobble(5); }
- else { process0(Fn); note_status = eFn; gobble(7); }
- verify(0xFF);
- gobble(2); /* margins */
- }
- }
-
- /* The tab_table is a bit field. Each set bit represents a tabstop.
- * Note, however, that the bits are counted from MSB to LSB.
- *
- * The tab_attribute_table is a nybble field. The n'th nybble represents
- * the attributes of the n'th tabstop.
- */
- unsigned char tab_table[32];
- unsigned char tab_attribute_table[20];
- int next_attribute;
-
- void process_tab_attribute(int i) {
- int b;
-
- if (next_attribute & 1) b = tab_attribute_table[next_attribute/2] & 3;
- else b = (tab_attribute_table[next_attribute/2] / 16) & 3;
- next_attribute++;
-
- /* Bottom two bites define what kind of tab.
- * Bit 2 is set if we need dot filling.
- * Bit 3 is unused.
- * We `&3' above because we won't support dot filling.
- */
- process1(SetTab + b, i);
- }
-
- void process_tab_table(void) {
- int i;
- next_attribute = 0;
-
- process0(BeginTabs);
- for (i = 0; i < 32; i++) {
- if (tab_table[i] == 0) continue; /* early out */
- if (tab_table[i] & 0x80) process_tab_attribute(i * 8 + 0);
- if (tab_table[i] & 0x40) process_tab_attribute(i * 8 + 1);
- if (tab_table[i] & 0x20) process_tab_attribute(i * 8 + 2);
- if (tab_table[i] & 0x10) process_tab_attribute(i * 8 + 3);
- if (tab_table[i] & 0x08) process_tab_attribute(i * 8 + 4);
- if (tab_table[i] & 0x04) process_tab_attribute(i * 8 + 5);
- if (tab_table[i] & 0x02) process_tab_attribute(i * 8 + 6);
- if (tab_table[i] & 0x01) process_tab_attribute(i * 8 + 7);
- }
- process0(EndTabs);
- }
-
- void handle_tabs(void) {
- /* pad the tables to force no new tabs, and left tabs everywhere */
- memset(tab_table, 0, sizeof(tab_table));
- memset(tab_attribute_table, 0, sizeof(tab_attribute_table));
-
- fread(tab_table, 20, 1, input); /* old-style tabs */
- process_tab_table();
- }
-
- void handle_extended_tabs(void) {
- fread(tab_table, 32, 1, input);
- fread(tab_attribute_table, 20, 1, input);
- process_tab_table();
- }
-
/* The FF_status flag tells us what we should do when we encounter an 0xFF.
 * It contains the token code of the active code, or 0 if no code is active.
 */

int FF_status = 0;

/* handle_FF() deals with an 0xFF byte in the input.  With no code
 * active, the byte is processed like any other character.  Otherwise it
 * finishes the active header/footer: the environment is left, the
 * pending token is emitted, and the trailer (two bytes plus an expected
 * 0xD1) is consumed.
 */
void handle_FF(void)
{
    if (!FF_status) {
        process0(0xFF);
        return;
    }

    /* finish header/footer */
    leave_environment(1);
    process0(FF_status);
    gobble(2);
    verify(0xD1);
    FF_status = 0;
}
-
- /* The function process_token does all of the real work.
- * Given the first character of a token, we eat up everything
- * that belongs to that token. This routine might be called
- * recursively, since some tokens are defined in terms of other
- * tokens. (For example, the subscript code is expanded as
- * [Sub] <character being subscripted> [sub]
- * and the <character being subscripted> might involve other token
- * expansions; specifically, it might be an IBM Extended character.)
- *
- * Luckily, most of our tokens are not recursive. The macro
- * bracket(before, after)
- * does the recursive stuff for us, bracketing the next token
- * between expansions of "before" and "after".
- *
- * NOTE: bracket() expands to three separate statements (the last one
- * already ends in a semicolon).  It is therefore only safe where a list
- * of statements is allowed, e.g. directly under a `case' label.  Used as
- * the sole body of an unbraced `if' or loop, only the first statement
- * would be governed by the condition.
- */
-
- #define bracket(before,after) process0(before); process_token(); \
- process0(after);
-
- int process_token(void)
- {
- int c = igetc();
-
- if (c == EOF) return 0;
-
- c = (int) (unsigned char) c;
-
- if (!--blipcount && !silent) {
- blipcount = blipinterval;
- putc('.', stderr);
- }
-
- switch (c) { /* Codes listed in numerical rather than logical order */
-
- case 0x02: process0(PageNo); break; /* Page number */
-
- case 0x09: process0(Tab); break; /* Tab character */
-
- case 0x8C: /* Soft page break after a HRt */
- case 0x0A: /* Hard Return */
- last_HRt = 0; leave_environment(1); last_HRt = 1; break;
- case 0x0B: /* Soft page break after a SRt */
- case 0x0D: process0(SRt); break; /* Soft Return */
-
- case 0x0C: process0(HPg); break; /* Hard Page */
-
- case '-' : process0(HHyph); break; /* Nonbreaking hyphen */
-
- case 0x80: break; /* NOP */
- case 0x81: process0(Just); break; /* Right justification */
- case 0x82: process0(eJust); break; /* Ragged right */
- case 0x83: /* End centering */
- case 0x84: leave_environment(0); break; /* End aligned text */
- case 0x85: process0(MathCalc); break; /* Begin math calc */
- case 0x86: process0(CtrPg); break; /* Center page vertically */
- case 0x87: process0(Col); break; /* Begin column mode */
- case 0x88: process0(eCol); break; /* End column mode */
- case 0x89: process0(Tab); break; /* Tab after right margin */
- case 0x8A: process0(Wid); break; /* Widow/orphan protection */
- case 0x8B: process0(eWid); break; /* Allow widows/orphans */
- /* case 0x8C: see 0x0A */
- case 0x8D: /* Footnote/Endnote number */
- process0(note_status == eFn ? FNoteNum : ENoteNum); break;
- case 0x8E:
- case 0x8F: unsupported(ReservedCode); break; /* Reserved codes */
- case 0x90: process0(Red); break; /* Begin redline */
- case 0x91: process0(eRed); break; /* End redline */
- case 0x92: process0(Strike); break; /* Begin strikeout */
- case 0x93: process0(eStrike); break; /* End strikeout */
- case 0x94: process0(Und); break; /* Begin underlining */
- case 0x95: process0(eUnd); break; /* End underlining */
- case 0x96: process0(Rev); break; /* Begin reverse video */
- case 0x97: process0(eRev); break; /* End reverse video */
- case 0x98: process0(TableMarker); break;/* Table of something marker */
- case 0x99: bracket(Over, eOver); break; /* Overstrike */
- case 0x9A: process0(NoHyphWord); break;/* Do not hyphenate this word */
- case 0x9B: break; /* End of generated text */
- case 0x9C: process0(eBold); break; /* End boldface */
- case 0x9D: process0(Bold); break; /* Begin boldface */
- case 0x9E: process0(eHyph); break; /* Forbid hyphenation */
- case 0x9F: process0(Hyph); break; /* Allow hyphenation */
- case 0xA0: process0(HSpace); break; /* Hard space */
- case 0xA1: process0(SubTtl); break; /* Do subtotal */
- case 0xA2: process0(IsSubTtl); break; /* Subtotal entry */
- case 0xA3: process0(Ttl); break; /* Do total */
- case 0xA4: process0(IsTtl); break; /* Total entry */
- case 0xA5: process0(GrandTtl); break; /* Do grand total */
- case 0xA6: process0(MathCalcColumn); break; /* Math calc column */
- case 0xA7: process0(Math); break; /* Begin math mode */
- case 0xA8: process0(eMath); break; /* End math mode */
- case 0xA9: process0(NHyph); break; /* Normal breakable hyphen */
- case 0xAA: /* Hyphen at end of line */
- case 0xAB: process0(NHyphE); break; /* Hyphen at end of page */
- case 0xAC: process0(DHyph); break; /* Discretionary hyphen */
- case 0xAD: /* Discretionary hyphen at EOLn */
- case 0xAE: process0(DHyphE); break; /* Discretionary hyphen at EOPg */
- case 0xAF: /* EOT columns and EOLn */
- case 0xB0: break; /* EOT columns and EOPg */
-
- case 0xB1: process0(NegateTotal); break; /* Negate current total */
-
- case 0xBC: bracket(Sup, eSup); break; /* Superscript */
- case 0xBD: bracket(Sub, eSub); break; /* Subscript */
- case 0xBE: process0(UpHalfLine); break; /* Advance 1/2 line up */
- case 0xBF: process0(DownHalfLine); break; /* Advance 1/2 line down */
-
- case 0xC0: gobble(2); c = igetc(); /* Margin change */
- process2(Marg, c, igetc()); verify(0xC0); break;
-
- case 0xC1: gobble(1); line_spacing(igetc()); verify(0xC1); break;
- /* Line spacing change */
-
- case 0xC2: process1(MargRel, igetc()); /* Margin release */
- verify(0xC2); break;
-
-
- case 0xC3: /* Center text */
- leave_environment(0);
- switch (igetc()) {
- case 0: process0(Center); /* Center between margins */
- environment_status = eCenter; break;
- case 1: /* Center around current column */
- process0(CenterHere);
- environment_status = eCenterHere; break;
- }
- gobble(2); verify(0xC3); break;
-
- case 0xC4: /* Align or Flush Right */
- leave_environment(0);
- c = igetc();
- /* if high bit on c is set, then dot fill. (Ignore) */
- switch (c & 0x7f) {
- case 0x0C:
- case 0x0A: process1(FlushRight, igetc());/* alignment col */
- environment_status = eFlushRight;
- break;
- default: process2(Align, c, igetc());/* alignment column */
- environment_status = eAlign;
- break;
- }
- gobble(1); /* trash */
- verify(0xC4);
- break;
-
- case 0xC5: gobble(2); c = igetc(); /* Hyphenation zone */
- process2(HZone, c, igetc()); verify(0xC5); break;
-
- case 0xC6: gobble(1); /* Page number position */
- process0(PN + igetc()); verify(0xC6); break;
-
- case 0xC7: gobble(2); c = igetc(); /* New page number */
- c = (c<<8) + (unsigned char)igetc();
- process1( (c&0x8000) ? RomanPage : ArabicPage, c&0x7fff);
- verify(0xC7); break;
-
- case 0xC8: gobble(3); /* Set Page number column */
- /* next 3 bytes are <left> <center> <right> */
- gobble(3);
- unsupported(SetPageNumberColumn);
- verify(0xC8); break;
-
- case 0xC9: gobble(20); /* Set tabs */
- handle_tabs();
- verify(0xC9); break;
-
- case 0xCA: process1(CondEOP, igetc()); /* Conditional end of page */
- verify(0xCA); break;
-
- case 0xCB: /* Set pitch or font */
- gobble(2); /* old pitch and font */
- c = igetc();
- process2(SetFont, c, igetc()); /* pitch and font number */
- /* negative pitch means proportional font */
- verify(0xCB); break;
-
- case 0xCC: /* Indented paragraph */
- leave_environment(0);
- gobble(1); process1(Indent, igetc()); verify(0xCC);
- environment_status = eIndent; break;
- /* (really: Temporary margin) */
-
- case 0xCD: /* Indented paragraph (obsolete) */
- leave_environment(0);
- process1(Indent, igetc()); verify(0xCD);
- environment_status = eIndent; break;
- /* (really: Temporary margin) */
-
- case 0xCE: gobble(1); process1(TopMarg, igetc()); /* Set top margin */
- verify(0xCE); break;
-
- case 0xCF: /* Suppress headers/footers for this page */
- process1(Supp, (unsigned char)igetc());
- verify(0xCF); break;
-
- case 0xD0: gobble(2); /* old form length */ /* Set page length */
- process1(PageLength, igetc()); /* lines per page */
- gobble(1); /* new page length */
- verify(0xD0); break;
-
- case 0xD1: /* header/footer */
- c = igetc(); /* def byte */
- gobble(1); /* old half-lines */
- if (c&2) { process0(Footer); FF_status = eFooter; }
- else { process0(Header); FF_status = eHeader; }
- verify(0xFF); verify(0xFF); /* separator */
- gobble(2); /* left and right margin */
- break; /* continue processing */
-
- case 0xD2: gobble(5); /* obsolete footnote */
- unsupported(Obsolete);
- gobble_until(0xD2);
- break;
-
- case 0xD3: gobble(2); /* obsolete `set footnote number' */
- unsupported(Obsolete);
- verify(0xD3);
- break;
-
- case 0xD4: /* Advance to half line number */
- c = igetc(); /* current line number */
- process2(AdvanceToHalfLine, c, igetc());/* desired line # */
- verify(0xD4); break;
-
- case 0xD5: gobble(1); process1(LPI, igetc()); /* Set LPI (6 or 8) */
- verify(0xD5); break;
-
- case 0xD6: /* set extended tabs */
- /* next 4 bytes are <old start><old increment>
- <new start><new increment> */
- gobble(4);
- unsupported(SetTabs);
- verify(0xD6); break;
-
- case 0xD7: gobble(63); /* Define math columns */
- unsupported(DefineMathColumns);
- verify(0xD7); break;
-
- case 0xD8: gobble(1); process1(AlignChar, igetc());
- verify(0xD8); break; /* Set alignment character */
-
- case 0xD9: gobble(2); /* obsolete margin release */
- unsupported(Obsolete);
- verify(0xD9);
- break;
-
- case 0xDA: gobble(1+1); /* Set underline mode */
- /* second byte is a bit field.
- * 1 = double-underline (default single),
- * 2 = underline spaces (default don't)
- */
- unsupported(SetUnderlineMode);
- verify(0xDA); break;
-
- case 0xDB: /* Set sheet feeder bin */
- gobble(1); process1(SetBin, igetc());
- verify(0xDB); break;
-
- /* We ignore these codes, since they are followed by an 0x0C or an 0x8C */
- case 0xDC: gobble(7); verify(0xDC); break; /* End-of-page codes */
-
- case 0xDD: gobble(22); /* define columns */
- unsupported(DefineColumn);
- verify(0xDD);
-
- case 0xDE: environment_status = 0; /* End indented paragraph */
- gobble(2); process0(eIndent); verify(0xDE); break;
-
- case 0xDF: /* invisible characters */
- gobble_until(0xDF);
- break;
-
- case 0xE0: /* Doubly-indented paragraph */
- leave_environment(0);
- gobble(1); process1(DIndent, igetc()); verify(0xE0);
- environment_status = eIndent; break;
-
- case 0xE1: process0((unsigned char)igetc()); verify(0xE1); break;
- /* IBM character */
-
- case 0xE2: handle_note(); break; /* footnote or endnote */
-
- case 0xE3: gobble(74+74); /* footnote attributes */
- unsupported(SetFootnoteAttributes);
- verify(0xE3);
- break;
-
- case 0xE4: gobble(2); /* old */ /* set footnote number */
- /* bit 7 of second byte doesn't count, and the value
- * is offset by one.
- */
- c = igetc() & 0x3f;
- c = (c << 7) + (igetc() & 0x7f);
- process1(SetFnNum, 1 + c);
- verify(0xE4);
- break;
-
- case 0xE5: /* paragraph numbering style */
- gobble(7+7+7+7);
- unsupported(SetParagraphNumberingStyle);
- verify(0xE5);
- break;
-
- case 0xE6: /* paragraph number */
- gobble(2+7);
- unsupported(NumberedParagraph);
- verify(0xE6);
- break;
-
- case 0xE9: /* begin marked text */
- gobble(6);
- unsupported(BeginMarkedText);
- verify(0xE9);
- break;
-
- case 0xEA: /* end marked text */
- unsupported(EndMarkedText);
- gobble_until(0xEA);
- break;
-
- case 0xEB: /* define marked text */
- gobble(30);
- unsupported(DefineMarkedText);
- verify(0xEB);
- break;
-
- case 0xEC: /* define index mark */
- gobble(2);
- unsupported(DefineIndexMark);
- verify(0xEC);
- break;
-
- case 0xED: /* Table of authorities */
- unsupported(DefineIndexMark);
- gobble_until(0xED);
- break;
- case 0xEE: /* paragraph number def */
- gobble(42);
- unsupported(SetParagraphNumberingStyle);
- verify(0xEE);
- break;
-
- case 0xEF: /* paragraph number */
- gobble(16);
- unsupported(NumberedParagraph);
- verify(0xEF);
- break;
-
- case 0xF1: gobble(32 + 20); /* Tab settings */
- handle_extended_tabs();
- verify(0xF1);
- break;
-
- case 0xF3: /* column definition */
- gobble(98);
- unsupported(DefineColumn);
- verify(0xF3);
- break;
-
-
- case 0xB2:
- case 0xB3:
- case 0xB4:
- case 0xB5:
- case 0xB6:
- case 0xB7:
- case 0xB8:
- case 0xB9:
- case 0xBA:
-
- case 0xF0:
-
- case 0xF2:
- case 0xF4:
- case 0xF5:
- case 0xF6:
- case 0xF7:
- case 0xF8:
- case 0xF9:
- case 0xFA:
- case 0xFB:
- case 0xFC:
- case 0xFD:
- case 0xFE: unsupported(UnknownCode); break; /* undefined codes */
-
- case 0xFF: handle_FF(); break;
-
- default: process0(c); break;
- }
- return 1;
- }
-
- /* Now do the other Useful Function.
- */
- void process_input(void)
- {
- process0(BEGIN);
- while (process_token()) do_nothing;
- process0(END);
- }
-
-
- /************************************************************************/
- /* The main program */
- /************************************************************************/
-
- /* First, a pretty little function which tries to open a file and
- * complains loudly if it cannot.
- */
-
- FILE *efopen(const char *s, const char *m)
- {
- FILE *fp = fopen(s, m);
-
- if (fp == NULL) {
- fprintf(stderr, "Error: Cannot open %s", s);
- if (errno > 0 && errno < sys_nerr)
- fprintf(stderr, " (%s)\n", s, sys_errlist[errno]);
- fprintf(stderr, "\n");
- exit(1);
- }
-
- return fp;
- }
-
- #include "dopen.c" /* ickiness with file opening */
-
- /* Our main program does very little, really.
- *
- * After checking the command line, it proceeds to open the descriptor
- * file in text mode, and the input file in binary mode.
- * It then calls our two Useful Functions in turn, closing each file
- * after it has served its purpose.
- */
-
- int Cdecl main(int argc, char **argv)
- {
- while (--argc && **++argv == '-') {
- while (*++*argv) switch (**argv) {
- case 's': silent = 1; break;
- case 'n': blipinterval = atoi(&argv[0][1]); goto finarg;
- default: goto usage;
- }
- finarg: ;
- }
- blipcount = blipinterval;
-
- if (argc != 2) {
- usage:
- fprintf(stderr, "usage: wp2x descriptor input > output\n");
- exit(2);
- }
-
- dopen(argv[0]);
- input = efopen(argv[1], "rb");
-
- do_descriptor_file();
- fclose(descriptor);
-
- process_input();
- fclose(input);
- return 0;
- }
-