Internet File Formats

home *** CD-ROM | disk | FTP | other *** search

/ Internet File Formats / InternetFileFormatsCD.bin / text / html / windows / wp2x / wp2x.c < prev next >

Wrap

C/C++ Source or Header | 1991-08-18 | 51.8 KB | 1,516 lines

/* $Id: wp2x.c 1.10 91/08/18 15:05:41 raymond Exp $ */ /* Before compiling, read the section titled `portability concerns'. */ /************************************************************************ * $Log: wp2x.c $ * Revision 1.10 91/08/18 15:05:41 raymond * Descriptor file stuff. * * Revision 1.9 91/08/06 09:08:09 raymond * add missing `break' in check_arity * * Revision 1.8 91/08/06 08:31:21 raymond * Avoid infinite loop if file is corrupted. * Better error-checking on configuration file (new output scheme). * * Revision 1.7 91/08/02 13:35:37 raymond * Epsilonically better handling of environments that didn't end properly. * Change return type of main() to keep gcc quiet. * MSC support. * * Revision 1.6 91/07/28 21:08:53 raymond * BeginTabs et al, FNote#, ENote#, NegateTotal, more unsupported codes * Improve character tokens, Header, Footer * Take care when people don't end lines with HRt * Fix major bugs in endnote processing, footnote numbering (and nobody * noticed!) * More worries about signed characters. * * Revision 1.5 91/07/23 22:59:43 raymond * Add COMMENT token, and some bug fixes. * * Revision 1.4 91/07/23 22:09:23 raymond * Concessions to slightly non-ANSI compilers. (`const', `unsigned char') * More patches for machines with signed characters. * Fix blatant bug in hex constants. (Amazed nobody noticed.) * New tags SetFn#, Header, Footer. * Warning messages for unsupported tokens. * Backslahes processed in character tags. * Fixed(?) footnotes, endnotes, page length changes. * Inserted missing `break's into the huge switch. * * Revision 1.3 91/07/12 15:39:44 raymond * Spiffy Turbo C support. * Some <stdlib.h>'s don't declare errno et al. * Command line switches `-s' and `-n' added. * More cute warning messages. * Dots periodically emitted. * Give the enum of token types a name, to placate QuickC. * Fix problems with pitch changes and signed characters. 
* * Revision 1.2 91/06/22 08:18:22 raymond * <process.h> and fputchar() aren't sufficiently portable. * strerror() fails to exist on some so-called ANSI platforms. * Removed assumption that characters are unsigned. * Forgot to #include <stdarg.h> * */ /************************************************************************ * PORTABILITY CONCERNS ************************************************************************ * * If possible, compile with unsigned characters. (Though I think * I've taken care of all the places where I assumed characters are * unsigned.) * * This program assumes that your compiler is fully ANSI-conformant. * Depending on how non-conformant your compiler is, you may need to * set the following symbols at compile time: * * NO_CONST -- set this if your compiler does not know what `const' means. * Cdecl -- how to tag functions that are variadic. * * Cdecl is used if you need special declarations for variadic functions. * This is used by IBM PC compilers so that you can make the default * parameter passing Pascal-style or Fastcalls. * * Some very machine-dependent stuff happens when trying to open the * descriptor file. Please read dopen.c as well. */ #ifdef NO_CONST #define const #endif #ifndef Cdecl /* default is nothing */ #define Cdecl #endif /************************************************************************ * This program divides naturally into two parts. * * The first part reads in the descriptor file and builds the expansions * for each of the identifiers listed above. * This is the easy part. * * The second part reads the input file and uses the expansions collected * in the first part to transform the file into the output. * This is the hard part. * ************************************************************************/ /* And now, the code. * We start off with some obvious header files. 
*/ #include <stdio.h> #include <stdarg.h> #include <stdlib.h> #include <string.h> #include <ctype.h> /* Some platforms do not define these externals in stdlib.h */ extern int Cdecl errno; extern char *Cdecl sys_errlist[]; extern int Cdecl sys_nerr; /************************************************************************/ /* Some common idioms */ /************************************************************************/ #define do_nothing /* twiddle thumbs */ /************************************************************************/ /* Blowing up */ /************************************************************************/ /* The function "error" accepts two arguments. A FILE pointer and * a printf-style argument list. The printf-style arguments are * printed to stderr. If the FILE is non-NULL, the the remaining * contents of the file are printed as well (to provide context), up * to 80 characters. */ void Cdecl error(FILE *fp, char *fmt, ...) { int i; va_list ap; fputs("Error: ", stderr); va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); fputc('\n', stderr); if (fp) { fprintf(stderr, "Unread text: "); for (i = 0; i < 80 && !feof(fp); i++) fputc(getc(fp), stderr); fputc('\n', stderr); } exit(1); } /************************************************************************/ /* Command-line switches */ /************************************************************************/ int silent = 0; int blipinterval = 1024; /* display blips every 1K */ int blipcount; /************************************************************************/ /* Basic file manipulations */ /************************************************************************/ /* We here define a few basic functions. Let us hope that the first * three functions' names are self-descriptive. 
*/ int next_non_whitespace(FILE *fp) { register int c; while ((c = getc(fp)) != EOF && isspace(c)) do_nothing; return c; } int next_non_space_or_tab(FILE *fp) { register int c; while ((c = getc(fp)) != EOF && (c == ' ' || c == '\t')) do_nothing; return c; } void eat_until_newline(FILE *fp) { register int c; while ((c = getc(fp)) != EOF && c != '\n') do_nothing; } /* The function parse_hex grabs a (no-more-than-two-character) hex * constant. Similarly, parse_octal does the same for octal constants. */ int parse_hex(FILE *fp) { register int c, value; if (!isxdigit(c = toupper(getc(fp)))) error(fp, "Expecting a hex digit"); if ((value = c - '0') > 9) value += '0' - 'A' + 10; if (!isxdigit(c = getc(fp))) { ungetc(c, fp); return value; } c = toupper(c); value = (value << 4) + c - '0'; if (c > '9') value += '0' - 'A' + 10; return value; } int parse_octal(FILE *fp, register int c) { register int value = c - '0'; if ( (c = getc(fp)) < '0' || c > '7') { ungetc(c, fp); return value; } value = (value << 3) + c - '0'; if ( (c = getc(fp)) < '0' || c > '7') { ungetc(c, fp); return value; } return (value << 3) + c - '0'; } /************************************************************************/ /* Storing the input strings */ /************************************************************************/ /* The input strings are allocated from a large pool we set up at * startup. This lets us do our thing without having to fight * with people like malloc and friends. This method does limit * our configuration file to 32K, however. We hope that this is * not a problem. (It also means that the program can be translated * to almost any other language without too much difficulty.) * * Here's how it works. * * "pool" is an array of POOL_SIZE characters. The value of POOL_SIZE * is flexible, but shouldn't exceed 65535, since that's the size of * an IBM PC segment. If your configuration file is more than 64K, * then there's probably something wrong. 
* * "pool_ptr" points to the next character in "pool" that hasn't been * used for anything yet. * * "top_of_pool" points one character beyond the end of pool, so we can * see if we've run out of memory. * * When we want to put something into the pool, we simply store into "pool" * and increment "pool_ptr" appropriately. * * Access to these variables is done through the following functions, * implemented as macros. * * "anchor_string()" is called before you start throwing things into * the pool. It returns a pointer to the beginning of the string * being built up. * * "add_to_string(c)" adds the character "c" to the string being built up. * * "finish_string()" gets ready for building a new string. We check * that we did not overflow our pool. We pull the sneaky trick of * a dummy else clause so that [1] "else"s match up properly if this * is nested inside an "if" statement, [2] the semicolon gets eaten * up correctly. * * "remove_string(s)" removes all strings from the one called "s" onwards. * */ #define POOL_SIZE 32768U char pool[POOL_SIZE]; char *pool_ptr = pool; #define top_of_pool (pool + POOL_SIZE) #define anchor_string() pool_ptr #define add_to_string(c) (*pool_ptr++ = c) #define finish_string() \ if (pool_ptr >= top_of_pool) error(NULL, "string pool overflow."); \ else do_nothing #define remove_string(s) (pool_ptr = s) /************************************************************************/ /* Remembering the expansions */ /************************************************************************/ /* The array "expansion" contains the expansions for everything. * Everything is initialized to NULL. * * We set up things as follows: * expansion[0..255] contain the expansions for the possible characters. * expansion[256...] contain the expansions for the special codes. * * Make sure this table is kept in parallel with the names[] array * * */ /* name value When is it expanded? 
*/
/* ---- --- -------------------- */

/* Token codes for everything that is not a plain character.  They start
 * at 256 so that they can share the "expansion" array with the 256
 * possible character codes.  Each enumerator here has a matching entry,
 * at the same relative position, in the names[] table below.
 */
enum token_type { /* Some compilers do not like unnamed enums */
    typeout = 256, /* Typed out as soon as it is encountered */
    BEGIN         , /* Before the first character of the file */
    END           , /* After the last character of the file */
    Comment       , /* For wp2x-generated comments */
    eComment      , /* Emitted after the text of a wp2x-generated comment */
    PageNo        , /* Current page number */
    RomanPage     , /* Set page number (to roman numerals) */
    ArabicPage    , /* Set page number (to arabic) */
    HSpace        , /* unbreakable space (`Hard space') */
    Tab           , /* Tab character */
    BeginTabs     , /* Begin tab settings */
    /* DO NOT CHANGE THE RELATIVE ORDER OF THESE FOUR TOKENS */
    /* (the tab code computes SetTab + <tab kind, 0..3>) */
    SetTab        , /* Set normal tabstop at %d */
    SetTabCenter  , /* Set center tabstop at %d */
    SetTabRight   , /* Set right-justified tab at %d */
    SetTabDecimal, /* Set decimal tab at %d */
    EndTabs       , /* End tab settings */
    HPg           , /* Hard page break */
    CondEOP       , /* Conditional end-of-page */
    HRt           , /* Hard return */
    SRt           , /* Soft return */
    NHyph         , /* Normal hyphen */
    NHyphE        , /* Normal hyphen at the end of a line */
    HHyph         , /* Hard (nonbreakable) hyphen */
    DHyph         , /* Discretionary hyphen */
    DHyphE        , /* Discretionary hyphen at the end of a line */
    NoHyphWord    , /* Do not hyphenate this word */
    Marg          , /* Margin settings */
    TopMarg       , /* Set top margin */
    PageLength    , /* Set page length */
    SS            , /* Single spacing */
    DS            , /* Double spacing */
    OHS           , /* 1.5 spacing (One and a Half Spacing) */
    TS            , /* Triple spacing */
    LS            , /* Generic line spacing */
    LPI           , /* set 6 or 8 LPI */
    Bold          , /* Begin boldface */
    eBold         , /* End boldface */
    Und           , /* Begin underline */
    eUnd          , /* End underline */
    Red           , /* Begin redline */
    eRed          , /* End redline */
    Strike        , /* Begin strikeout */
    eStrike       , /* End strikeout */
    Rev           , /* Begin reverse video */
    eRev          , /* End reverse video */
    Over          , /* Overstrike */
    eOver         , /* [mythical "end overstroke" code] */
    Sup           , /* Superscript */
    eSup          , /* [mythical "end superscript" code] */
    Sub           , /* Subscript */
    eSub          , /* [mythical "end subscript" code] */
    UpHalfLine    , /* Advance printer up 1/2 line */
    DownHalfLine  , /* Advance printer down 1/2 line */
    AdvanceToHalfLine, /* Advance to absolute vertical position */
    Indent        , /* Indented paragraph */
    DIndent       , /* Left-and-right-indented paragraph */
    eIndent       , /* End indented paragraph */
    MargRel       , /* Margin release (unknown argument) */
    Center        , /* Center current line */
    eCenter       , /* End centering */
    CenterHere    , /* Center line around current column */
    eCenterHere   , /* End centering */
    Align         , /* Begin alignment */
    eAlign        , /* End alignment */
    AlignChar     , /* Set alignment character */
    FlushRight    , /* Begin flush right */
    eFlushRight   , /* End flush right */
    Math          , /* Begin math mode */
    eMath         , /* End math mode */
    MathCalc      , /* Begin math calc mode */
    MathCalcColumn, /* Math calc column */
    SubTtl        , /* Do subtotal */
    IsSubTtl      , /* Subtotal entry */
    Ttl           , /* Do total */
    IsTtl         , /* Total entry */
    GrandTtl      , /* Do grand total */
    NegateTotal   , /* Negate current total */
    Col           , /* Begin column mode */
    eCol          , /* End column mode */
    Fn            , /* Begin footnote */
    eFn           , /* End footnote */
    En            , /* Begin endnote */
    eEn           , /* End endnote */
    SetFnNum      , /* Set footnote number */
    FNoteNum      , /* Footnote number */
    ENoteNum      , /* Endnote number */
    TableMarker   , /* Table of contents or whatever marker */
    Hyph          , /* Hyphenation on */
    eHyph         , /* off */
    Just          , /* Justification on */
    eJust         , /* off */
    Wid           , /* Widow/orphan protection on */
    eWid          , /* off */
    HZone         , /* Hyphenation zone */
    DAlign        , /* Decimal alignment character */
    Header        , /* Begin header text */
    eHeader       , /* End header text */
    Footer        , /* Begin footer text */
    eFooter       , /* End footer text */
    Supp          , /* Suppress formatting for one page */
    CtrPg         , /* Center page vertically */
    SetFont       , /* Change pitch or font */
    SetBin        , /* Select paper bin (0, 1, ...) */
    PN            , /* Page number position (PN+0 through PN+8) */

    /* Internal tokens for unsupported operations */
    UnsupportedPlaceHolder = PN + 9,
    SetPageNumberColumn,
    SetTabs,
    SetUnderlineMode,
    DefineColumn,
    SetFootnoteAttributes,
    SetParagraphNumberingStyle,
    NumberedParagraph,
    BeginMarkedText,
    EndMarkedText,
    DefineMarkedText,
    DefineIndexMark,
    DefineMathColumns,
    Obsolete,
    ReservedCode,
    UnknownCode,
    LastToken /* one past the last valid code; sizes expansion[] */
};

/* Expansion text for every character (0..255) and every token code
 * (256..LastToken-1).  A NULL entry means "no expansion given".
 */
char *expansion[LastToken];

/************************************************************************/
/* Naming the identifiers */
/************************************************************************/

/* Extreme care must be taken to ensure that this list parallels the list
 * of token names above.
 *
 * Each entry gives the name by which the descriptor file refers to the
 * token, and its arity: the number of parameters (0, 1 or 2) that
 * check_arity will allow its expansion to reference.
 *
 * The NULL entry in the middle sits at position UnsupportedPlaceHolder.
 * match_identifier stops searching there, so the names after it are
 * only used for messages and cannot be matched from a descriptor file.
 *
 * NOTE(review): "Indent", "FlushRight" and "Align" are listed with
 * arity 0, yet the visible callers pass them parameters
 * (process1(Indent, ...), process1(FlushRight, ...),
 * process2(Align, ...)).  As written, a descriptor file cannot use
 * %1/%2 for these tokens -- confirm whether that is intended.
 */
typedef struct identifier {
    char *name;
    int arity;
} Identifier;

Identifier names[] = {
    { "typeout", 0 }, { "BEGIN", 0 }, { "END", 0 },
    { "Comment", 0 }, { "comment", 0 },
    { "PageNo", 0 }, { "RomanPage", 1 }, { "ArabicPage", 1 },
    { "HSpace", 0 }, { "Tab", 0 },
    { "BeginTabs", 0 },
    { "SetTab", 1 }, { "SetTabCenter", 1 },
    { "SetTabRight", 1 }, { "SetTabDecimal", 1 },
    { "EndTabs", 0 },
    { "HPg", 0 }, { "CondEOP", 1 }, { "HRt", 0 }, { "SRt", 0 },
    { "-", 0 }, /* NHyph */
    { "--", 0 }, /* NHyphE */
    { "=", 0 }, /* HHyph */
    { "\\-", 0 }, /* DHyph */
    { "\\--", 0 }, /* DHyphE */
    { "NoHyphWord", 0 },
    { "Marg", 2 }, { "TopMarg", 1 }, { "PageLength", 1 },
    { "SS", 0 }, { "DS", 0 },
    { "1.5S", 0 }, /* OHS */
    { "TS", 0 }, { "LS", 1 }, { "LPI", 1 },
    { "Bold", 0 }, { "bold", 0 },
    { "Und", 0 }, { "und", 0 },
    { "Red", 0 }, { "red", 0 },
    { "Strike", 0 }, { "strike", 0 },
    { "Rev", 0 }, { "rev", 0 },
    { "Over", 0 }, { "over", 0 },
    { "Sup", 0 }, { "sup", 0 },
    { "Sub", 0 }, { "sub", 0 },
    { "UpHalfLine", 0 }, { "DownHalfLine", 0 },
    { "AdvanceToHalfLine", 2 },
    { "Indent", 0 }, { "DIndent", 0 }, { "indent", 0 },
    { "MarginRelease", 1 },
    { "Center", 0 }, { "center", 0 },
    { "CenterHere", 0 }, { "centerhere", 0 },
    { "Align", 0 }, { "align", 0 }, { "AlignChar", 1 },
    { "FlushRight", 0 }, { "flushright", 0 },
    { "Math", 0 }, { "math", 0 },
    { "MathCalc", 0 }, { "MathCalcColumn", 0 },
    { "SubTotal", 0 }, { "IsSubTotal", 0 },
    { "Total", 0 }, { "IsTotal", 0 },
    { "GrandTotal", 0 }, { "NegateTotal", 0 },
    { "Col", 0 }, { "col", 0 },
    { "Fn", 0 }, { "fn", 0 },
    { "En", 0 }, { "en", 0 },
    { "SetFn#", 1 }, { "FNote#", 0 }, { "ENote#", 0 },
    { "TableMarker", 0 },
    { "Hyph", 0 }, { "hyph", 0 },
    { "Just", 0 }, { "just", 0 },
    { "Wid", 0 }, { "wid", 0 },
    { "HZone", 2 }, { "DAlign", 1 },
    { "Header", 0 }, { "header", 0 },
    { "Footer", 0 }, { "footer", 0 },
    { "Supp", 1 }, { "CtrPg", 0 },
    { "SetFont", 2 }, { "SetBin", 1 },
    { "PN0", 0 }, { "PN1", 0 }, { "PN2", 0 },
    { "PN3", 0 }, { "PN4", 0 }, { "PN5", 0 },
    { "PN6", 0 }, { "PN7", 0 }, { "PN8", 0 },
    { NULL, 0 }, /* UnsupportedPlaceHolder -- keeps match_identifier happy */
    { "set page number column", 0 },
    { "extended tabs", 0 },
    { "underline mode", 0 },
    { "define column", 0 },
    { "footnote attributes", 0 },
    { "paragraph numbering style", 0 },
    { "numbered paragraph", 0 },
    { "begin marked text", 0 },
    { "end marked text", 0 },
    { "define marked text", 0 },
    { "define index mark", 0 },
    { "define math columns", 0 },
    { "WPCorp obsolete", 0 },
    { "WPCorp reserved", 0 },
    { "WPCorp undefined", 0 },
};

/* The file pointer "descriptor" points to our descriptor file
 * and "input" points to our input file.
 *
 * Kinda makes sense that way.
 */
FILE *descriptor, *input;

/* And the function match_identifier(s) takes a string and converts
 * it to its corresponding integer.  Or blows up if it couldn't
 * find one.
*/ int match_identifier(const char *s) { Identifier *I; /* Maybe it is a special character */ if (s[0] == '\'' && s[2] == '\'' && s[3] == '\0') return (int) (unsigned char) s[1]; /* Else it must be a multi-character guy */ for (I = names; I->name; I++) if (!strcmp(I->name, s)) return typeout + (I - names); /* Otherwise, I don't know what to do with it */ error(descriptor, "Unknown identifier %s", s); /*NOTREACHED*/ return 0; } /* check_arity ensures that the expansion string is valid */ void check_arity(int ident, char *t) { char *s; int arity = 0; if (ident > typeout) arity = names[ident-typeout].arity; for (s = t; *s; s++) { if (*s != '%') continue; switch (*++s) { case '\n': if (s != t+1) error(descriptor, "%s: `%%\\n' not at start of expansion", names[ident-typeout].name); break; case '1': case 'c': if (arity < 1) goto bad_escape; break; case '2': if (arity < 2) goto bad_escape; break; case '%': break; default: bad_escape: error(descriptor, "%s: invalid escape `%%%c'", names[ident-typeout].name, *s); } } } /************************************************************************/ /* Reading input from the descriptor file */ /************************************************************************/ /* The macro igetc() gets a character from the input file. * the macro dgetc() gets a character from the descriptor file. */ #define igetc() getc(input) #define dgetc() getc(descriptor) /* expand_backslash() is called when a backslash is encountered in * the descriptor file. Its job is to parse a backslash-sequence. * The usual C-escapes (\a \b \f \n \r \t \v) are understood, as * well as the octal escape \000 [up to three octal digits] and * the hex escape \xFF [up to two hex digits]. 
*/ int expand_backslash(void) { int c; switch (c = dgetc()) { case 'a': c = '\a'; break; case 'b': c = '\b'; break; case 'f': c = '\f'; break; case 'n': c = '\n'; break; case 'r': c = '\r'; break; case 't': c = '\t'; break; case 'v': c = '\v'; break; case 'x': case 'X': c = parse_hex(descriptor); break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': c = parse_octal(descriptor, c); break; default: /* c = c; */ break; } return c; } /* The function read_identifier() attempts to match an identifier * in the descriptor file. It returns EOF if the end of the descriptor * file was reached, or the code of the identifier we found. * (or blows up if an error was detected.) * We build the identifier in "s", with the help of our * pool-managing functions above, then discard it, immediately, * since we don't use it any more. */ int read_identifier(void) { register int c; /* A character we have read */ char *s; /* The identifier we are building */ int ident; /* The identifier we found */ /* Skip over comments */ while ((c = next_non_whitespace(descriptor)) == '#') eat_until_newline(descriptor); if (c == EOF) return EOF; /* At this point, "c" contains the first letter of a potential * identifier. Let's see what it could possibly be. */ s = anchor_string(); if (c == '\'') { /* a character token */ add_to_string(c); if ((c = dgetc()) == '\\') c = expand_backslash(); add_to_string(c); if ((c = dgetc()) != '\'') error(descriptor, "Invalid character identifier"); add_to_string(c); c = next_non_space_or_tab(descriptor); } else do { /* a name token */ add_to_string(c); c = next_non_space_or_tab(descriptor); if (c == '\\') c = expand_backslash(); } while (c != EOF && c != '=' && c != '\n'); if (c != '=') error(descriptor, "Identifier not followed by = sign"); /* A boo-boo. Something bad happened. */ add_to_string('\0'); /* Make it a standard C string. */ finish_string(); ident = match_identifier(s); /* Go find one. 
*/ remove_string(s); /* And we're done with it now. */ return ident; } /* The function grab_expansion() reads expansion text from the * descriptor file and adds it to the pool, returning a pointer * to the string it just created. * * After anchoring a new string, we look for the opening quotation * mark, then start gobbling characters. Everything gets copied * straight into the string. * */ char *grab_expansion(void) { register int c; /* Characters being read */ char *s; /* The string we are building */ s = anchor_string(); if (next_non_whitespace(descriptor) != '\"') error(descriptor, "Quotation mark expected"); /* Now read the stream until we hit another quotation mark. */ while ((c = dgetc()) != EOF && c != '\"') { if (c == '\\') c = expand_backslash(); add_to_string(c); } add_to_string('\0'); finish_string(); return s; } /* Ah, now with all of these beautiful functions waiting for us, * we can now write our first Useful Function: do_descriptor_file. * It reads the descriptor file and loads up the "expansion" array * with the text expansions we are reading from the file. * * If we grabbed the expansion of a "typeout", we type it out * and discard the string. * * We stop when the descriptor file runs dry. * */ void do_descriptor_file(void) { register int ident; while ((ident = read_identifier()) != EOF) { expansion[ident] = grab_expansion(); if (ident == typeout && !silent) { fputs(expansion[typeout], stderr); remove_string(expansion[typeout]); expansion[typeout] = NULL; } else check_arity(ident, expansion[ident]); } } /************************************************************************/ /* Reading from the input file */ /************************************************************************/ /* The function verify(c) checks that the next character in the input * stream is indeed "c". It eats the character, if all is well. * If something went wrong, we complain to stderr, but keep going. 
*/
void verify(int c)
{
    int got = igetc();

    if (got == c)
        return;
    fprintf(stderr, "Warning: Expected %02X but received %02X.\n", c, got);
}

/* The function gobble(n) reads "n" characters from the input file
 * and throws them away.
 */
void gobble(int n)
{
    for (; n != 0; n--)
        (void) igetc();
}

int last_HRt = 0; /* most recent output was HRt */

/* Processing a special code simply entails dumping its expansion.
 * If the expansion is NULL, then we either
 *      [1] print nothing, if it is a code,
 *      [2] print the character itself, if it is an ASCII character.
 *
 * In dumping its expansion, we expand the following percent-escapes:
 *
 * The percent-escapes are:
 *      %\n -- newline if previous character was not a newline
 *             (meaningful only as first character in sequence)
 *      %1 -- first parameter, in decimal form
 *      %2 -- second parameter, in decimal form
 *      %c -- first parameter, in character form
 *      %% -- literal percent sign
 *
 * all other %-escapes are flagged as warnings (but should never occur,
 * since they are trapped at the time the descriptor file is read.)
*/ void process(int c, int d1, int d2) { char *s; static int last_newline = 0; last_HRt = 0; /* the killer switch sets this */ if (expansion[c] == NULL) { /* invent a default action */ if (c >= ' ' && c < 128) { putchar(c); /* regular characters emit themselves */ last_newline = 0; return; } else if (c < 256) { /* single character */ expansion[c] = anchor_string(); /* emits itself */ add_to_string(c); add_to_string('\0'); finish_string(); if (!silent) fprintf(stderr, "Warning: No expansion for %02X (%c)\n", c, c); } else { /* provide null expansion */ expansion[c] = ""; if (!silent) { fprintf(stderr, "Warning: No expansion for %s\n", names[c-typeout].name); } } } s = expansion[c]; if (!*s) return; /* the rest of the code assumes non-null string */ do { if (*s != '%') putchar(*s++); else { s++; switch (*s++) { case '\n': if (!last_newline) putchar('\n'); break; case '1': printf("%d", d1); break; case '2': printf("%d", d2); break; case 'c': putchar(d1); break; case '%': putchar('%'); break; default: fprintf(stderr, "Internal error: Invalid escape, %%%c\n", s[-1]); break; } } } while (*s); last_newline = s[-1] == '\n'; } #define process0(c) process(c,0,0) #define process1(c,a) process(c,a,0) #define process2(c,a,b) process(c,a,b) void unsupported(int c) { if (!silent && !expansion[c]) { expansion[c] = ""; fprintf(stderr, "Warning: `%s' code not supported\n", names[c-typeout].name); } process0(Comment); fputs(names[c-typeout].name, stdout); process0(eComment); } /* The function gobble_until(c) eats characters from the input file * until it reaches a c or reaches EOF. */ void gobble_until(int c) { int i; while ((i = igetc()) != EOF && (int) (unsigned char) i != c) do_nothing; } /* line_spacing(l) is called whenever we hit a line-spacing-change command. * The argument is the desired line spacing, multiplied by two. * So single spacing gets a 2, 1.5 spacing gets a 3, etc. 
*/ void line_spacing(int l) { switch (l) { case 2: process0(SS); break; case 3: process0(OHS); break; case 4: process0(DS); break; case 6: process0(TS); break; default: process1(LS, l); break; } } int environment_status = 0; /* cleanup at HRt */ void leave_environment(int force_HRt) { if (environment_status) { process0(environment_status); environment_status = 0; } if (force_HRt && !last_HRt) process0(HRt); } /* The "note_status" flag has one of three values: * 0 if we are not inside a note * 1 if we are inside a footnote * 2 if we are inside an endnote * * The function handle_note() is called to deal with footnotes and * endnotes. It adjusts the note_status accordingly. */ int note_status = 0; void handle_note(void) { if (note_status) { leave_environment(1); process0(note_status); note_status = 0; } else { /* Decide whether it is an endnote or a footnote */ if (igetc() & 2) { process0(En); note_status = eEn; gobble(5); } else { process0(Fn); note_status = eFn; gobble(7); } verify(0xFF); gobble(2); /* margins */ } } /* The tab_table is a bit field. Each set bit represents a tabstop. * Note, however, that the bits are counted from MSB to LSB. * * The tab_attribute_table is a nybble field. The n'th nybble represents * the attributes of the n'th tabstop. */ unsigned char tab_table[32]; unsigned char tab_attribute_table[20]; int next_attribute; void process_tab_attribute(int i) { int b; if (next_attribute & 1) b = tab_attribute_table[next_attribute/2] & 3; else b = (tab_attribute_table[next_attribute/2] / 16) & 3; next_attribute++; /* Bottom two bites define what kind of tab. * Bit 2 is set if we need dot filling. * Bit 3 is unused. * We `&3' above because we won't support dot filling. 
*/ process1(SetTab + b, i); } void process_tab_table(void) { int i; next_attribute = 0; process0(BeginTabs); for (i = 0; i < 32; i++) { if (tab_table[i] == 0) continue; /* early out */ if (tab_table[i] & 0x80) process_tab_attribute(i * 8 + 0); if (tab_table[i] & 0x40) process_tab_attribute(i * 8 + 1); if (tab_table[i] & 0x20) process_tab_attribute(i * 8 + 2); if (tab_table[i] & 0x10) process_tab_attribute(i * 8 + 3); if (tab_table[i] & 0x08) process_tab_attribute(i * 8 + 4); if (tab_table[i] & 0x04) process_tab_attribute(i * 8 + 5); if (tab_table[i] & 0x02) process_tab_attribute(i * 8 + 6); if (tab_table[i] & 0x01) process_tab_attribute(i * 8 + 7); } process0(EndTabs); } void handle_tabs(void) { /* pad the tables to force no new tabs, and left tabs everywhere */ memset(tab_table, 0, sizeof(tab_table)); memset(tab_attribute_table, 0, sizeof(tab_attribute_table)); fread(tab_table, 20, 1, input); /* old-style tabs */ process_tab_table(); } void handle_extended_tabs(void) { fread(tab_table, 32, 1, input); fread(tab_attribute_table, 20, 1, input); process_tab_table(); } /* The FF_status flag tells us what we should do when we encounter an 0xFF. * It contains the token code of the active code, or 0 if no code is active. */ int FF_status = 0; void handle_FF(void) { if (FF_status) { /* finish header/footer */ leave_environment(1); process0(FF_status); gobble(2); verify(0xD1); FF_status = 0; } else process0(0xFF); } /* The function process_token does all of the real work. * Given the first character of a token, we eat up everything * that belongs to that token. This routine might be called * recursively, since some tokens are defined in terms of other * tokens. (For example, the subscript code is expanded as * [Sub] <character being subscripted> [sub] * and the <character being subscripted> might involve other token * expansions; specifically, it might be an IBM Extended character.) * * Luckily, most of our tokens are not recursive. 
The macro * bracket(before, after) * does the recursive stuff for us, bracketing the next token * between expansions of "before" and "after". * */ #define bracket(before,after) process0(before); process_token(); \ process0(after); int process_token(void) { int c = igetc(); if (c == EOF) return 0; c = (int) (unsigned char) c; if (!--blipcount && !silent) { blipcount = blipinterval; putc('.', stderr); } switch (c) { /* Codes listed in numerical rather than logical order */ case 0x02: process0(PageNo); break; /* Page number */ case 0x09: process0(Tab); break; /* Tab character */ case 0x8C: /* Soft page break after a HRt */ case 0x0A: /* Hard Return */ last_HRt = 0; leave_environment(1); last_HRt = 1; break; case 0x0B: /* Soft page break after a SRt */ case 0x0D: process0(SRt); break; /* Soft Return */ case 0x0C: process0(HPg); break; /* Hard Page */ case '-' : process0(HHyph); break; /* Nonbreaking hyphen */ case 0x80: break; /* NOP */ case 0x81: process0(Just); break; /* Right justification */ case 0x82: process0(eJust); break; /* Ragged right */ case 0x83: /* End centering */ case 0x84: leave_environment(0); break; /* End aligned text */ case 0x85: process0(MathCalc); break; /* Begin math calc */ case 0x86: process0(CtrPg); break; /* Center page vertically */ case 0x87: process0(Col); break; /* Begin column mode */ case 0x88: process0(eCol); break; /* End column mode */ case 0x89: process0(Tab); break; /* Tab after right margin */ case 0x8A: process0(Wid); break; /* Widow/orphan protection */ case 0x8B: process0(eWid); break; /* Allow widows/orphans */ /* case 0x8C: see 0x0A */ case 0x8D: /* Footnote/Endnote number */ process0(note_status == eFn ? 
FNoteNum : ENoteNum); break; case 0x8E: case 0x8F: unsupported(ReservedCode); break; /* Reserved codes */ case 0x90: process0(Red); break; /* Begin redline */ case 0x91: process0(eRed); break; /* End redline */ case 0x92: process0(Strike); break; /* Begin strikeout */ case 0x93: process0(eStrike); break; /* End strikeout */ case 0x94: process0(Und); break; /* Begin underlining */ case 0x95: process0(eUnd); break; /* End underlining */ case 0x96: process0(Rev); break; /* Begin reverse video */ case 0x97: process0(eRev); break; /* End reverse video */ case 0x98: process0(TableMarker); break;/* Table of something marker */ case 0x99: bracket(Over, eOver); break; /* Overstrike */ case 0x9A: process0(NoHyphWord); break;/* Do not hyphenate this word */ case 0x9B: break; /* End of generated text */ case 0x9C: process0(eBold); break; /* End boldface */ case 0x9D: process0(Bold); break; /* Begin boldface */ case 0x9E: process0(eHyph); break; /* Forbid hyphenation */ case 0x9F: process0(Hyph); break; /* Allow hyphenation */ case 0xA0: process0(HSpace); break; /* Hard space */ case 0xA1: process0(SubTtl); break; /* Do subtotal */ case 0xA2: process0(IsSubTtl); break; /* Subtotal entry */ case 0xA3: process0(Ttl); break; /* Do total */ case 0xA4: process0(IsTtl); break; /* Total entry */ case 0xA5: process0(GrandTtl); break; /* Do grand total */ case 0xA6: process0(MathCalcColumn); break; /* Math calc column */ case 0xA7: process0(Math); break; /* Begin math mode */ case 0xA8: process0(eMath); break; /* End math mode */ case 0xA9: process0(NHyph); break; /* Normal breakable hyphen */ case 0xAA: /* Hyphen at end of line */ case 0xAB: process0(NHyphE); break; /* Hyphen at end of page */ case 0xAC: process0(DHyph); break; /* Discretionary hyphen */ case 0xAD: /* Discretionary hyphen at EOLn */ case 0xAE: process0(DHyphE); break; /* Discretionary hyphen at EOPg */ case 0xAF: /* EOT columns and EOLn */ case 0xB0: break; /* EOT columns and EOPg */ case 0xB1: process0(NegateTotal); 
break; /* Negate current total */ case 0xBC: bracket(Sup, eSup); break; /* Superscript */ case 0xBD: bracket(Sub, eSub); break; /* Subscript */ case 0xBE: process0(UpHalfLine); break; /* Advance 1/2 line up */ case 0xBF: process0(DownHalfLine); break; /* Advance 1/2 line down */ case 0xC0: gobble(2); c = igetc(); /* Margin change */ process2(Marg, c, igetc()); verify(0xC0); break; case 0xC1: gobble(1); line_spacing(igetc()); verify(0xC1); break; /* Line spacing change */ case 0xC2: process1(MargRel, igetc()); /* Margin release */ verify(0xC2); break; case 0xC3: /* Center text */ leave_environment(0); switch (igetc()) { case 0: process0(Center); /* Center between margins */ environment_status = eCenter; break; case 1: /* Center around current column */ process0(CenterHere); environment_status = eCenterHere; break; } gobble(2); verify(0xC3); break; case 0xC4: /* Align or Flush Right */ leave_environment(0); c = igetc(); /* if high bit on c is set, then dot fill. (Ignore) */ switch (c & 0x7f) { case 0x0C: case 0x0A: process1(FlushRight, igetc());/* alignment col */ environment_status = eFlushRight; break; default: process2(Align, c, igetc());/* alignment column */ environment_status = eAlign; break; } gobble(1); /* trash */ verify(0xC4); break; case 0xC5: gobble(2); c = igetc(); /* Hyphenation zone */ process2(HZone, c, igetc()); verify(0xC5); break; case 0xC6: gobble(1); /* Page number position */ process0(PN + igetc()); verify(0xC6); break; case 0xC7: gobble(2); c = igetc(); /* New page number */ c = (c<<8) + (unsigned char)igetc(); process1( (c&0x8000) ? 
RomanPage : ArabicPage, c&0x7fff); verify(0xC7); break; case 0xC8: gobble(3); /* Set Page number column */ /* next 3 bytes are <left> <center> <right> */ gobble(3); unsupported(SetPageNumberColumn); verify(0xC8); break; case 0xC9: gobble(20); /* Set tabs */ handle_tabs(); verify(0xC9); break; case 0xCA: process1(CondEOP, igetc()); /* Conditional end of page */ verify(0xCA); break; case 0xCB: /* Set pitch or font */ gobble(2); /* old pitch and font */ c = igetc(); process2(SetFont, c, igetc()); /* pitch and font number */ /* negative pitch means proportional font */ verify(0xCB); break; case 0xCC: /* Indented paragraph */ leave_environment(0); gobble(1); process1(Indent, igetc()); verify(0xCC); environment_status = eIndent; break; /* (really: Temporary margin) */ case 0xCD: /* Indented paragraph (obsolete) */ leave_environment(0); process1(Indent, igetc()); verify(0xCD); environment_status = eIndent; break; /* (really: Temporary margin) */ case 0xCE: gobble(1); process1(TopMarg, igetc()); /* Set top margin */ verify(0xCE); break; case 0xCF: /* Suppress headers/footers for this page */ process1(Supp, (unsigned char)igetc()); verify(0xCF); break; case 0xD0: gobble(2); /* old form length */ /* Set page length */ process1(PageLength, igetc()); /* lines per page */ gobble(1); /* new page length */ verify(0xD0); break; case 0xD1: /* header/footer */ c = igetc(); /* def byte */ gobble(1); /* old half-lines */ if (c&2) { process0(Footer); FF_status = eFooter; } else { process0(Header); FF_status = eHeader; } verify(0xFF); verify(0xFF); /* separator */ gobble(2); /* left and right margin */ break; /* continue processing */ case 0xD2: gobble(5); /* obsolete footnote */ unsupported(Obsolete); gobble_until(0xD2); break; case 0xD3: gobble(2); /* obsolete `set footnote number' */ unsupported(Obsolete); verify(0xD3); break; case 0xD4: /* Advance to half line number */ c = igetc(); /* current line number */ process2(AdvanceToHalfLine, c, igetc());/* desired line # */ verify(0xD4); 
break; case 0xD5: gobble(1); process1(LPI, igetc()); /* Set LPI (6 or 8) */ verify(0xD5); break; case 0xD6: /* set extended tabs */ /* next 4 bytes are <old start><old increment> <new start><new increment> */ gobble(4); unsupported(SetTabs); verify(0xD6); break; case 0xD7: gobble(63); /* Define math columns */ unsupported(DefineMathColumns); verify(0xD7); break; case 0xD8: gobble(1); process1(AlignChar, igetc()); verify(0xD8); break; /* Set alignment character */ case 0xD9: gobble(2); /* obsolete margin release */ unsupported(Obsolete); verify(0xD9); break; case 0xDA: gobble(1+1); /* Set underline mode */ /* second byte is a bit field. * 1 = double-underline (default single), * 2 = underline spaces (default don't) */ unsupported(SetUnderlineMode); verify(0xDA); break; case 0xDB: /* Set sheet feeder bin */ gobble(1); process1(SetBin, igetc()); verify(0xDB); break; /* We ignore these codes, since they are followed by an 0x0C or an 0x8C */ case 0xDC: gobble(7); verify(0xDC); break; /* End-of-page codes */ case 0xDD: gobble(22); /* define columns */ unsupported(DefineColumn); verify(0xDD); case 0xDE: environment_status = 0; /* End indented paragraph */ gobble(2); process0(eIndent); verify(0xDE); break; case 0xDF: /* invisible characters */ gobble_until(0xDF); break; case 0xE0: /* Doubly-indented paragraph */ leave_environment(0); gobble(1); process1(DIndent, igetc()); verify(0xE0); environment_status = eIndent; break; case 0xE1: process0((unsigned char)igetc()); verify(0xE1); break; /* IBM character */ case 0xE2: handle_note(); break; /* footnote or endnote */ case 0xE3: gobble(74+74); /* footnote attributes */ unsupported(SetFootnoteAttributes); verify(0xE3); break; case 0xE4: gobble(2); /* old */ /* set footnote number */ /* bit 7 of second byte doesn't count, and the value * is offset by one. 
*/ c = igetc() & 0x3f; c = (c << 7) + (igetc() & 0x7f); process1(SetFnNum, 1 + c); verify(0xE4); break; case 0xE5: /* paragraph numbering style */ gobble(7+7+7+7); unsupported(SetParagraphNumberingStyle); verify(0xE5); break; case 0xE6: /* paragraph number */ gobble(2+7); unsupported(NumberedParagraph); verify(0xE6); break; case 0xE9: /* begin marked text */ gobble(6); unsupported(BeginMarkedText); verify(0xE9); break; case 0xEA: /* end marked text */ unsupported(EndMarkedText); gobble_until(0xEA); break; case 0xEB: /* define marked text */ gobble(30); unsupported(DefineMarkedText); verify(0xEB); break; case 0xEC: /* define index mark */ gobble(2); unsupported(DefineIndexMark); verify(0xEC); break; case 0xED: /* Table of authorities */ unsupported(DefineIndexMark); gobble_until(0xED); break; case 0xEE: /* paragraph number def */ gobble(42); unsupported(SetParagraphNumberingStyle); verify(0xEE); break; case 0xEF: /* paragraph number */ gobble(16); unsupported(NumberedParagraph); verify(0xEF); break; case 0xF1: gobble(32 + 20); /* Tab settings */ handle_extended_tabs(); verify(0xF1); break; case 0xF3: /* column definition */ gobble(98); unsupported(DefineColumn); verify(0xF3); break; case 0xB2: case 0xB3: case 0xB4: case 0xB5: case 0xB6: case 0xB7: case 0xB8: case 0xB9: case 0xBA: case 0xF0: case 0xF2: case 0xF4: case 0xF5: case 0xF6: case 0xF7: case 0xF8: case 0xF9: case 0xFA: case 0xFB: case 0xFC: case 0xFD: case 0xFE: unsupported(UnknownCode); break; /* undefined codes */ case 0xFF: handle_FF(); break; default: process0(c); break; } return 1; } /* Now do the other Useful Function. */ void process_input(void) { process0(BEGIN); while (process_token()) do_nothing; process0(END); } /************************************************************************/ /* The main program */ /************************************************************************/ /* First, a pretty little function which tries to open a file and * complains loudly if it cannot. 
*/ FILE *efopen(const char *s, const char *m) { FILE *fp = fopen(s, m); if (fp == NULL) { fprintf(stderr, "Error: Cannot open %s", s); if (errno > 0 && errno < sys_nerr) fprintf(stderr, " (%s)\n", s, sys_errlist[errno]); fprintf(stderr, "\n"); exit(1); } return fp; } #include "dopen.c" /* ickiness with file opening */ /* Our main program does very little, really. * * After checking the command line, it proceeds to open the descriptor * file in text mode, and the input file in binary mode. * It then calls our two Useful Functions in turn, closing each file * after it has served its purpose. */ int Cdecl main(int argc, char **argv) { while (--argc && **++argv == '-') { while (*++*argv) switch (**argv) { case 's': silent = 1; break; case 'n': blipinterval = atoi(&argv[0][1]); goto finarg; default: goto usage; } finarg: ; } blipcount = blipinterval; if (argc != 2) { usage: fprintf(stderr, "usage: wp2x descriptor input > output\n"); exit(2); } dopen(argv[0]); input = efopen(argv[1], "rb"); do_descriptor_file(); fclose(descriptor); process_input(); fclose(input); return 0; }