home *** CD-ROM | disk | FTP | other *** search
- /* FORMAT CONVERSION FROM SGML
- ** ===========================
- **
- **
- ** 22 Nov 92 Fixed quoting of hrefs.
- ** CERN_WEIRDO ifdefed out -- proper SGML expected
- ** REMOVE_SCRIPT ifdefed out -- did ignore lines starting with "."
- */
-
- #import "HTStyle.h"
- #include "HTML.h" /* For directory object building */
-
- /* Output shorthands.
- **
- ** Each macro calls the routine of the same purpose held in `targetClass',
- ** passing `target' as the first argument. Both names must therefore be
- ** in scope wherever one of these macros is expanded.
- ** START always passes 0, 0 for the two trailing arguments of start_element.
- ** NOTE(review): those two arguments are presumably the attribute arrays --
- ** confirm against the structured-stream class declaration.
- */
- #define PUTC(c) (*targetClass.put_character)(target, c)
- #define PUTS(s) (*targetClass.put_string)(target, s)
- #define START(e) (*targetClass.start_element)(target, e, 0, 0)
- #define END(e) (*targetClass.end_element)(target, e)
- #define END_TARGET (*targetClass.end_document)(target)
- #define FREE_TARGET (*targetClass.free)(target)
-
- /* Stream object as seen from this file.
- **
- ** Only the leading class pointer is spelled out; the remaining members
- ** are elided here.
- */
- struct _HTStream {
- CONST HTStreamClass * isa; /* Class record holding the stream's routines */
- /* ... */
- };
-
-
-
- /* Description of how one paragraph style is written out as SGML.
- **
- ** The *_tag members are markup emitted by the writer. start_text and
- ** paragraph_text are text prefixes that the writer strips from the
- ** document text after a style start or a paragraph mark respectively.
- ** The static style definitions in this file initialise these members
- ** positionally, so the member order must not be changed.
- */
- typedef struct _SGML_style {
- char * start_tag; /* Tag to mark start of a style */
- char * paragraph_tag; /* Tag to mark paragraph mark within style */
- char * tab_tag; /* Tag to mark tab within style */
- char * end_tag; /* Tag to mark end of style */
- char * start_text; /* Text conventionally starting this style */
- char * paragraph_text; /* Text used as a paragraph mark within style*/
- char * end_text; /* Text used to end a style */
- HTStyle * style; /* Paragraph style to be used */
- int free_format; /* Flag: are line ends word breaks only? */
- int litteral; /* Flag: end only at close tag (cheat) ? If set, text is written without escaping */
- } SGML_style;
-
- /* Stack of previous styles:
- **
- ** Singly linked list node; each node records one SGML style that was
- ** interrupted and points at the node pushed before it.
- ** NOTE(review): nothing in this section of the file pushes or pops
- ** these nodes -- confirm whether the type is still used.
- */
- typedef struct _NestedStyle {
- struct _NestedStyle * next; /* previously nested style or 0 */
- SGML_style * SGML; /* SGML style interrupted */
- } NestedStyle;
-
-
- /* Paragraph Styles used by the SGML parser:
- ** ----------------------------------------
- */
-
- /* Default style: no start or end tag, "<P>" plus newline between
- ** paragraphs, free format, not litteral. findSGML() returns this entry
- ** for unstyled text and when no other entry matches.
- ** Members are given positionally in SGML_style order; the style pointer
- ** is 0 here.
- ** NOTE(review): the style pointer is presumably filled in elsewhere
- ** before findSGML() can match on it -- confirm.
- */
- static SGML_style Normal =
- { "", "<P>\n", "\t", "",
- "","", "", 0 ,1, 0};
-
- /* Heading styles, one entry per level: Heading[0] is H1 ... Heading[5] is H6.
- ** A paragraph mark inside a heading closes the heading tag and reopens it
- ** on a new line. All six are free format and not litteral.
- */
- static SGML_style Heading[6] = {
- { "\n<H1>", "</H1>\n<H1>", "\t", "</H1>", "", "", "", 0, 1, 0},
- { "\n<H2>", "</H2>\n<H2>", "\t", "</H2>", "", "", "", 0, 1, 0},
- { "\n<H3>", "</H3>\n<H3>", "\t", "</H3>", "", "", "", 0, 1, 0},
- { "\n<H4>", "</H4>\n<H4>", "\t", "</H4>", "", "", "", 0, 1, 0},
- { "\n<H5>", "</H5>\n<H5>", "\t", "</H5>", "", "", "", 0, 1, 0},
- { "\n<H6>", "</H6>\n<H6>", "\t", "</H6>", "", "", "", 0, 1, 0}
- };
-
- /* Glossary (definition list) style: each paragraph starts a <DT> term and
- ** a tab switches to the <DD> definition. Free format, not litteral.
- ** All ten members are listed, as for every other style in this file,
- ** rather than leaving `litteral' to implicit zero initialisation.
- */
- static SGML_style Glossary = /* Large hanging indent with tab */
- { "\n<DL>\n<DT>", "\n<DT>", "\n<DD>", "\n</DL>\n",
- "", "", "", 0, 1, 0};
-
- /* List-like styles.
- **
- ** listStyle and menuStyle differ only in the enclosing tag (UL / MENU).
- ** Both declare "\267\t" (the byte with octal value 267 followed by a tab)
- ** as the text that conventionally starts the style and each paragraph,
- ** so the writer strips it from the text when generating <LI>.
- ** NOTE(review): octal 267 is presumably the bullet glyph in the NeXT
- ** character encoding -- confirm.
- */
- static SGML_style listStyle = /* Hanging indent with tab */
- { "\n<UL>\n<LI>", "\n<LI>", "\t", "\n</UL>",
- "\267\t", "\267\t", "", 0, 1, 0};
-
- static SGML_style menuStyle = /* Like UL but less gap */
- { "\n<MENU>\n<LI>", "\n<LI>", "\t", "\n</MENU>",
- "\267\t", "\267\t", "", 0, 1, 0};
-
- /* Address block: free format, "<P>" between paragraphs. */
- static SGML_style addressStyle =
- { "\n<ADDRESS>", "<P>", "\t", "\n</ADDRESS>",
- "", "", "", 0, 1, 0 };
-
- /* Explicit format styles:
- **
- ** Example, Preformatted and Listing have free_format 0, so every newline
- ** in the text is written out as the paragraph tag (a plain newline).
- ** Example and Listing are also litteral: their text is written without
- ** escaping. Fixed is free format with "<P>" paragraph marks.
- */
- static SGML_style Example = /* Fixed width font, at least 80 chars wide */
- { "\n<XMP>", "\n", "\t", "</XMP>",
- "", "", "", 0 , 0, 1};
-
- static SGML_style Preformatted = /* Fixed width font, at least 80 chars wide */
- { "\n<PRE>", "\n", "\t", "</PRE>",
- "", "", "", 0 , 0, 0}; /* not litteral */
-
- static SGML_style Fixed = /* Fixed width font, at least 80 chars wide */
- { "\n<FIXED>", "<P>", "\t", "</FIXED>",
- "", "", "", 0 , 1, 0};
-
- static SGML_style Listing = /* Fixed width font, at least 132 chars wide */
- { "\n<LISTING>", "\n", "\t", "</LISTING>",
- "", "", "", 0 , 0, 1};
-
- /* Table of all possible SGML paragraph styles
- **
- ** findSGML() scans this table in order and returns the first entry whose
- ** HTStyle matches, so a style must be listed here to be recognised.
- ** NUMBER_OF_STYLES is derived from the table size and needs no manual
- ** update when an entry is added.
- */
- static SGML_style * styleTable[] = {
- &Normal, &Heading[0], &Heading[1], &Heading[2],
- &Heading[3], &Heading[4], &Heading[5],
- &Glossary, &listStyle, &menuStyle, &addressStyle, &Preformatted, &Fixed, &Example, &Listing
- }; /* style table */
-
- #define NUMBER_OF_STYLES (sizeof(styleTable)/sizeof(styleTable[0]))
-
-
- /* Write SGML File back OUT
- ** ------------------------
- **
- ** This is currently quite NeXT-specific.
- **
- ** We run through the runs. When a characteristic of a run changes, we
- ** output the appropriate SGML code. When several characteristics change at
- ** the same place, we output the code in an order such that the resulting
- ** structures will be nested. This means first unwrapping the old ones, and
- ** then entering the new ones. For example, it is better to produce
- **
- ** <h2><a>...</a></h2><a>...</a>
- ** than
- **
- ** <h2><a>...</h2></a><a>...</a>
- **
- ** The special treatment of newlines is because we want to strip extra newlines
- ** out. We ignore newlines at the beginning and end of the para style,
- ** and we treat multiple newlines as a single paragraph mark.
- **
- ** Bugs: @@@ Highlighting is ignored.
- ** @@@ end text is ignored.
- */
-
- #define LINE_WRAP 64 /* Start thinking about line wrap here */
-
- /* State shared between the SGML output methods. It is reset at the start
- ** of each write and updated as each run is generated.
- */
- static int SGML_gen_newlines; /* Number of newlines pending during SGML generation */
- static int SGML_gen_errors; /* Number of unrecognizable runs (was implicit int) */
- static SGML_style * currentSGML; /* SGML style of the paragraph being written, or 0 */
- static const char * saveName; /* pointer to name node is being saved under */
- static char * prefix; /* Pointer to prefix string to be junked */
- static int lineLength; /* Number of characters on a line so far */
-
- /* Map a paragraph style onto the SGML style that represents it.
- **
- ** On entry:
- **	para	paragraph pointer taken from a run; may be 0.
- ** Returns:
- **	The first styleTable entry whose HTStyle has that paragraph pointer.
- **	&Normal is returned for a null para, and also when nothing matches;
- **	the no-match case is traced and counted in SGML_gen_errors.
- */
- SGML_style * findSGML(void *para)
- {
-     int index = 0;
-
-     if (para == 0) return &Normal; /* Totally unstyled becomes Normal */
-
-     while (index < NUMBER_OF_STYLES) {
-         SGML_style * candidate = styleTable[index++];
-
-         /* Skip table slots with no entry or no paragraph style attached */
-         if (!candidate || !candidate->style) continue;
-
-         if (candidate->style->paragraph == para) return candidate;
-     }
-
-     /* Nothing matched: report it, count it, and fall back to Normal */
-     if (TRACE) printf("HT: Can't find SGML style!\n");
-     SGML_gen_errors++;
-     return &Normal;
- }
-
- /* Change Run
- ** ==========
- **
- ** Generates the SGML for one run, given the run before it.
- **
- ** On entry:
- **	last	the previous run (a zeroed dummy on the first call)
- **	r	the run to write (a zeroed dummy on the closing call)
- **
- ** Output order: close the old anchor, close the old paragraph style, open
- ** the new paragraph style, open the new anchor, then the text of the run.
- **
- ** Reads and updates the file statics currentSGML, prefix,
- ** SGML_gen_newlines and lineLength, and reads saveName.
- ** read_pointer, NEXT_TEXT_CHAR and WHITE are defined outside this section.
- ** NOTE(review): PUTS/PUTC expand to uses of `target' and `targetClass',
- ** which are not declared in this method -- confirm where they come from.
- */
- - (void) changeRunFrom: (NXRun *) last to: (NXRun *) r
- {
-     int chars_left = r->chars;
-
-     if (r->info != last->info) { /* End anchor */
-         if (last->info) PUTS ("</A>");
-     }
-
-     if (r->paraStyle != last->paraStyle) {
-         if (last->paraStyle) { /* End paragraph */
-             if (currentSGML) PUTS(currentSGML->end_tag);
-             else PUTS("<P>\n");
-             lineLength = 0; /* At column 1 */
-         }
-
-         currentSGML = findSGML(r->paraStyle); /* Start paragraph */
-         if (currentSGML) {
-             if (currentSGML->free_format)
-                 while(chars_left && WHITE(*read_pointer)) { /* Strip leading */
-                     (chars_left)--; /* white space */
-                     (void) NEXT_TEXT_CHAR;
-                 }
-             PUTS(currentSGML->start_tag);
-             prefix = currentSGML->start_text;
-         }
-         SGML_gen_newlines=0; /* Cancel */
-     }
-
-     /* If the first run is unstyled, the paragraph code above never fires
-     ** and currentSGML is still 0, yet everything below dereferences it.
-     ** Treat that case as Normal, exactly as findSGML() does for a null
-     ** paragraph style.
-     */
-     if (!currentSGML) currentSGML = &Normal;
-
-     if (r->info != last->info) { /* Start anchor */
-
-         if (SGML_gen_newlines) { /* Got anchor, need paragraph separator */
-             PUTS(currentSGML->paragraph_tag);
-             SGML_gen_newlines=0; /* paragraph flushed. */
-         }
-         if (r->info) {
-             HTChildAnchor * a = (HTChildAnchor *) r->info;
-             HTAnchor * d = HTAnchor_followMainLink((HTAnchor*)a);
-             char * address = HTAnchor_address((HTAnchor*)a);
-             /* strrchr gives 0 when the address has no '#'; the old code
-             ** added 1 to that and passed it to PUTS. */
-             char * fragment = address ? strrchr(address, '#') : 0;
-
-             PUTS("<A");
-             if (fragment) {
-                 PUTS("\nNAME=");
-                 PUTS(fragment+1);
-             }
-             if (address) free(address);
-
-             if (d) {
-                 char * absolute = HTAnchor_address(d);
-                 char * relative = HTRelative(absolute, saveName);
-
-                 PUTS(" HREF=\"");
-                 PUTS(relative);
-                 PUTC('"');
-
-                 free(relative);
-                 free(absolute);
-             }
-             PUTC('>');
-         }
-     }
-
-     /* Now output the textual part of the run
-     **
-     ** Within the prefix region (prefix!=0), we discard white space and
-     ** characters matching *prefix++. Note the prefix string may contain
-     ** white space.
-     **
-     ** The SGML_gen_newlines flag means that newlines have been found. They
-     ** are not actually implemented unless some more non-white text is
-     ** found, so that trailing newlines on the end of paragraphs are
-     ** stripped.
-     **
-     ** The line wrapping is primitive in the extreme, as only text
-     ** characters are counted. In practice it limits the length of any line
-     ** to a reasonable amount, though this is not guaranteed.
-     */
-     while (chars_left) {
-         char c = NEXT_TEXT_CHAR;
-         chars_left--;
-
-         if (prefix) {
-             if (*prefix) {
-                 if (c==*prefix) {
-                     ++prefix;
-                     continue; /* Strip prefix characters */
-                 }
-                 if (WHITE(c)) continue; /* Strip white space */
-                 if (TRACE) printf(
-                     "HTML: WARNING: Paragraph prefix incomplete: %i found where %i expected.\n",
-                     c, *prefix);
-             }
-             prefix=0; /* Prefix is over */
-         }
-
-         if (c=='\n') { /* Paragraph Marks: */
-             if (currentSGML->free_format) {
-                 SGML_gen_newlines++; /* Just flag it */
-                 prefix = currentSGML->paragraph_text;
-             } else {
-                 PUTS(currentSGML->paragraph_tag);
-             }
-             lineLength = 0; /* At column 1 */
-             continue;
-         }
-
-         /* Not newline */
-         if (SGML_gen_newlines) { /* Got text, need paragraph separator */
-             PUTS(currentSGML->paragraph_tag);
-             SGML_gen_newlines=0; /* paragraph flushed. */
-             lineLength = 0; /* At column 1 */
-         }
-
-         if (c=='\t') {
-             PUTS(currentSGML->tab_tag);
-             continue;
-         }
-
-         /* Not tab or newline. Count the character once only: the old
-         ** code incremented lineLength a second time inside the wrap test,
-         ** so free-format lines wrapped at about half of LINE_WRAP.
-         */
-         lineLength++; /* @@bug doesn't count entity names */
-         if ((currentSGML->free_format)
-             && (lineLength > LINE_WRAP) /* Wrap lines if we can */
-             && (c==' ')) {
-             c = '\n';
-             lineLength = 0;
-         }
-
-         if (currentSGML->litteral) {
-             PUTC(c);
-         } else {
-             switch(c) { /* Escape markup characters as entities */
-             case '<': PUTS("&lt;"); break;
-             case '&': PUTS("&amp;"); break;
-             default: PUTC(c); break;
-             } /* switch */
-         } /* not litteral */
-     }
-
- } /* changeRunFrom:to: */
-
-
-
- /* This is the body of the SGML output method.
- **
- ** On entry:
- **	target	structured stream that receives the generated document
- **	aName	name the node is being saved under; anchor HREFs are
- **		written relative to it
- ** Returns:
- **	self, or nil if any run had a paragraph style that findSGML()
- **	could not recognise.
- **
- ** Writes the HTML/HEAD/TITLE wrapper (plus a NEXTID element when
- ** nextAnchorNumber is non-zero), then walks the runs in order, calling
- ** changeRunFrom:to: for each adjacent pair, with a zeroed dummy run
- ** standing in before the first and after the last.
- */
- - writeSGML:(HTStructured *) target relativeTo:(const char *)aName
- {
-     NXRun * r = theRuns->runs;
-     int sor; /* Character position of start of run */
-     NXRun dummy;
-     char buffer[64]; /* Ample for the NEXTID line with any int value */
-     HTStructuredClass targetClass = *target->isa; /* copy access routines */
-
-     dummy.paraStyle = 0;
-     dummy.info = 0;
-     dummy.chars = 0;
-
-     /* This reset used to sit inside an unterminated comment and so never
-     ** ran; a count left over from an earlier write could leak into this one.
-     */
-     SGML_gen_newlines=0; /* Number of newlines read but not inserted */
-     HT = self;
-     saveName = aName;
-
-     SGML_gen_errors = 0;
-     currentSGML = 0;
-     prefix = 0; /* No prefix to junk */
-
-     START_INPUT;
-     lineLength = 0; /* Starting in column 1 */
-
-     START(HTML_HTML);
-     START(HTML_HEAD);
-     START(HTML_TITLE);
-     PUTS([window title]);
-     END(HTML_TITLE);
-
-     if (nextAnchorNumber) {
-         sprintf(buffer, "\n<NEXTID N=\"z%i\">\n", nextAnchorNumber);
-         PUTS(buffer);
-     }
-     END(HTML_HEAD);
-     START(HTML_BODY);
-
-     /* Change style tags etc
-     */
-     [self changeRunFrom:&dummy to:r]; /* Start first run */
-
-     /* sor is the character position at which run r starts. Both the
-     ** initialiser and the step advance r after reading its length, so
-     ** inside the body r is the run being entered and r-1 the one before.
-     */
-     for (sor=r++->chars; sor<textLength; sor=sor+(r++)->chars) {
-         if (TRACE) printf("%4i: %i chars in run %3i.\n",
-                           sor, r->chars, (int)(r-theRuns->runs));
-         [self changeRunFrom:r-1 to: r]; /* Runs 2 to N */
-     }
-
-     /* On exit from the loop r is one PAST the last run, so the run to
-     ** close is r-1. Passing r would read beyond the end of the runs.
-     */
-     [self changeRunFrom:r-1 to:&dummy]; /* Close last run */
-
-     tFlags.changeState = 0; /* Please notify delegate if changed */
-     END(HTML_BODY);
-     END(HTML_HTML);
-     return (SGML_gen_errors) ? nil : self;
- }
-