home *** CD-ROM | disk | FTP | other *** search
-
- /*
- * Word recognizer (with hyphenation)
- *
- * This program acts as a very simple filter for files of
- * text that may have hyphenated words at the end of an input
- * line. Output consists of each word on a separate line
- * with hyphenated words rejoined. Note: a word is said to
- * start with the first alphabetic character and end with the
- * last alphabetic character. Leading and trailing graphics are
- * removed; graphics embedded between letters are kept.
- */
-
- /*
- * Basic elements
- */
- white = [\n\r\t ]; /* Whitespace: ends a word */
- bol = [\n] white*; /* Beginning of a line: newline, then any whitespace */
- eol = [\0\n\r]; /* End of input line: NUL, newline or return */
- letter = [A-Za-z]; /* Is a letter */
- graphic = [!-@\[-`{-~]; /* Printing non-letter (punctuation and, in ASCII, digits) */
- text = [!-~]; /* All printing chars. (space is excluded) */
- garbage = [\1-\377]; /* Whatever remains: any byte except NUL */
-
- /*
- * A word is optional leading graphics, a letter, any run of
- * printing characters, another letter, then optional trailing
- * graphics -- so a word always holds at least two letters.
- *
- * A hyphenated word is a word-<NEWLINE> followed by a word
- * on the next line.
- *
- * Junk is a single letter followed by whitespace, or a
- * (possibly empty) run of graphics followed by whitespace.
- */
-
- word = graphic* letter text* letter graphic*;
- junk = (letter white) | (graphic* white);
-
- %{
- #define TRUE 1
- #define FALSE 0
-
- main()
- {
-     /*
-      * Keep calling the scanner; all the work happens in the rule
-      * actions. Stop as soon as yylex() hands back zero.
-      */
-     for (;;) {
-         if (!yylex())
-             break;
-     }
- }
- %}
-
- %%
-
- /*
- * A hyphenated word: a word, a "-" and an end of line, accepted
- * only when what follows (the trailing context after "/") is the
- * beginning of a line and two letters. output(TRUE) prints the
- * first half without a newline, so the next word printed
- * completes it on the same output line.
- * NOTE(review): LEXSKIP presumably tells the scanner to drop the
- * token and keep scanning -- confirm against the lex runtime.
- */
-
- word "-" eol / bol letter letter
- {
- output(TRUE);
- return(LEXSKIP);
- }
- /*
- * An ordinary word: output(FALSE) prints it followed by a
- * newline, so it ends the current output line.
- */
-
- word {
- output(FALSE);
- return(LEXSKIP);
- }
-
-
- /*
- * Junk (one letter words or all graphics): matched explicitly
- * so that it produces no output.
- */
-
- junk
- {
- return(LEXSKIP);
- }
-
- /*
- * Other stuff: stray line ends, whitespace and any other
- * character not covered above. Nothing is printed for these.
- */
-
- eol | white | garbage
- {
- return(LEXSKIP);
- }
-
- %%
-
- output(flag)
- int flag;
- /*
- * Output the current token, trimmed to run from its first
- * alphabetic character to its last. Everything in between is
- * printed unchanged.
- *
- * The parameter is TRUE if this is the start of a hyphenated
- * word: the trailing newline is then withheld so that the next
- * word printed joins onto this one. Otherwise a newline is
- * written after the word.
- *
- * Characters are written one at a time rather than staged in a
- * fixed-size buffer, so a token of any length is safe.
- */
- {
-     register char *tokptr; /* Next character to print */
-     char *tokend; /* One past the last character to print */
-     char *token();
-
-     /* token() is treated as leaving tokend one past the token's end */
-     tokptr = token(&tokend);
-     /*
-      * Skip over leading non-alpha stuff. The bound is tested
-      * before the character so nothing past the token is read.
-      */
-     while (tokptr < tokend && !isalpha(*tokptr))
-         tokptr++;
-     /*
-      * Back up over trailing non-alpha stuff (this also drops the
-      * "-" and line end of a hyphenated word), never moving
-      * below tokptr.
-      */
-     while (tokend > tokptr && !isalpha(tokend[-1]))
-         tokend--;
-     while (tokptr < tokend)
-         putchar(*tokptr++);
-     if (!flag)
-         putchar('\n');
- }