home *** CD-ROM | disk | FTP | other *** search
Lex Description | 1995-01-14 | 13.5 KB | 580 lines |
- %{
- /* $Id: lex.l,v 2.0.1.23 1994/01/21 07:48:57 greyham Exp $
- *
- * C manual page generator
- * Lexical analyzer specification
- */
-
- #include <ctype.h>
-
- /* Scanner-wide state shared by the rules and the helper functions. */
- static char *cur_file; /* current file name (malloced) */
- int line_num = 1; /* current line number in file */
- static int curly = 0; /* number of curly brace nesting levels */
- static int square = 0; /* number of square bracket nesting levels */
- static int ly_count = 0; /* number of occurrences of %% */
-
- /* temporary string buffer */
- static char buf[MAX_TEXT_LENGTH];
-
- #define DYNBUF_ALLOC 240 /* size of increment of dynamic buf */
- static char *dynbuf; /* start of dynamic buf */
- static int dynbuf_size; /* number of bytes allocated */
- static int dynbuf_current; /* current end of buffer */
-
- static boolean comment_ateol; /* does comment start & end at end of a line? */
- static boolean comment_remember;/* remember contents of current comment? */
- /* Start condition to go back to once the comment ends.  The value is taken
-  * from YYSTATE and later handed to BEGIN, so it is a start-condition number
-  * rather than a truth value and is kept in an int. */
- static int comment_caller;
-
- /* Saved scanner context of a file that is suspended while one of its
-  * #include files is being read.  An entry is filled in when the included
-  * file is opened and used again when that file reaches its end. */
- typedef struct {
- #ifdef FLEX_SCANNER
- YY_BUFFER_STATE buffer; /* flex buffer of the including file */
- #else
- FILE *fp; /* input stream (yyin) of the including file */
- #endif
- char *file; /* name of the including file (the saved cur_file) */
- int line_num; /* line number reached in the including file */
- } IncludeStack;
-
- static int inc_depth = 0; /* include nesting level */
- static IncludeStack inc_stack[MAX_INC_DEPTH]; /* stack of included files */
-
- /* Forward declarations of the file-local helpers.  Every parameter list goes
-  * through the _() macro, including the empty ones, so that calls are checked
-  * against a real prototype wherever _() provides one. */
- static void update_line_num _((void));
- static void do_include _((char *filename, int sysinc));
- static void new_dynbuf _((void));
- static void add_dynbuf _((int c));
- static char *return_dynbuf _((void));
- static void get_cpp_directive _((void));
- static boolean process_line_directive _((const char *new_file));
-
- /*
- * The initial comment processing is done primarily by the rather complex lex
- * rules in the various comment start states, the main functions being removal
- * of leading *'s, /'s and whitespace on a line, the removal of trailing
- * whitespace on a line, and the coalescing of separate comments on adjacent
- * lines. The remaining bits of textual content are collected by the following
- * functions, which simply strip leading and trailing blank lines.
- */
- void start_comment _((boolean ateol)); /* a comment has been opened */
- int end_comment _((boolean ateol)); /* comment closed; returns a token or 0 */
- void add_comment _((const char *s)); /* append text to the comment */
- void newline_comment _((void)); /* a line break was seen inside the comment */
-
- static int comment_newlines; /* number of newlines hit in comment */
- static boolean comment_started; /* have preceding empty lines been skipped */
-
- /* The rules and helpers read the current start condition through YYSTATE;
-  * the definitions below supply that name when the scanner is built by flex. */
- #ifdef FLEX_SCANNER /* flex uses YY_START instead of YYSTATE */
- #define YYSTATE YY_START
- #ifndef YY_START /* flex 2.3.8 & before didn't support it at all */
- #define YY_START ((yy_start - 1) / 2)
- #endif
- #endif
-
- /* This file defines its own yywrap() function; presumably some flex versions
-  * provide a macro of that name, which has to go first. */
- #undef yywrap /* for flex */
-
- /* SKIP is the start condition entered after an enumerator's name; it skips
-  * the value assignment in an enum up to the next ',' or '}' */
- %}
-
-     /* Named patterns.  STRING is a string literal, allowing backslash
-      * escapes and backslash-newline continuation.  QUOTED is anything whose
-      * content must not be interpreted while text is being skipped: a
-      * string literal, a character literal or a backslash-escaped character.
-      *
-      * Inside the character literal a backslash always consumes the next
-      * character and can never match on its own.  Otherwise '\\' followed by
-      * another quote later on the same line would be matched up to that
-      * later quote by the longest-match rule. */
- WS [ \t]
- WLF [ \t\n\f]*
- LETTER [A-Za-z_]
- DIGIT [0-9]
- ID {LETTER}({LETTER}|{DIGIT})*
- STRING \"(\\.|\\\n|[^"\\])*\"
- QUOTED ({STRING}|\'(\\.|[^'\\\n])*\'|\\.)
-
- %p 5000
- %e 2000
- %s CPP1 INIT1 INIT2 CURLY SQUARE LEXYACC SKIP COMMENT COMMLINE CPPCOMMENT
- %%
-
-
- <LEXYACC>^"%%" {
- /* count the section separators; from the second one on,
-  * the input is scanned as ordinary C again */
- if (++ly_count >= 2)
- BEGIN INITIAL;
- }
- <LEXYACC>^"%{" BEGIN INITIAL; /* a literal C block opens: scan it as C */
- <LEXYACC>{QUOTED} update_line_num(); /* skip quoted text, counting its newlines */
- <LEXYACC>. ; /* ignore any other character */
- <INITIAL>^"%}" BEGIN LEXYACC; /* the literal C block closes: ignore text again */
-
- <INITIAL>^{WS}*#{WS}* BEGIN CPP1;
-
- <CPP1>define{WS}+{ID} {
-     /* The macro name is a substring of yytext, so it is only
-      * copied into buf when yytext itself fits there. */
-     int fits = yyleng < (int)sizeof buf;
-
-     if (fits)
-         sscanf(yytext, "define %s", buf);
-     get_cpp_directive();
-     if (fits)
-         new_symbol(typedef_names, buf, DS_EXTERN);
-     else {
-         output_error();
-         fprintf(stderr, "macro name too long\n");
-     }
- }
-
- <CPP1>include{WS}*\"[^"]+\" {
-     /* same bound as above: the file name comes out of yytext */
-     int fits = yyleng < (int)sizeof buf;
-
-     if (fits)
-         sscanf(yytext, "include \"%[^\"]\"", buf);
-     get_cpp_directive();
-     if (fits)
-         do_include(buf, FALSE);
-     else {
-         output_error();
-         fprintf(stderr, "include file name too long\n");
-     }
- }
- <CPP1>include{WS}*\<[^>]+\> {
-     int fits = yyleng < (int)sizeof buf;
-
-     if (fits)
-         sscanf(yytext, "include <%[^>]>", buf);
-     get_cpp_directive();
-     if (fits)
-         do_include(buf, TRUE);
-     else {
-         output_error();
-         fprintf(stderr, "include file name too long\n");
-     }
- }
-
- <CPP1>line{WS}+[0-9]+{WS}+\".*$ {
-     int fits = yyleng < (int)sizeof buf;
-
-     /* %[ stores nothing when the quoted name is empty, so
-      * start from an empty buf instead of stale contents */
-     buf[0] = '\0';
-     if (fits)
-         sscanf(yytext, "line %d \"%[^\"]\"",
-                &line_num, buf);
-     else
-         sscanf(yytext, "line %d", &line_num);
-     /* the newline ending this directive is still to be
-      * scanned and will bump line_num again */
-     --line_num;
-     BEGIN INITIAL;
-
-     if (!fits) {
-         output_error();
-         fprintf(stderr, "file name in line directive too long\n");
-     } else if (process_line_directive(buf))
-         return T_BASEFILE;
- }
- <CPP1>[0-9]+{WS}+\".*$ {
-     int fits = yyleng < (int)sizeof buf;
-
-     /* see the "line" form above for the reasoning */
-     buf[0] = '\0';
-     if (fits)
-         sscanf(yytext, "%d \"%[^\"]\"", &line_num, buf);
-     else
-         sscanf(yytext, "%d", &line_num);
-     --line_num;
-     BEGIN INITIAL;
-
-     if (!fits) {
-         output_error();
-         fprintf(stderr, "file name in line directive too long\n");
-     } else if (process_line_directive(buf))
-         return T_BASEFILE;
- }
- <CPP1>[0-9]+.*$ {
-     /* line number without a file name */
-     sscanf(yytext, "%d ", &line_num);
-     --line_num;
-     BEGIN INITIAL;
- }
-
- <CPP1>. get_cpp_directive(); /* any other directive: skip the rest of it */
-
- <INITIAL>"(" return '('; /* punctuation is returned as its own character code */
- <INITIAL>")" return ')';
- <INITIAL>"*" return '*';
- <INITIAL,SKIP>"," {
- /* also recognised in SKIP, where it ends the skipped text */
- BEGIN INITIAL; /* stop skipping */
- return ',';
- }
- <INITIAL>";" return ';';
- <INITIAL>"..." return T_ELLIPSIS;
- <INITIAL>{STRING} { /* STRING can contain newlines, so count them */ update_line_num(); return T_STRING_LITERAL; }
-
- <INITIAL>auto return T_AUTO; /* storage classes */
- <INITIAL>extern return T_EXTERN;
- <INITIAL>register return T_REGISTER;
- <INITIAL>static return T_STATIC;
- <INITIAL>typedef return T_TYPEDEF;
- <INITIAL>char return T_CHAR; /* type specifiers */
- <INITIAL>double return T_DOUBLE;
- <INITIAL>float return T_FLOAT;
- <INITIAL>int return T_INT;
- <INITIAL>void return T_VOID;
- <INITIAL>long return T_LONG;
- <INITIAL>short return T_SHORT;
- <INITIAL>signed return T_SIGNED;
- <INITIAL>__signed__ return T_SIGNED; /* underscored spellings map to the same token */
- <INITIAL>__signed return T_SIGNED;
- <INITIAL>unsigned return T_UNSIGNED;
- <INITIAL>enum { /* the next brace then opens an enumerator list */ enum_state = KEYWORD; return T_ENUM; }
- <INITIAL>struct return T_STRUCT;
- <INITIAL>union return T_UNION;
- <INITIAL>const return T_CONST; /* type qualifiers */
- <INITIAL>__const__ return T_CONST;
- <INITIAL>__const return T_CONST;
- <INITIAL>volatile return T_VOLATILE;
- <INITIAL>__volatile__ return T_VOLATILE;
- <INITIAL>__volatile return T_VOLATILE;
- <INITIAL>inline return T_INLINE;
- <INITIAL>__inline__ return T_INLINE;
- <INITIAL>__inline return T_INLINE;
- <INITIAL>cdecl return T_CDECL; /* presumably PC compiler extension keywords */
- <INITIAL>far return T_FAR;
- <INITIAL>huge return T_HUGE;
- <INITIAL>interrupt return T_INTERRUPT;
- <INITIAL>near return T_NEAR;
- <INITIAL>pascal return T_PASCAL;
- <INITIAL>__extension__ ; /* recognised and dropped: no token is returned */
-
- <INITIAL>{ID} {
- /* inside an enumerator list, skip whatever follows the name
-  * up to the next ',' or '}' (both switch back to INITIAL) */
- if (enum_state == BRACES) BEGIN SKIP;
- /* hand the parser a copy of the name (presumably malloced by
-  * strduplicate), since yytext is overwritten by the next match */
- yylval.text = strduplicate(yytext);
- if (is_typedef_name(yytext))
- return T_TYPEDEF_NAME;
- else
- return T_IDENTIFIER;
- }
-
- <INITIAL>"=" BEGIN INIT1; /* an initializer follows; its text is skipped */
- <INIT1>"{" { /* braced initializer: track the nesting depth */ curly = 1; BEGIN INIT2; }
- <INIT1>[,;] {
- /* end of an unbraced initializer: push the delimiter back so
-  * that INITIAL returns it as the token after T_INITIALIZER */
- unput(yytext[yyleng-1]);
- BEGIN INITIAL;
- return T_INITIALIZER;
- }
- <INIT1>{QUOTED} update_line_num(); /* quoted text may hide ',' ';' or braces */
- <INIT1>. ;
-
- <INIT2>"{" ++curly;
- <INIT2>"}" {
- /* the brace matching the opening one ends the initializer */
- if (--curly == 0) {
- BEGIN INITIAL;
- return T_INITIALIZER;
- }
- }
- <INIT2>{QUOTED} update_line_num();
- <INIT2>. ;
-
- <INITIAL,SKIP>"{" {
- /* directly after the enum keyword the brace is a token of its
-  * own; any other brace starts a block whose whole content is
-  * skipped and reported as one T_BRACES token */
- if (enum_state == KEYWORD)
- {
- enum_state = BRACES;
- return '{';
- }
- else
- {
- curly = 1;
- BEGIN CURLY;
- }
- }
- <INITIAL,SKIP>"}" {
- BEGIN INITIAL; /* stop skipping */
- return '}';
- }
-
- <CURLY>"{" ++curly;
- <CURLY>"}" {
- /* the brace matching the opening one ends the block */
- if (--curly == 0) {
- BEGIN INITIAL;
- return T_BRACES;
- }
- }
- <CURLY,SKIP>{QUOTED} update_line_num(); /* skip quoted text, counting its newlines */
- <CURLY,SKIP>. ; /* discard everything else */
-
- <INITIAL>"[" {
- /* collect the bracketed text, brackets included, in a
-  * fresh dynamic buffer */
- new_dynbuf(); add_dynbuf(yytext[0]);
- square = 1; BEGIN SQUARE;
- }
- <SQUARE>"[" { ++square; add_dynbuf(yytext[0]); }
- <SQUARE>"]" {
- add_dynbuf(yytext[0]);
- /* the bracket matching the opening one completes the token;
-  * the collected text is handed over in yylval.text */
- if (--square == 0) {
- BEGIN INITIAL;
- yylval.text = return_dynbuf();
- return T_BRACKETS;
- }
- }
- <SQUARE>{QUOTED}|. {
- /* copy everything else verbatim, counting any newlines */
- int i;
- for (i = 0; i < yyleng; ++i)
- {
- if (yytext[i] == '\n') ++line_num;
- add_dynbuf(yytext[i]);
- }
- }
-
- <INITIAL,INIT1,INIT2,CURLY,SQUARE,LEXYACC,SKIP>^{WS}*"/*""*"*{WS}+ {
- /* comment opener that is the first thing on its line,
-  * followed by blanks: not an end-of-line comment */
- comment_caller = YYSTATE;
- start_comment(FALSE);
- BEGIN COMMENT; }
- <INITIAL,INIT1,INIT2,CURLY,SQUARE,LEXYACC,SKIP>^{WS}*"/*""*"*[^/] {
- /* same, but followed directly by text: the character matched
-  * by [^/] is given back so that COMMENT sees it */
- yyless(yyleng-1);
- comment_caller = YYSTATE;
- start_comment(FALSE);
- BEGIN COMMENT; }
- <INITIAL,INIT1,INIT2,CURLY,SQUARE,LEXYACC,SKIP>"/*""*"*{WS}+ {
- /* comment opener after other text on the line */
- comment_caller = YYSTATE;
- start_comment(TRUE);
- BEGIN COMMENT; }
- <INITIAL,INIT1,INIT2,CURLY,SQUARE,LEXYACC,SKIP>"/*""*"*[^/] {
- yyless(yyleng-1);
- comment_caller = YYSTATE;
- start_comment(TRUE);
- BEGIN COMMENT; }
- <COMMLINE>^{WS}*"/"+{WS}* |
- <COMMLINE>^{WS}*"/"*"*"*{WS}+ BEGIN COMMENT; /* drop the leading decoration of a comment line */
- <COMMLINE>^{WS}*"/"*"*"*[^/] { /* give back the first character of the text */ yyless(yyleng-1); BEGIN COMMENT; }
- <COMMLINE>. { /* no decoration: rescan the character in COMMENT */ yyless(0); BEGIN COMMENT; }
- <COMMLINE>\n newline_comment(); /* empty comment line */
- <COMMENT>{WS}*"*"+"/"{WS}*\n{WS}*"/*""*"*{WS}+ newline_comment(); /* comment closed and reopened on the next line: keep going as one comment */
- <COMMENT>{WS}*"*"+"/"{WS}*\n{WS}*"/*""*"*[^/] {
- yyless(yyleng-1); newline_comment(); }
- <COMMENT>{WS}*"*"+"/"{WS}*$ { /* comment closes at the end of a line */
- int ret = end_comment(TRUE);
- BEGIN comment_caller;
- if (ret) return ret; }
- <COMMENT>{WS}*"*"+"/" { /* comment closes with more text after it */
- int ret = end_comment(FALSE);
- BEGIN comment_caller;
- if (ret) return ret; }
- <COMMENT>[^*\n \t]* |
- <COMMENT>{WS}* |
- <COMMENT>"*"+[^*/\n]* add_comment(yytext); /* text of the comment */
- <COMMENT>{WS}*\n { /* trailing blanks are dropped; the next line starts in COMMLINE */ newline_comment(); BEGIN COMMLINE; }
-
- <INITIAL,INIT1,INIT2,CURLY,SQUARE,LEXYACC,SKIP>^{WS}*"//"[/*]*{WS}* {
- /* line comment that is the first thing on its line */
- comment_caller = YYSTATE;
- start_comment(FALSE);
- BEGIN CPPCOMMENT; }
- <INITIAL,INIT1,INIT2,CURLY,SQUARE,LEXYACC,SKIP>"//"[/*]*{WS}* {
- /* line comment after other text on the line */
- comment_caller = YYSTATE;
- start_comment(TRUE);
- BEGIN CPPCOMMENT; }
- <CPPCOMMENT>.* add_comment(yytext); /* the rest of the line is the comment text */
- <CPPCOMMENT>\n{WS}*"//"[/*]*{WS}* newline_comment(); /* another line comment follows: continue the same comment */
- <CPPCOMMENT>\n { /* the comment ends with its line */
- int ret = end_comment(TRUE);
- ++line_num;
- BEGIN comment_caller;
- if (ret) return ret; }
-
- [ \t\f]+ ; /* blanks are ignored in every start condition */
- \n ++line_num;
-
- . {
- /* nothing above matched: report the character and carry on */
- output_error();
- fprintf(stderr, "bad character '%c'\n", yytext[0]);
- }
- %%
-
- /* Advance line_num by the number of newline characters found in the text
-  * that was just matched (yytext).
-  */
- static void
- update_line_num ()
- {
-     const char *scan;
-
-     for (scan = yytext; *scan != '\0'; ++scan) {
-         if (*scan == '\n')
-             ++line_num;
-     }
- }
-
- /* Called when a comment opens.  Decides whether the text of this comment is
-  * to be kept and, if it is, resets the collection state and starts a fresh
-  * dynamic buffer.  comment_caller must already hold the start condition the
-  * comment was found in.
-  */
- void start_comment(ateol)
- boolean ateol; /* does comment start at end of an existing line? */
- {
-     /* only comments found in INITIAL or SKIP are candidates ... */
-     boolean from_decl = comment_caller == INITIAL || comment_caller == SKIP;
-     /* ... and only in the base file or inside an enumerator list */
-     boolean wanted = inbasefile || enum_state == BRACES;
-
-     comment_remember = from_decl && wanted;
-     if (!comment_remember)
-         return;
-
-     new_dynbuf();
-     comment_started = FALSE;
-     comment_newlines = 0;
-     comment_ateol = ateol;
- }
-
- /* Called when a comment closes.  Returns the token to hand to the parser,
-  * T_EOLCOMMENT or T_COMMENT, with the collected text in yylval.text, or 0
-  * when the comment is not being remembered or turned out to be empty.
-  */
- int end_comment(ateol)
- boolean ateol; /* does comment end at end of line? */
- {
-     char *text;
-
-     if (!comment_remember)
-         return 0;
-
-     /* an end-of-line comment must both start and end at a line end */
-     if (!ateol)
-         comment_ateol = FALSE;
-
-     text = return_dynbuf();
-     yylval.text = text;
-     if (*text == '\0') {
-         /* nothing was collected: discard the buffer, no token */
-         free(text);
-         return 0;
-     }
- #ifdef DEBUG
-     fprintf(stderr,"`%s'\n", text);
- #endif
-     if (comment_ateol)
-         return T_EOLCOMMENT;
-     return T_COMMENT;
- }
-
- /* Note a line break inside a comment.  The line counter always advances.
-  * The break itself is only counted, not stored, so that breaks at the end
-  * of the comment never reach the buffer; breaks seen before any text has
-  * been collected are not counted at all.
-  */
- void newline_comment()
- {
-     ++line_num;
-
-     if (comment_remember && comment_started)
-         comment_newlines++;
- }
-
- /* Append real text to the comment being collected.  Line breaks that were
-  * counted since the previous text are written out first.
-  */
- void add_comment(s)
- const char *s;
- {
-     const char *p;
-
- #ifdef DEBUG
-     fprintf(stderr,"`%s'\n", s);
- #endif
-     if (!comment_remember)
-         return;
-
-     comment_started = TRUE;
-
-     /* flush the deferred line breaks */
-     for (; comment_newlines; comment_newlines--)
-         add_dynbuf('\n');
-
-     for (p = s; *p; ++p)
-         add_dynbuf(*p);
- }
-
- /* Read and discard the remainder of a preprocessor directive, following
-  * backslash-newline continuations.  The newline that ends the directive is
-  * pushed back and the scanner returns to the INITIAL start condition.
-  *
-  * NOTE(review): when a comment opener is met, the comment start condition is
-  * selected but reading goes on to the end of the line, where INITIAL is
-  * selected again; confirm that this is intended for comments which continue
-  * past the end of the directive line.
-  */
- static void
- get_cpp_directive ()
- {
-     int c;
-     int prev = '\0';
-
-     for (;;) {
-         c = input();
-         if (c <= 0)
-             break; /* end of input */
-
-         if (c == '\n') {
-             if (prev != '\\') {
-                 /* unescaped newline: the directive is complete */
-                 unput(c);
-                 BEGIN INITIAL;
-                 return;
-             }
-             line_num++;
-         } else if (prev == '/' && (c == '*' || c == '/')) {
-             /* might be able to attach comments to #defines one day */
-             comment_caller = YYSTATE;
-             start_comment(TRUE);
-             if (c == '*') {
-                 BEGIN COMMENT;
-             } else {
-                 BEGIN CPPCOMMENT;
-             }
-         }
-         prev = c;
-     }
- }
-
- /* Process include directive.
-  *
-  * Tries filename in each directory of inc_dir[] in turn.  At the first place
-  * where it can be opened, the current file name, line number and input
-  * source are saved on inc_stack and the scanner is redirected to the new
-  * file.  A file that cannot be opened anywhere is ignored without a message.
-  * A directory whose name combined with filename does not fit the local path
-  * buffer is passed over like one in which the file does not exist.
-  */
- static void
- do_include (filename, sysinc)
- char *filename; /* file name */
- int sysinc; /* 1 = do not search current directory */
- {
-     char path[MAX_TEXT_LENGTH];
-     int i;
-     FILE *fp;
-     IncludeStack *sp;
-
-     if (inc_depth >= MAX_INC_DEPTH) {
-         output_error();
-         fprintf(stderr, "includes too deeply nested\n");
-         return;
-     }
-
-     /* the search starts at entry 1 instead of 0 for a system include */
-     for (i = sysinc != 0; i < num_inc_dir; ++i) {
-         /* never let strcpy/strcat write past the end of path */
-         if (strlen(inc_dir[i]) + strlen(filename) >= sizeof path)
-             continue;
-         strcpy(path, inc_dir[i]);
-         strcat(path, filename);
-         if ((fp = fopen(path, "r")) == NULL)
-             continue;
-
-         /* remember where to resume once the included file ends */
-         sp = inc_stack + inc_depth;
-         sp->file = cur_file;
-         sp->line_num = line_num;
- #ifdef FLEX_SCANNER
-         sp->buffer = YY_CURRENT_BUFFER;
-         yy_switch_to_buffer(yy_create_buffer(fp, YY_BUF_SIZE));
- #else
-         sp->fp = yyin;
-         yyin = fp;
- #endif
-         ++inc_depth;
-         cur_file = strduplicate(filename);
-         line_num = 0;
-         return;
-     }
- }
-
- /* Record the file named by a line directive as the current file.
-  *
-  * An empty name or the name "stdin" stands for standard input, which is
-  * represented by cur_file == NULL.  When the file differs from the current
-  * one, cur_file is replaced and yylval.boolean is set to whether the new
-  * file is the base file.
-  *
-  * returns TRUE if the basefile status has changed
-  */
- static boolean process_line_directive(new_file)
- const char *new_file;
- {
-     boolean new_stdin;
-
-     /* strip leading ./ that Sun acc prepends */
-     if (!strncmp(new_file,"./",2))
-         new_file += 2;
-
-     new_stdin = new_file[0] == '\0' || !strcmp(new_file,"stdin");
-
-     /* return BASEFILE token only when file changes */
-     if ((cur_file == NULL && !new_stdin) ||
-         (cur_file != NULL && strcmp(cur_file, new_file)))
-     {
-         safe_free(cur_file);
-         cur_file = new_stdin ? NULL : strduplicate(new_file);
-
-         /* cur_file is NULL after a switch to standard input, so the names
-          * may only be compared as strings when both are present; otherwise
-          * the two are the same file only if both are NULL */
-         if (basefile != NULL && cur_file != NULL)
-             yylval.boolean = !strcmp(cur_file, basefile);
-         else
-             yylval.boolean = cur_file == basefile;
-         return TRUE;
-     }
-     return FALSE;
- }
-
- /* Called by the scanner at the end of the current input.  If that input was
-  * an included file, it is closed and scanning resumes in the file that
-  * included it.  Returns 0 when there is more input to scan and 1 when the
-  * outermost file has ended.
-  */
- int
- yywrap ()
- {
-     IncludeStack *top;
-
-     if (inc_depth <= 0)
-         return 1;
-
-     top = &inc_stack[--inc_depth];
-     fclose(yyin);
- #ifdef FLEX_SCANNER
-     yy_delete_buffer(YY_CURRENT_BUFFER);
-     yy_switch_to_buffer(top->buffer);
- #else
-     yyin = top->fp;
- #endif
-     /* the name of the finished file is no longer needed */
-     safe_free(cur_file);
-     cur_file = top->file;
-     line_num = top->line_num + 1;
-     return 0;
- }
-
-
- /* Start a new, empty dynamic buffer of DYNBUF_ALLOC bytes.  The previous
-  * buffer pointer is simply overwritten, not freed.
-  */
- static void new_dynbuf()
- {
-     dynbuf_size = DYNBUF_ALLOC;
-     dynbuf = malloc(dynbuf_size);
-     if (dynbuf == 0)
-         outmem();
-
-     dynbuf_current = 0;
- }
-
- /* Append one character to the dynamic buffer, growing it by DYNBUF_ALLOC
-  * bytes when it is full.
-  */
- static void add_dynbuf(c)
- int c;
- {
-     if (dynbuf_current == dynbuf_size) {
-         dynbuf_size += DYNBUF_ALLOC;
-         dynbuf = realloc(dynbuf, dynbuf_size);
-         if (dynbuf == 0)
-             outmem();
-     }
-
-     dynbuf[dynbuf_current] = c;
-     dynbuf_current++;
- }
-
- /* Terminate the dynamic buffer with a NUL, shrink the allocation to the
-  * number of bytes actually used and return it.
-  */
- static char *return_dynbuf()
- {
-     char *trimmed;
-
-     add_dynbuf('\0');
-
-     /* give back the unused tail of the allocation */
-     trimmed = realloc(dynbuf, dynbuf_current);
-     dynbuf = trimmed;
-     if (trimmed == 0)
-         outmem();
-
-     return dynbuf;
- }
-
- /* Output an error message along with the current line number in the
- * source file.
- */
- void
- output_error ()
- {
- errors++;
- fprintf(stderr, "%s:%d: ", cur_file ? cur_file : "stdin", line_num);
- }
-