home *** CD-ROM | disk | FTP | other *** search
Text File | 1991-10-07 | 72.7 KB | 2,162 lines |
- /*
- EPSHeader
-
- File: ctag.c
- Author: J. Kercheval
- Created: Sun, 07/14/1991 17:24:44
- */
- /*
- EPSRevision History
-
- J. Kercheval Sat, 07/27/1991 22:08:04 creation
- J. Kercheval Sun, 08/18/1991 20:58:13 completion of C_get_token()
- J. Kercheval Wed, 08/21/1991 22:34:49 place function recognition
- J. Kercheval Wed, 08/21/1991 23:11:17 add defines and macros
- J. Kercheval Wed, 08/21/1991 23:54:33 add typedef and class parsing
- J. Kercheval Thu, 08/22/1991 23:53:51 add global variables
- J. Kercheval Thu, 08/22/1991 23:54:05 add enum, struct, union
- J. Kercheval Thu, 08/22/1991 23:54:28 add globals via typedefs
- J. Kercheval Sun, 08/25/1991 23:09:28 complete semantic parser
- J. Kercheval Tue, 08/27/1991 23:28:34 fix bug in typedef, struct, enum and union declarations
- J. Kercheval Sat, 08/31/1991 23:58:03 add prototype parsing
- J. Kercheval Tue, 09/03/1991 22:28:55 move many macros to functions
- J. Kercheval Tue, 09/03/1991 23:05:34 clean code and consolidate to functions
- J. Kercheval Sun, 09/08/1991 13:24:53 minor bug fix in function and global variable parser
- J. Kercheval Sun, 09/08/1991 21:31:06 fix bug in lexical parser
- J. Kercheval Sun, 09/08/1991 23:44:46 \v is not a valid literal in Epsilon, remove it
- J. Kercheval Mon, 09/09/1991 21:49:00 fix bug in function parser
- J. Kercheval Mon, 09/09/1991 22:44:46 fix bug in define parser
- J. Kercheval Tue, 09/10/1991 22:06:09 fix typedef parser
- J. Kercheval Wed, 09/11/1991 02:04:48 add extern symbol recognition
- J. Kercheval Wed, 09/11/1991 19:49:11 fix bug in function pointer variable declaration
- J. Kercheval Wed, 09/11/1991 20:38:13 add support for function pointer variable declarations after first declaration
- J. Kercheval Wed, 09/11/1991 21:51:37 move #directive parsing between semantic and lexical parser
- J. Kercheval Thu, 09/12/1991 22:44:43 add support for #ifdef blocks to avoid unmatched parens in ToLevelZero parsing
- J. Kercheval Fri, 09/13/1991 01:17:05 add when_loading() to remap def_srch_case_map[]
- J. Kercheval Wed, 09/18/1991 22:05:02 fix bug in GetToken and DiscardLine
- J. Kercheval Thu, 09/19/1991 22:26:09 fix bug in lexical parser when parsing non C syntax files
- J. Kercheval Thu, 10/03/1991 12:47:53 add .cc and .cpp extensions
- J. Kercheval Thu, 10/03/1991 18:15:10 add support for Static declarations
- J. Kercheval Fri, 10/04/1991 11:13:23 add support for tagging enumeration constants
- J. Kercheval Mon, 10/07/1991 09:36:07 create CParseEnumerationConstants()
- */
-
- /*
- * This file implements tagging for .C, .H and .E files which contain
- * standard C and C++ syntax. This file defines no new commands and is
- * intended to work with the tags package included with V5.0 of Epsilon.
- * There is no problem using modified tags packages providing calls are made
- * to tags_suffix_???() routines in the same way Epsilon does this and that
- * an output routine add_tag() is used. All that should be required is to
- * compile and load this file and this module will be used transparently to
- * you. If you wish to customize the types of tags output modify the global
- * variables CTagWant?????? which when TRUE allow the output of that type of
- * tag and when FALSE prohibit that type of tag.
- *
- * This module implements tagging for union, struct, enum, typedef, #define,
- * global variables, classes, prototypes and functions (all of which may be
- * specifically turned on and off). The performance cost for this level of
- * accuracy is not trivial. This parser knows a lot about the syntax of C and
- * takes a fair amount of time. You should expect to see a file complete
- * somewhere in the range of 125%-150% of the time as Lugaru's tagger for C.
- * This is not only because of the detail of the tags but also the larger
- * number of them. This tagger is not intended to do all of your work for
- * you but is designed to be used in conjunction with the tags generator I
- * have developed and is now available. This file implements the same
- * lexical and semantic parser as is found in that executable. Use the
- * executable in your make file for very fast and updated tags. If you have
- * problems finding it, contact me and I can point the way...
- *
- * There is defined at the end of this module a when_loading() function which
- * alters the default search case map to allow *correct* (or at least
- * consistent) sorting with sort routines external to Epsilon. In particular,
- * to produce the same sort order as any UNIX, VMS or HP style sort or with
- * the tags generator this module is supposed to coexist with this mapping
- * must be done. You should see no difference in the location of sorted
- * buffers except for lines starting with ^, [, \, ] and _.
- *
- * This code is dedicated to the public domain with the caveat that Lugaru is
- * welcome to use this within their distribution source code which is
- * supplied with Epsilon.
- *
- * Good Tagging,
- *
- * jbk@wrq.com
- *
- * John Kercheval
- * 127 NW Bowdoin Pl #105
- * Seattle, WA 98107-4960
- * September 8, 1991
- */
-
- #include <eel.h>
-
/* truth type and its two values, as used throughout this module */
#define BOOLEAN             int
#define TRUE                1
#define FALSE               0

/* MAX_TOKEN_LENGTH sizes each token buffer held in a Token.  CBUFSIZE is
 * presumably the size of the working read buffer -- its use is not visible
 * in this part of the file (TODO confirm). */
#define CBUFSIZE            4096
#define MAX_TOKEN_LENGTH    4096
-
-
- /* the following variable determine the behavior of the parser with respect
- * to the token types which are output as a tag. Note that use of the
- * CTagWantExtern variable is a modifier and will only be effective when
- * other options are used (ie. CTagWantProtoType must be specified to obtain
- * extern prototypes, CTagWantExtern alone yields nothing). Note also that
- * the CTagWantExtern modifier has no effect for function, define and macro
- * tags which are tagged only according only to the CTagWantFunction,
- * CTagWantDefine and CTagWantMacro variables respectively. CTagWantStatic
- * is also a modifier and will allow tags of internal statically defined
- * variables and other declarations. CTagWantStatic also has no effect on
- * Define and Macro tags.
- */
- BOOLEAN CTagWantFunction = TRUE;
- BOOLEAN CTagWantProtoType = FALSE;
- BOOLEAN CTagWantStructure = TRUE;
- BOOLEAN CTagWantTypeDefinition = TRUE;
- BOOLEAN CTagWantMacro = TRUE;
- BOOLEAN CTagWantEnumeration = TRUE;
- BOOLEAN CTagWantEnumerationConstant = TRUE;
- BOOLEAN CTagWantUnion = TRUE;
- BOOLEAN CTagWantGlobalVariable = TRUE;
- BOOLEAN CTagWantClass = TRUE;
- BOOLEAN CTagWantDefine = TRUE;
- BOOLEAN CTagWantExtern = FALSE;
- BOOLEAN CTagWantStatic = TRUE;
-
-
- /* function for determining if character is whitespace */
- #define IsWhite(c) ( _C_white_boolean_table[c] )
-
- /* the indexed table for white space character lookup */
- BOOLEAN _C_white_boolean_table[256];
-
- /* list of whitespace characters */
- char C_white[] = " \f\t\n\r";
-
-
- /* function for determining if character is a delimiter */
- #define IsDelim(c) ( _C_delim_boolean_table[c] )
-
- /* the indexed table for token delimiter lookup */
- BOOLEAN _C_delim_boolean_table[256];
-
- /* list of token delimiters */
- char C_delim[] = " \f\t\n\r\"[](){}#;:,.'=-+*/%&|^~!<>?";
-
-
- /* function for determining if character is a puncuator */
- #define IsPunctuator(c) ( _C_punctuator_boolean_table[c] )
-
- /* the indexed table for punctuator character lookup */
- BOOLEAN _C_punctuator_boolean_table[256];
-
- /* list of punctuators */
- char C_declaration_delim[] = "[](){},;=";
-
-
- char C_open_brace[] = "{[("; /* open brace set */
- char C_close_brace[] = ")]}"; /* close brace set */
-
-
/*
 * Codes naming the kind of the tag (or special keyword) currently being
 * handled.  NOP means "nothing special".
 */
#define NOP                  0
#define Function             1
#define ProtoType            2
#define Structure            3
#define TypeDefinition       4
#define Macro                5
#define Enumeration          6
#define EnumerationConstant  7
#define Union                8
#define GlobalVariable       9
#define Class               10
#define Define              11
#define Extern              12
#define Static              13

/* a variable holding one of the codes above */
typedef int SymbolType;
-
-
/*
 * State of the input being parsed.
 */
typedef struct BufferStruct {

    /* running line counter; handed to the lexical parser, which bumps it
     * for the newlines it passes over */
    int token_line_location;

    /* the buffer currently being parsed */
    char *inbuf;

} Buffer;
-
-
- /* the current input token state */
- typedef struct TokenStruct {
- char sbuf1[MAX_TOKEN_LENGTH]; /* the first token buffer */
- int charloc1; /* the char location of sbuf1 */
- int tokenline1; /* the line number of sbuf1 */
-
- char sbuf2[MAX_TOKEN_LENGTH]; /* the second token buffer */
- int charloc2; /* the char location of sbuf2 */
- int tokenline2; /* the line number of sbuf2 */
-
- char *cur_token; /* pointer to the current token buffer */
- int *cur_char_location; /* the location of current token */
- int *cur_token_line; /* the line of the current token */
-
- char *prev_token; /* pointer to the last token buffer */
- int *prev_char_location; /* the location of previous token */
- int *prev_token_line; /* the line of the previous token */
-
- int token_count; /* temporary variable, used by ToPunctuator */
- int else_nesting_level; /* the current nesting level */
-
- BOOLEAN extern_active; /* minor state for this statement */
- BOOLEAN static_active; /* minor state for this statement */
- } Token;
-
-
/* room reserved for each entry of C_token_list, terminator included */
#define SYMBOL_SIZE 20

/*
 * The C and C++ keywords (plus the pointer declarator) that may appear in
 * a declaration.
 *
 * Entry 0 is not a keyword: it is the set of characters that start any of
 * the entries after it, used for a quick rejection test.  The list ends at
 * the first empty entry.
 */
char C_token_list[][SYMBOL_SIZE] =
{
    /* first characters of everything below */
    "*ivclsdfuaretp_hn",

    /* pointer declarator */
    "*",

    /* basic types */
    "int",
    "void",
    "char",
    "long",
    "short",
    "double",
    "float",
    "signed",
    "unsigned",

    /* storage classes */
    "auto",
    "register",
    "static",

    /* aggregate, enumeration and type definition keywords */
    "struct",
    "union",
    "enum",
    "typedef",

    /* qualifier and linkage */
    "const",
    "extern",

    /* C++ class keywords and modifiers */
    "class",
    "friend",
    "private",
    "protected",
    "public",

    /* qualifier */
    "volatile",

    /* pointer-type and calling-sequence keywords */
    "_based",
    "_cdecl",
    "cdecl",
    "_far",
    "far",
    "_huge",
    "huge",
    "_near",
    "near",
    "_pascal",
    "pascal",
    "_fortran",
    "_fastcall",

    /* end of list */
    "\0"
};
-
-
- /*----------------------------------------------------------------------------
- *
- * CParserInit() initializes the tables required by the parser. The tables
- * used are a simple boolean index which are true if the character
- * corresponding to the index is a member of the associated table.
- *
- ---------------------------------------------------------------------------*/
-
- CParserInit()
- {
- char *s;
- int i;
-
- /* init the entire block to FALSE */
- for (i = 0; i < 256; i++) {
- _C_delim_boolean_table[i] = FALSE;
- _C_white_boolean_table[i] = FALSE;
- _C_punctuator_boolean_table[i] = FALSE;
- }
-
- /* set the characters in the delim set to TRUE */
- for (s = C_delim; *s; s++) {
- _C_delim_boolean_table[*s] = TRUE;
- }
-
- /* set the characters in the white set to TRUE */
- for (s = C_white; *s; s++) {
- _C_white_boolean_table[*s] = TRUE;
- }
-
- /* set the characters in the punctuator set to TRUE */
- for (s = C_declaration_delim; *s; s++) {
- _C_punctuator_boolean_table[*s] = TRUE;
- }
- }
-
-
- /*----------------------------------------------------------------------------
- *
- * strchr() is the standard string library function strchr()
- *
- ---------------------------------------------------------------------------*/
-
- char *strchr(s, c)
- char *s;
- char c;
- {
- char *ret = s;
-
- while (*ret) {
- if (*ret == c)
- return ret;
- ret++;
- }
-
- if (*ret == c)
- return ret;
-
- return NULL;
- }
-
-
- /*----------------------------------------------------------------------------
- *
- * FillBuffer() fills the passed buffer parameter with bufsize characters
- * (or as many as are available) and places and null character '\0' at the
- * end of the buffer. This routine returns TRUE if successful and FALSE if
- * eof(infile) is true. Note: if a bufsize parameter is passed and the read
- * is successful for bufsize characters, then buffer[bufsize] will be
- * overwritten with the null character. Do not pass a bufsize the maximum
- * size of the buffer. This null terminated buffering scheme assumes the
- * source file has no null character embedded within it.
- *
- ---------------------------------------------------------------------------*/
-
- BOOLEAN FillBuffer(inbuf, ctag_buffer, bufsize)
- char *inbuf;
- char *ctag_buffer;
- int bufsize;
- {
- char *old_buf;
- int new_point;
-
- /* init buffer */
- *ctag_buffer = '\0';
-
- /* go to the input buffer */
- old_buf = bufname;
- bufname = inbuf;
-
- /* return if end of buffer */
- if (point == size())
- return FALSE;
-
- /* read the buffer from the file */
- if (point + bufsize > size()) {
- new_point = size();
- }
- else {
- new_point = point + bufsize;
- }
- grab(point, new_point, ctag_buffer);
-
- /* place the end of buffer mark, adjust point and return success */
- ctag_buffer[new_point - point] = '\0';
- point = new_point;
- return TRUE;
- }
-
-
- /*----------------------------------------------------------------------------
- *
- * CSymbolWanted() returns true if flags are true for the symbol type passed
- * and false otherwise.
- *
- ---------------------------------------------------------------------------*/
-
- BOOLEAN CSymbolWanted(type)
- SymbolType type;
- {
- switch (type) {
- case Function:
- return CTagWantFunction;
- break;
- case ProtoType:
- return CTagWantProtoType;
- break;
- case GlobalVariable:
- return CTagWantGlobalVariable;
- break;
- case Define:
- return CTagWantDefine;
- break;
- case Macro:
- return CTagWantMacro;
- break;
- case Structure:
- return CTagWantStructure;
- break;
- case TypeDefinition:
- return CTagWantTypeDefinition;
- break;
- case Enumeration:
- return CTagWantEnumeration;
- break;
- case EnumerationConstant:
- return CTagWantEnumerationConstant;
- break;
- case Union:
- return CTagWantUnion;
- break;
- case Class:
- return CTagWantClass;
- break;
- default:
- return FALSE;
- break;
- }
- }
-
-
- /*----------------------------------------------------------------------------
- *
- * CTokenType() takes the token passed and determines if the token is a
- * special token. Special tokens require specialized handling in the parser.
- * The function returns the type of token according to the SymbolTypeEnum
- * enumeration. This routine can only tell so much from one symbol but will
- * return some type for all the *interesting* tokens. Anything that is
- * loosely defined is given back with the closest type available and the
- * parser must give it contextual meaning
- *
- ---------------------------------------------------------------------------*/
-
- SymbolType CTokenType(token)
- char *token;
- {
- /* look for dirty rejection */
- if (!strchr("cestu#", token[0]))
- return NOP;
-
- /* macro and non macro defines */
- if (!strcmp(token, "#"))
- return Define;
-
- /* structure declarations */
- if (!strcmp(token, "struct"))
- return Structure;
-
- /* type declaration */
- if (!strcmp(token, "typedef"))
- return TypeDefinition;
-
- /* enumeration declaration */
- if (!strcmp(token, "enum"))
- return Enumeration;
-
- /* union declaration */
- if (!strcmp(token, "union"))
- return Union;
-
- /* class declaration */
- if (!strcmp(token, "class"))
- return Class;
-
- /* external declaration */
- if (!strcmp(token, "extern"))
- return Extern;
-
- /* static declaration */
- if (!strcmp(token, "static"))
- return Static;
-
- /* do not recognize it as anything special */
- return NOP;
- }
-
-
- /*----------------------------------------------------------------------------
- *
- * CIsDeclarationToken() takes the token passed and determines if the token
- * is a declaration keyword used in C. The user may define new declaration
- * keywords via use of the typedef keyword. This alters the syntax of C. If
- * the syntax is changed in this way it is probable that this routine would
- * not return the correct value. For the standard uses of this routine that
- * information should not hinder performance for the vast majority of the
- * cases.
- *
- ---------------------------------------------------------------------------*/
-
- BOOLEAN CIsDeclarationToken(token)
- char *token;
- {
- int index;
-
- /* look for dirty rejection */
- if (!strchr(C_token_list[0], token[0]))
- return FALSE;
-
- /* march through array until membership is determined */
- for (index = 1; *C_token_list[index]; (index)++) {
-
- /* return true if token found */
- if (!strcmp(token, C_token_list[index])) {
- return TRUE;
- }
- }
-
- /* did not find it */
- return FALSE;
- }
-
-
- /*----------------------------------------------------------------------------
- *
- * COutputToken() will output a token of a given type. The token is output
- * if the passed token type is requested from the command line.
- *
- ---------------------------------------------------------------------------*/
-
- COutputToken(token, token_buffer, token_type, infname, outbuf)
- Token *token;
- Buffer *token_buffer;
- SymbolType token_type;
- char *infname;
- char *outbuf;
- {
- /* check that the symbol is wanted and output it if so */
- if (CSymbolWanted(token_type)) {
-
- if (token->extern_active) {
- if (!CTagWantExtern) {
- if (token_type != Function &&
- token_type != Define &&
- token_type != Macro) {
- return;
- }
- }
- }
- /* return if statics are not wanted */
- if (token->static_active) {
- if (!CTagWantStatic) {
- if (token_type != Define &&
- token_type != Macro) {
- return;
- }
- }
- }
- add_tag(token->prev_token, infname,
- *(token->prev_char_location) -
- strlen(token->prev_token));
- }
- }
-
-
- /*----------------------------------------------------------------------------
- *
- * CGetToken() will obtain the next token in the line pointed to by lptr
- * and in addition will return FALSE if EOL is reached or a comment character
- * is the first non whitespace character found. This routine is passed an
- * inbut buffer (Cbuf) and a current pointer into the buffer. It is the
- * responsibility of this routine to refill the buffer if required. Quoted
- * strings and single quoted characters are returned as a single token.
- * Comments are completely ignored by this parser. The token will not exceed
- * max_token_length - 1 in length (not including the end of line delimiter)
- *
- ---------------------------------------------------------------------------*/
-
- BOOLEAN CGetToken(inbuf, token, max_token_length, line_number)
- char *inbuf;
- char *token;
- int max_token_length;
- int *line_number;
- {
- /* a state of the lexical parser */
- #define Parse 0
- #define BeginCommentMaybe 1
- #define InComment 2
- #define InCommentEndMaybe 3
- #define InCPPComment 4
- #define InQuoteNormal 5
- #define InQuoteLiteral 6
- #define InSingleQuoteNormal 7
- #define InSingleQuoteLiteral 8
- #define EndSingleQuote 9
- #define WhiteSpace 10
- #define Exit 11
-
- typedef int State;
-
- State current_state; /* the current state of the parser */
-
- char c; /* the current character being examined */
- char *t; /* pointer into token */
-
- int token_length; /* the current token_length cannot exceed
- * max token length */
-
- /* init */
- current_state = WhiteSpace;
- t = token;
- *t = '\0';
- token_length = 0;
-
- /* parse the file for the next token */
- while (TRUE) {
-
- /* if the buffer has been completely used, return FALSE */
- if (point == size())
- return FALSE;
-
- c = curchar();
- point++;
-
- /* react on the state machine */
- switch (current_state) {
-
- case Parse:
- switch (c) {
-
- case '/':
-
- /* return if we already have a token */
- if (t != token) {
- point--;
- current_state = Exit;
- }
- else {
- /* this may be the begin if a comment or the
- * division symbol, read the next character after
- * verifying it the buffer doesn't need refilling */
- current_state = BeginCommentMaybe;
- *t = c;
- }
- break;
-
- case '\"':
-
- /* return if we already have a token */
- if (t != token) {
- point--;
- current_state = Exit;
- }
- else {
- current_state = InQuoteNormal;
- *t++ = c;
- token_length++;
- }
- break;
-
- case '\'':
-
- /* return if we already have a token */
- if (t != token) {
- point--;
- current_state = Exit;
- }
- else {
- current_state = InSingleQuoteNormal;
- *t++ = c;
- token_length++;
- }
- break;
-
- default:
-
- /* if it is a delimiter than stop processing */
- if (IsDelim(c)) {
-
- /* if a token exists then back up in buffer */
- if (t != token) {
- point--;
- }
- else {
- *t++ = c;
- token_length++;
- }
- current_state = Exit;
- }
- else {
-
- /* normal character, store it in the token */
- *t++ = c;
- token_length++;
- }
- break;
- }
- break;
-
- case WhiteSpace:
-
- /* pass over whitespace, backup one char if no longer in
- * white space region */
- if (!IsWhite(c)) {
- current_state = Parse;
- point--;
- }
- else {
-
- /* check for newline */
- if (c == '\n') {
- (*line_number)++;
- }
- }
- break;
-
- case BeginCommentMaybe:
- switch (c) {
-
- case '/':
- current_state = InCPPComment;
- break;
-
- case '*':
- current_state = InComment;
- break;
-
- default:
- t++;
- token_length++;
- point--;
- current_state = Exit;
- break;
- }
- break;
-
- case InComment:
- switch (c) {
-
- case '*':
- /* this is potentially the end of the comment */
- current_state = InCommentEndMaybe;
- break;
-
- case '\n':
- /* new line just increment state variables */
- (*line_number)++;
- break;
-
- default:
- break;
- }
- break;
-
- case InCommentEndMaybe:
- switch (c) {
-
- case '/':
- /* this is indeed the end of the comment */
- current_state = WhiteSpace;
- break;
-
- case '*':
- /* this is also perhaps the end of comment */
- break;
-
- case '\n':
- /* new line just increment state variables */
- (*line_number)++;
-
- default:
- /* still part of the current comment */
- current_state = InComment;
- break;
- }
- break;
-
- case InCPPComment:
- if (c == '\n') {
- current_state = WhiteSpace;
- (*line_number)++;
- }
- break;
-
- case InQuoteNormal:
- switch (c) {
-
- case '\"':
- /* end of InQuoteNormal state */
- current_state = Exit;
- break;
-
- case '\\':
- /* InQuoteLiteral state */
- current_state = InQuoteLiteral;
- break;
-
- default:
- /* normal dull behavior */
- break;
- }
- *t++ = c;
- token_length++;
- break;
-
- case InQuoteLiteral:
- /* this char is simply copied */
- current_state = InQuoteNormal;
- *t++ = c;
- token_length++;
- break;
-
- case InSingleQuoteNormal:
- switch (c) {
-
- case '\\':
- /* InQuoteLiteral state */
- current_state = InSingleQuoteLiteral;
- break;
-
- default:
- /* Just copy the character and move to close quote */
- current_state = EndSingleQuote;
- break;
- }
- *t++ = c;
- token_length++;
- break;
-
- case InSingleQuoteLiteral:
- /* this char is simply copied */
- current_state = EndSingleQuote;
- *t++ = c;
- token_length++;
- break;
-
- case EndSingleQuote:
-
- /* end of InSingleQuote states */
- current_state = Exit;
- *t++ = c;
- token_length++;
- break;
-
- case Exit:
- *t = '\0';
- point--;
- return TRUE;
- break;
-
- default: /* not reached */
- break;
- }
-
- /* if the token_length has gotten too large then return */
- if (token_length == max_token_length - 1) {
- *t = '\0';
- point--;
- return TRUE;
- }
- }
- }
-
-
- /*----------------------------------------------------------------------------
- *
- * CFillToken() will obtain the next lexical parser from the buffer and move
- * the token into the Token structure. TRUE is returned if the lexical
- * parser returns TRUE, otherwise FALSE is returned.
- *
- ---------------------------------------------------------------------------*/
-
- BOOLEAN CFillToken(token, token_buffer)
- Token *token;
- Buffer *token_buffer;
- {
- BOOLEAN token_found;
-
- /* obtain the next token */
- token_found = CGetToken(token_buffer->inbuf,
- token->cur_token,
- MAX_TOKEN_LENGTH,
- &(token_buffer->token_line_location));
-
- /* if one is around then update the state for that token */
- if (token_found) {
- /* update location variables */
- *(token->cur_char_location) = point;
- *(token->cur_token_line) = token_buffer->token_line_location;
- }
-
- return token_found;
- }
-
-
- /*----------------------------------------------------------------------------
- *
- * CTokenSwap() will swap the token variables and set the prev_ variables
- * correctly
- *
- ---------------------------------------------------------------------------*/
-
- CTokenSwap(token)
- Token *token;
- {
- char *charswap; /* temporary swap variable */
- int *longintswap; /* temporary swap variable */
-
- /* swap the active token string */
- charswap = token->cur_token;
- token->cur_token = token->prev_token;
- token->prev_token = charswap;
-
- /* swap the active character location */
- longintswap = token->cur_char_location;
- token->cur_char_location = token->prev_char_location;
- token->prev_char_location = longintswap;
-
- /* swap the active line */
- longintswap = token->cur_token_line;
- token->cur_token_line = token->prev_token_line;
- token->prev_token_line = longintswap;
- }
-
-
- /*----------------------------------------------------------------------------
- *
- * CDiscardLine() will move past all the characters up to the next EOL that
- * is not preceded by a line continuation character. This routine will
- * return TRUE if there was a '(' character as the first character. This
- * return value is useful for determining if #defines are macros or simple
- * defines.
- *
- ---------------------------------------------------------------------------*/
-
- BOOLEAN CDiscardLine(inbuf, line_number)
- char *inbuf;
- int *line_number;
- {
- char c; /* the current character being examined */
-
- BOOLEAN line_continue; /* TRUE if line continuation true */
- BOOLEAN is_macro; /* TRUE if the first delimiter char is '(' */
- BOOLEAN first_char; /* TRUE when first character is active */
-
- /* init */
- c = '\0';
- line_continue = FALSE;
- is_macro = FALSE;
- first_char = TRUE;
-
- /* if the end of buffer is reached then return */
- if (point == size())
- return is_macro;
-
- /* loop until non continued EOL encountered */
- do {
-
- /* determine if the first character is a '(' */
- if (first_char) {
- if (c == '(')
- is_macro = TRUE;
- first_char = FALSE;
- }
-
- /* handle the newline */
- if (c == '\n') {
- line_continue = FALSE;
- (*line_number)++;
- }
-
- c = curchar();
- point++;
-
- if (c == '\\')
- line_continue = TRUE;
-
-
- } while (c != '\n' || line_continue);
-
- (*line_number)++;
- return is_macro;
- }
-
-
- /*----------------------------------------------------------------------------
- *
- * CParseDefine() will parse macros and defines in standard C syntax
- * distinguish between a macro and a define, if there is a punctuator '(',
- * then it is a macro. Take the token just before the first space or
- * punctuator
- *
- ---------------------------------------------------------------------------*/
-
- CParseDefine(token, token_buffer, infname, outbuf)
- Token *token;
- Buffer *token_buffer;
- char *infname;
- char *outbuf;
- {
- SymbolType tmptype; /* a temporay type variable */
-
- BOOLEAN token_found;
- BOOLEAN is_macro;
-
- token_found = CFillToken(token, token_buffer);
- if (token_found) {
-
- /* save the previous values */
- CTokenSwap(token);
-
-
- /* get rid of the rest of the line and return the define type */
- is_macro =
- CDiscardLine(token_buffer->inbuf,
- &(token_buffer->token_line_location));
-
- /* react on the token */
- if (is_macro) {
- tmptype = Macro;
- }
- else {
- tmptype = Define;
- }
-
- /* output the token */
- COutputToken(token, token_buffer, tmptype,
- infname, outbuf);
- }
- }
-
-
- /*----------------------------------------------------------------------------
- *
- * CParsePreprocessorDirective() will parse preprocessor directives in
- * standard C syntax
- *
- ---------------------------------------------------------------------------*/
-
- CParsePreprocessorDirective(token, token_buffer, infname, outbuf)
- Token *token;
- Buffer *token_buffer;
- char *infname;
- char *outbuf;
- {
- BOOLEAN token_found;
-
- token_found = CFillToken(token, token_buffer);
- if (token_found) {
-
- /* deal with a define directive */
- if (!strcmp(token->cur_token, "define")) {
- CParseDefine(token, token_buffer, infname, outbuf);
- }
- else {
-
- /* increment the else block level pointer */
- if (!strcmp(token->cur_token, "else")) {
- token->else_nesting_level++;
- }
- else {
-
- /* decrement the else block level pointer */
- if (!strcmp(token->cur_token, "endif")) {
- if (token->else_nesting_level)
- token->else_nesting_level--;
- }
- else {
-
- /* if an else has not already been seen then increment
- * the level */
- if (!strcmp(token->cur_token, "elif")) {
- token->else_nesting_level++;
- }
- }
- }
-
- /* remove the rest of the directive line including line
- * continuation characters */
- CDiscardLine(token_buffer->inbuf,
- &(token_buffer->token_line_location));
- }
- }
- }
-
-
- /*----------------------------------------------------------------------------
- *
- * CNextToken() will obtain the next token in the buffer and update the
- * appropriate variables.
- *
- ---------------------------------------------------------------------------*/
-
- BOOLEAN CNextToken(token, token_buffer, infname, outbuf)
- Token *token;
- Buffer *token_buffer;
- char *infname;
- char *outbuf;
- {
- BOOLEAN token_found;
- BOOLEAN cycle;
-
- do {
- /* obtain the next token */
- token_found = CFillToken(token, token_buffer);
-
- /* check for preprocessing directives and parse them if found */
- if (token->cur_token[0] == '#' && token_found) {
-
- /* parse the directive and loop back to get another token */
- CParsePreprocessorDirective(token, token_buffer, infname, outbuf);
- cycle = TRUE;
- }
- else {
-
- /* we found a token to pass to the semantic parser */
- cycle = FALSE;
- }
- } while (cycle);
-
- /* return it */
- return token_found;
- }
-
-
- /*----------------------------------------------------------------------------
- *
- * CToLevelZero() will increment the nesting level and then parse tokens
- * until level zero has been reached again. If tokens are no longer
- * available this loop will stop.
- *
- ---------------------------------------------------------------------------*/
-
- CToLevelZero(token, token_buffer, infname, outbuf)
- Token *token;
- Buffer *token_buffer;
- char *infname;
- char *outbuf;
- {
- int nesting_level = 1;
-
- token->else_nesting_level = 0;
-
- while (nesting_level) {
- if (CGetToken(token_buffer->inbuf, token->cur_token,
- MAX_TOKEN_LENGTH,
- &(token_buffer->token_line_location))) {
- if (token->cur_token[0] == '#') {
- CParsePreprocessorDirective(token, token_buffer,
- infname, outbuf);
- }
- else {
-
- /* only count open brace, parens and brackets within blocks
- * of one element of an ifdef code block */
- if (!token->else_nesting_level) {
- if (strchr(C_open_brace, token->cur_token[0]))
- nesting_level++;
- else
- if (strchr(C_close_brace, token->cur_token[0]))
- nesting_level--;
- }
- }
- }
- else
- nesting_level = 0;
- }
- }
-
-
- /*----------------------------------------------------------------------------
-  *
-  * CToPunctuator() reads tokens until one starting with a punctuator is the
-  * current token.  It returns TRUE when a punctuator was reached and FALSE
-  * when the tokens ran out first.  token_count is reset on entry and counts
-  * every read attempted, the punctuator included.  CTokenSwap() is called
-  * before each read.
-  *
-  ---------------------------------------------------------------------------*/
-
- BOOLEAN CToPunctuator(token, token_buffer, infname, outbuf)
-     Token *token;
-     Buffer *token_buffer;
-     char *infname;
-     char *outbuf;
- {
-     token->token_count = 0;
-
-     for (;;) {
-
-         /* account for the token about to be read and save the old one */
-         token->token_count++;
-         CTokenSwap(token);
-
-         /* no more input: report failure */
-         if (!CNextToken(token, token_buffer, infname, outbuf)) {
-             return FALSE;
-         }
-
-         /* stop on the first punctuator */
-         if (IsPunctuator(token->cur_token[0])) {
-             return TRUE;
-         }
-     }
- }
-
-
- /*----------------------------------------------------------------------------
-  *
-  * CParseParens() walks through a parenthesised declaration, starting just
-  * after its opening '(' and stopping once the matching ')' has been read or
-  * the tokens run out.  Tokens other than '(', ')' and '[' are passed to
-  * CTokenSwap() so that the last of them is left as prev_token.  Returns
-  * TRUE if a '[' was seen inside the parens and FALSE otherwise.
-  *
-  ---------------------------------------------------------------------------*/
-
- BOOLEAN CParseParens(token, token_buffer, infname, outbuf)
-     Token *token;
-     Buffer *token_buffer;
-     char *infname;
-     char *outbuf;
- {
-     BOOLEAN bracket_seen = FALSE;
-     int depth = 1;
-
-     token->else_nesting_level = 0;
-
-     while (depth) {
-
-         if (!CNextToken(token, token_buffer, infname, outbuf)) {
-             break;
-         }
-
-         /* tokens read while else_nesting_level is set are ignored */
-         if (token->else_nesting_level) {
-             continue;
-         }
-
-         switch (token->cur_token[0]) {
-
-             case '(':
-                 depth++;
-                 break;
-
-             case ')':
-
-                 /* a ')' without a matching '(' would be a syntax error;
-                  * it is tolerated rather than reported */
-                 if (depth) {
-                     depth--;
-                 }
-                 break;
-
-             case '[':
-
-                 /* skip over the array bounds */
-                 bracket_seen = TRUE;
-                 CToLevelZero(token, token_buffer, infname, outbuf);
-                 break;
-
-             default:
-                 CTokenSwap(token);
-                 break;
-         }
-     }
-
-     return bracket_seen;
- }
-
-
- /*----------------------------------------------------------------------------
-  *
-  * COutputCommaDelimitedToken() passes the token to COutputToken() with the
-  * given type and then advances through the statement until the current
-  * token starts with ',' or ';', or until the tokens run out.  Bracketed
-  * sections met on the way are skipped with CToLevelZero().
-  *
-  ---------------------------------------------------------------------------*/
-
- COutputCommaDelimitedToken(token, token_buffer, token_type, infname, outbuf)
-     Token *token;
-     Buffer *token_buffer;
-     SymbolType token_type;
-     char *infname;
-     char *outbuf;
- {
-     /* output the token */
-     COutputToken(token, token_buffer, token_type, infname, outbuf);
-
-     /* move on to the next list punctuator (',' or ';') */
-     while (token->cur_token[0] != ',' &&
-            token->cur_token[0] != ';') {
-
-         /* step over anything enclosed in braces, parens or brackets */
-         if (strchr(C_open_brace, token->cur_token[0])) {
-             CToLevelZero(token, token_buffer, infname, outbuf);
-         }
-
-         if (!CToPunctuator(token, token_buffer, infname, outbuf)) {
-             break;
-         }
-     }
- }
-
-
- /*----------------------------------------------------------------------------
-  *
-  * CParseCommaDelimitedList() parses a token list separated by commas until
-  * a ';' is the current token or the tokens run out.  Each list element is
-  * passed to COutputToken() with the given type.
-  *
-  ---------------------------------------------------------------------------*/
-
- CParseCommaDelimitedList(token, token_buffer, token_type, infname, outbuf)
-     Token *token;
-     Buffer *token_buffer;
-     SymbolType token_type;
-     char *infname;
-     char *outbuf;
- {
-     BOOLEAN more = TRUE;
-     char lead;
-
-     while (more &&
-            token->cur_token[0] != ';') {
-
-         more = CToPunctuator(token, token_buffer, infname, outbuf);
-         if (!more) {
-             break;
-         }
-
-         lead = token->cur_token[0];
-
-         if (lead == '(') {
-
-             /* an embedded variable declaration, either a complex variable
-              * pointer or a function pointer: pick out the internal token
-              * and then treat it like any other list element */
-             CParseParens(token, token_buffer, infname, outbuf);
-         }
-         else if (lead != '[' &&
-                  lead != ',' &&
-                  lead != ';' &&
-                  lead != '=') {
-
-             /* not an ending token for this kind of list, keep looking */
-             continue;
-         }
-
-         /* output the element and parse to the next ',' or ';' */
-         COutputToken(token, token_buffer, token_type, infname, outbuf);
-         while (more &&
-                token->cur_token[0] != ',' &&
-                token->cur_token[0] != ';') {
-             if (strchr(C_open_brace, token->cur_token[0])) {
-                 CToLevelZero(token, token_buffer, infname, outbuf);
-             }
-             more = CToPunctuator(token, token_buffer, infname, outbuf);
-         }
-     }
- }
-
-
- /*----------------------------------------------------------------------------
-  *
-  * CParseFunctionOrGlobalVariable() is entered with a '(' as the current
-  * token.  It decides whether the statement is a function definition, a
-  * prototype or a global variable declaration, outputs the matching tag and
-  * parses through the remainder of the statement (for a function, through
-  * its body).
-  *
-  * The token in front of the '(' is saved before the parens are parsed and
-  * is copied back to prev_token when the statement turns out to be a
-  * function or prototype named by that token.
-  *
-  * If the save buffer cannot be allocated the routine returns at once
-  * without reading any further tokens.
-  *
-  ---------------------------------------------------------------------------*/
-
- CParseFunctionOrGlobalVariable(token, token_buffer, infname, outbuf)
-     Token *token;
-     Buffer *token_buffer;
-     char *infname;
-     char *outbuf;
- {
-     char *line_buf;             /* saved copy of the token before '(' */
-     int charloc;                /* saved char location of that token */
-     int tokenline;              /* saved line number of that token */
-
-     BOOLEAN token_found;
-     BOOLEAN punctuator_found;
-     BOOLEAN last_token_known;
-     BOOLEAN variable_seen;
-
-     /* init */
-     charloc = 0;
-     tokenline = 1;
-     line_buf = malloc(MAX_TOKEN_LENGTH);
-     if (!line_buf) {
-
-         /* no memory for the save buffer; give up on this statement rather
-          * than write through a null pointer */
-         return;
-     }
-     line_buf[0] = '\0';
-
-     /* save the previous token */
-     last_token_known = CIsDeclarationToken(token->prev_token);
-     if (!last_token_known) {
-
-         /* If this is not a known token then it may be a function name.
-          * Save it then look further at the syntax.  This also may be a
-          * symbol previously defined via a typedef which alters the syntax
-          * of C/C++ */
-         strcpy(line_buf, token->prev_token);
-         charloc = *(token->prev_char_location);
-         tokenline = *(token->prev_token_line);
-     }
-
-     /* This is a function, prototype or global variable; parse through the
-      * parens until their nesting level is zero again. */
-     variable_seen = CParseParens(token, token_buffer, infname, outbuf);
-
-     /* Check to see if this is a function, prototype, or global variable.
-      * If the token is a ';' and last_token_known is false then we assume a
-      * prototype.  Strange variable declarations may fool this, but not
-      * likely.  If the character is a '(' then it is certainly a function
-      * or prototype unless variable_seen is TRUE, then it is a variable.
-      * If the character is a '[', '=' or ',' then it is certainly a
-      * variable declaration.  If the character is a ';' and
-      * last_token_known is true then it is a variable declaration.  If the
-      * token is anything else then it is a function. */
-     token_found = CNextToken(token, token_buffer, infname, outbuf);
-     if (token_found) {
-         switch (token->cur_token[0]) {
-
-             case ';':
-
-                 /* determine if a prototype or a variable declaration.  If
-                  * last_token_known is true then it is a global variable.
-                  * If the token was a symbol defined by a typedef then this
-                  * distinction is incorrect since typedef actually alters
-                  * syntax.  This is correct for the large majority of cases
-                  * since most do not enclose simple variable declarations
-                  * in parens. */
-                 if (last_token_known) {
-
-                     /* this is a global variable */
-                     COutputToken(token, token_buffer, GlobalVariable,
-                                  infname, outbuf);
-                 }
-                 else {
-
-                     /* this is a prototype, copy saved token back to
-                      * prev_token, output and continue */
-                     strcpy(token->prev_token, line_buf);
-                     *(token->prev_char_location) = charloc;
-                     *(token->prev_token_line) = tokenline;
-                     COutputToken(token, token_buffer, ProtoType,
-                                  infname, outbuf);
-                 }
-                 break;
-
-             case '(':
-
-                 if (variable_seen) {
-
-                     /* this is a variable declaration */
-                     COutputCommaDelimitedToken(token, token_buffer,
-                                                GlobalVariable,
-                                                infname, outbuf);
-                     CParseCommaDelimitedList(token, token_buffer,
-                                              GlobalVariable,
-                                              infname, outbuf);
-                 }
-                 else {
-
-                     /* move to level zero again */
-                     CToLevelZero(token, token_buffer, infname, outbuf);
-
-                     /* obtain the next token */
-                     token_found = CNextToken(token, token_buffer,
-                                              infname, outbuf);
-
-                     if (token_found) {
-
-                         /* check if prototype, function or function
-                          * pointer variable declaration */
-                         switch (token->cur_token[0]) {
-
-                             case '=':
-
-                                 /* this is a function pointer variable
-                                  * declaration */
-                                 COutputCommaDelimitedToken(token,
-                                                            token_buffer,
-                                                            GlobalVariable,
-                                                            infname, outbuf);
-                                 CParseCommaDelimitedList(token, token_buffer,
-                                                          GlobalVariable,
-                                                          infname, outbuf);
-                                 break;
-
-                             case ';':
-
-                                 /* this is a prototype, output it */
-                                 COutputToken(token, token_buffer,
-                                              ProtoType, infname, outbuf);
-                                 break;
-
-                             default:
-
-                                 /* this is a function */
-                                 COutputToken(token, token_buffer,
-                                              Function, infname, outbuf);
-
-                                 /* parse through function: find the '{'
-                                  * that opens the body, then skip it */
-                                 punctuator_found = TRUE;
-                                 while (token->cur_token[0] != '{' &&
-                                        punctuator_found) {
-                                     punctuator_found =
-                                         CToPunctuator(token, token_buffer,
-                                                       infname, outbuf);
-                                 }
-                                 if (punctuator_found) {
-                                     CToLevelZero(token, token_buffer,
-                                                  infname, outbuf);
-                                 }
-                                 break;
-                         }
-                     }
-                 }
-                 break;
-
-             case '[':
-             case '=':
-             case ',':
-
-                 /* global variables */
-                 COutputCommaDelimitedToken(token, token_buffer,
-                                            GlobalVariable,
-                                            infname, outbuf);
-                 CParseCommaDelimitedList(token, token_buffer,
-                                          GlobalVariable,
-                                          infname, outbuf);
-                 break;
-
-             default:
-
-                 /* this is a function, copy saved token back to prev_token,
-                  * output and continue */
-                 strcpy(token->prev_token, line_buf);
-                 *(token->prev_char_location) = charloc;
-                 *(token->prev_token_line) = tokenline;
-                 COutputToken(token, token_buffer,
-                              Function, infname, outbuf);
-
-                 /* parse through function: find the '{' that opens the
-                  * body, then skip it */
-                 punctuator_found = TRUE;
-                 while (token->cur_token[0] != '{' &&
-                        punctuator_found) {
-                     punctuator_found =
-                         CToPunctuator(token, token_buffer, infname, outbuf);
-                 }
-                 if (punctuator_found) {
-                     CToLevelZero(token, token_buffer, infname, outbuf);
-                 }
-                 break;
-         }
-     }
-     free(line_buf);
- }
-
-
- /*----------------------------------------------------------------------------
-  *
-  * CParseNOP() handles a token that is not yet recognized as a keyword.
-  * Only punctuators cause any action here:
-  *
-  *   '[', ',', '=' or ';'  a global variable declaration
-  *   '{'                   a structure declaration (C++ 2.0 syntax),
-  *                         possibly followed by a list of variables
-  *   '('                   a function, prototype or global variable
-  *
-  * After any of these the extern and static flags are cleared.  Every other
-  * token is a true NOP and leaves the state untouched.  Closing punctuators
-  * should not be met here unless the syntax is in a bad way.
-  *
-  ---------------------------------------------------------------------------*/
-
- CParseNOP(token, token_buffer, infname, outbuf)
-     Token *token;
-     Buffer *token_buffer;
-     char *infname;
-     char *outbuf;
- {
-     char lead;
-
-     lead = token->cur_token[0];
-
-     if (lead == ';' || lead == '=' || lead == ',' || lead == '[') {
-
-         /* global variables are here */
-         COutputCommaDelimitedToken(token, token_buffer,
-                                    GlobalVariable,
-                                    infname, outbuf);
-         CParseCommaDelimitedList(token, token_buffer,
-                                  GlobalVariable,
-                                  infname, outbuf);
-     }
-     else if (lead == '{') {
-
-         /* this is a structure (C++ syntax), output it */
-         COutputToken(token, token_buffer, Structure, infname, outbuf);
-
-         /* move through the declaration */
-         CToLevelZero(token, token_buffer, infname, outbuf);
-
-         /* if another token is available then output the variable list */
-         if (CNextToken(token, token_buffer, infname, outbuf)) {
-             CParseCommaDelimitedList(token, token_buffer,
-                                      GlobalVariable,
-                                      infname, outbuf);
-         }
-     }
-     else if (lead == '(') {
-         CParseFunctionOrGlobalVariable(token, token_buffer,
-                                        infname, outbuf);
-     }
-     else {
-
-         /* true NOP */
-         return;
-     }
-
-     /* a statement was consumed, so any storage class seen is used up */
-     token->extern_active = FALSE;
-     token->static_active = FALSE;
- }
-
-
- /*----------------------------------------------------------------------------
-  *
-  * CParseEnumerationConstants() parses the constants within an enumeration
-  * declaration and outputs each one as an EnumerationConstant.  It is
-  * entered with the opening '{' as the current token and stops when the
-  * closing '}' is the current token or the tokens run out.
-  *
-  * A constant is recognized by the punctuator that follows it: ',' or '='
-  * for any constant, or '}' for a final constant that has neither a
-  * trailing comma nor an initializer.
-  *
-  ---------------------------------------------------------------------------*/
-
- CParseEnumerationConstants(token, token_buffer, infname, outbuf)
-     Token *token;
-     Buffer *token_buffer;
-     char *infname;
-     char *outbuf;
- {
-     BOOLEAN punctuator_found;
-
-     char *open_brace = "({[";
-
-     /* obtain the enumeration constants */
-     punctuator_found = TRUE;
-
-     while (token->cur_token[0] != '}' &&
-            punctuator_found) {
-         punctuator_found = CToPunctuator(token, token_buffer,
-                                          infname, outbuf);
-         if (punctuator_found) {
-             switch (token->cur_token[0]) {
-
-                 case ',':
-                 case '=':
-
-                     /* this is one of the proper ending tokens for this
-                      * type of declaration list, so output it and parse to
-                      * the next correct punctuator */
-                     COutputToken(token, token_buffer, EnumerationConstant,
-                                  infname, outbuf);
-                     while (token->cur_token[0] != ',' &&
-                            token->cur_token[0] != '}' &&
-                            punctuator_found) {
-
-                         /* skip bracketed parts of an initializer */
-                         if (strchr(open_brace, token->cur_token[0])) {
-                             CToLevelZero(token, token_buffer,
-                                          infname, outbuf);
-                         }
-                         punctuator_found = CToPunctuator(token,
-                                                          token_buffer,
-                                                          infname, outbuf);
-                     }
-                     break;
-
-                 case '}':
-
-                     /* the last constant of a list such as { A, B } is
-                      * followed directly by the closing brace.  A
-                      * token_count of 1 means the brace was the only token
-                      * read (empty list or trailing comma), so there is
-                      * nothing to output in that case. */
-                     if (token->token_count > 1) {
-                         COutputToken(token, token_buffer,
-                                      EnumerationConstant,
-                                      infname, outbuf);
-                     }
-                     break;
-
-                 default:
-                     break;
-             }
-         }
-     }
- }
-
-
- /*----------------------------------------------------------------------------
-  *
-  * CParseDeclarationStatement() parses struct, enum and union statements.
-  * It moves to the first punctuator after the keyword and acts on it:
-  *
-  *   '{'                   an object declaration.  The tag name is output
-  *                         with the passed type unless the brace directly
-  *                         follows the keyword, the body is parsed, and any
-  *                         variables declared after it are output.
-  *   ';', '=', ',' or '['  a global variable declaration list
-  *   '('                   a function, prototype or global variable
-  *
-  ---------------------------------------------------------------------------*/
-
- CParseDeclarationStatement(token, token_buffer, type, infname, outbuf)
-     Token *token;
-     Buffer *token_buffer;
-     SymbolType type;
-     char *infname;
-     char *outbuf;
- {
-     /* nothing to parse if the input ends before any punctuator */
-     if (!CToPunctuator(token, token_buffer, infname, outbuf)) {
-         return;
-     }
-
-     switch (token->cur_token[0]) {
-
-         case '{':
-
-             /* a token_count of 1 means the brace was the first token read
-              * after the keyword, so there is no tag name to output */
-             if (token->token_count != 1) {
-                 COutputToken(token, token_buffer, type, infname, outbuf);
-             }
-
-             if (token->token_count != 1 &&
-                 type == Enumeration) {
-
-                 /* obtain the enumeration constants */
-                 CParseEnumerationConstants(token, token_buffer,
-                                            infname, outbuf);
-             }
-             else {
-
-                 /* move through the declaration body */
-                 CToLevelZero(token, token_buffer, infname, outbuf);
-             }
-
-             /* variables may be declared after the closing brace; their
-              * first name has not been reached yet, so only the list
-              * parser is needed */
-             if (CNextToken(token, token_buffer, infname, outbuf)) {
-                 CParseCommaDelimitedList(token, token_buffer,
-                                          GlobalVariable,
-                                          infname, outbuf);
-             }
-             break;
-
-         case ';':
-         case '=':
-         case ',':
-         case '[':
-
-             /* the first variable name has just been passed: output it,
-              * then take care of the rest of the list */
-             COutputCommaDelimitedToken(token, token_buffer,
-                                        GlobalVariable,
-                                        infname, outbuf);
-             CParseCommaDelimitedList(token, token_buffer,
-                                      GlobalVariable,
-                                      infname, outbuf);
-             break;
-
-         case '(':
-
-             CParseFunctionOrGlobalVariable(token, token_buffer,
-                                            infname, outbuf);
-             break;
-
-         default:
-
-             /* not reached */
-             break;
-     }
- }
-
-
- /*----------------------------------------------------------------------------
-  *
-  * CParseTypeDefinition() parses the typedef statement.  Tokens are read
-  * until the first *correct* punctuator (';', ',' or '[') and the token just
-  * before it is output as the typedef name.  Named struct, enum and union
-  * declarations made inside the typedef are output on the way, as are
-  * enumeration constants.  A second top level set of parens marks a function
-  * typedef, whose name is output as soon as that set is reached.
-  *
-  ---------------------------------------------------------------------------*/
-
- CParseTypeDefinition(token, token_buffer, infname, outbuf)
-     Token *token;
-     Buffer *token_buffer;
-     char *infname;
-     char *outbuf;
- {
-     BOOLEAN have_token;         /* result of the latest CNextToken() */
-     BOOLEAN after_parens;       /* previous token was a set of parens */
-     BOOLEAN name_output;        /* function typedef name already output */
-
-     int tokens_seen;
-     SymbolType leading_type;
-     char lead;
-
-     /* nothing follows the typedef keyword */
-     if (!CNextToken(token, token_buffer, infname, outbuf)) {
-         return;
-     }
-     have_token = TRUE;
-
-     /* remember the type of the first token for future use */
-     leading_type = CTokenType(token->cur_token);
-
-     /* parse the typedef */
-     after_parens = FALSE;
-     name_output = FALSE;
-     tokens_seen = 0;
-     while (have_token &&
-            !name_output &&
-            token->cur_token[0] != ';' &&
-            token->cur_token[0] != ',' &&
-            token->cur_token[0] != '[') {
-
-         lead = token->cur_token[0];
-
-         if (lead == '{') {
-
-             /* if more than one token came before the brace then this is a
-              * named declaration; output the name, but only for an enum,
-              * struct or union */
-             if (tokens_seen > 1 &&
-                 (leading_type == Structure ||
-                  leading_type == Enumeration ||
-                  leading_type == Union)) {
-                 COutputToken(token, token_buffer,
-                              leading_type, infname, outbuf);
-             }
-
-             if (leading_type == Enumeration) {
-
-                 /* obtain the enumeration constants */
-                 CParseEnumerationConstants(token, token_buffer,
-                                            infname, outbuf);
-             }
-             else {
-
-                 /* go back to level 0 */
-                 CToLevelZero(token, token_buffer, infname, outbuf);
-             }
-         }
-         else if (lead == '(') {
-
-             if (after_parens) {
-
-                 /* a second set of parens right after the first: this is a
-                  * function typedef, so output the previous token and skip
-                  * the parameter list */
-                 COutputToken(token, token_buffer,
-                              TypeDefinition, infname, outbuf);
-                 CToLevelZero(token, token_buffer, infname, outbuf);
-                 name_output = TRUE;
-             }
-             else {
-
-                 /* move back to the top level */
-                 CParseParens(token, token_buffer, infname, outbuf);
-
-                 /* the next paren we find we know we have a token */
-                 after_parens = TRUE;
-
-                 /* swap to prevent loss of the token; the swap below then
-                  * puts it back in place */
-                 CTokenSwap(token);
-             }
-         }
-         else {
-
-             /* another token after a paren parse means the token in the
-              * parens was nothing special */
-             after_parens = FALSE;
-         }
-
-         /* get another token */
-         CTokenSwap(token);
-         have_token = CNextToken(token, token_buffer, infname, outbuf);
-         tokens_seen++;
-     }
-
-     /* output the typedef names if appropriate */
-     if (have_token &&
-         token->prev_token[0] != '}') {
-
-         /* don't output the first name if already done */
-         if (!name_output) {
-             COutputCommaDelimitedToken(token, token_buffer,
-                                        TypeDefinition,
-                                        infname, outbuf);
-         }
-
-         /* parse through the rest of the typedef names */
-         CParseCommaDelimitedList(token, token_buffer,
-                                  TypeDefinition,
-                                  infname, outbuf);
-     }
- }
-
-
- /*----------------------------------------------------------------------------
-  *
-  * CParseClass() parses the C++ class syntax.  The token just before the
-  * first '{', ':' or ';' is output as the class name, then the remainder of
-  * the statement is parsed up to its ';', running through the top level
-  * braces if there are any.
-  *
-  ---------------------------------------------------------------------------*/
-
- CParseClass(token, token_buffer, infname, outbuf)
-     Token *token;
-     Buffer *token_buffer;
-     char *infname;
-     char *outbuf;
- {
-     char lead;
-
-     /* find the punctuator that ends the class name */
-     for (;;) {
-         lead = token->cur_token[0];
-         if (lead == '{' || lead == ':' || lead == ';') {
-             break;
-         }
-
-         /* save the current token and get the next one; without one there
-          * is no name to output */
-         CTokenSwap(token);
-         if (!CNextToken(token, token_buffer, infname, outbuf)) {
-             return;
-         }
-     }
-
-     /* output the class name */
-     COutputToken(token, token_buffer, Class, infname, outbuf);
-
-     /* parse through the remainder of the statement */
-     while (token->cur_token[0] != ';') {
-         if (token->cur_token[0] == '{') {
-
-             /* move back to the zero level */
-             CToLevelZero(token, token_buffer, infname, outbuf);
-         }
-
-         if (!CNextToken(token, token_buffer, infname, outbuf)) {
-             break;
-         }
-     }
- }
-
-
- /*----------------------------------------------------------------------------
-  *
-  * CTags() tags an input stream assuming standard ANSI 2.0 C/C++ syntax.
-  * It allocates the parser state, reads tokens until none are left and
-  * dispatches each one on its type to the matching parse routine.
-  *
-  *   inbuf    the input buffer to read tokens from
-  *   infname  passed through to the parse and output routines
-  *   outbuf   passed through to the parse and output routines
-  *
-  * If the parser state cannot be allocated nothing is tagged.
-  *
-  * Long tokens are allowed, ANSI requires only 31 significant, note that if
-  * token length exceeds MAX_TOKEN_LENGTH this parser will die a horrible
-  * death (or at the very least do ugly things to someone else's memory),
-  * with the large size of MAX_TOKEN_LENGTH, anyone caught on this hook
-  * deserves what they get...
-  *
-  ---------------------------------------------------------------------------*/
-
- CTags(inbuf, infname, outbuf)
-     char *inbuf;
-     char *infname;
-     char *outbuf;
- {
-     SymbolType type;            /* the type of the current token */
-
-     Token *token;               /* current state variable */
-     Buffer *token_buffer;       /* input buffer */
-
-     BOOLEAN token_found;        /* set by CNextToken() */
-
-     /* allocate the Buffer and Token memory, each with its own size */
-     token = (Token *) malloc(sizeof(Token));
-     token_buffer = (Buffer *) malloc(sizeof(Buffer));
-     if (!token || !token_buffer) {
-
-         /* out of memory: release whichever half was obtained and leave
-          * without touching any parser state */
-         if (token) {
-             free(token);
-         }
-         if (token_buffer) {
-             free(token_buffer);
-         }
-         return;
-     }
-
-     /* init the parser engine */
-     point = 0;
-     CParserInit();
-     token->token_count = 0;
-
-     /* init the current token buffers */
-     token->cur_token = token->sbuf1;
-     token->cur_char_location = &(token->charloc1);
-     token->cur_token_line = &(token->tokenline1);
-     token->cur_token[0] = '\0';
-     *(token->cur_char_location) = 0;
-     *(token->cur_token_line) = 1;
-
-     /* init the previous token buffers */
-     token->prev_token = token->sbuf2;
-     token->prev_char_location = &(token->charloc2);
-     token->prev_token_line = &(token->tokenline2);
-     token->prev_token[0] = '\0';
-     *(token->prev_char_location) = 0;
-     *(token->prev_token_line) = 1;
-
-     /* init the input buffers */
-     token_buffer->token_line_location = 1;
-     token_buffer->inbuf = inbuf;
-
-     /* init Extern and Static state */
-     token->extern_active = FALSE;
-     token->static_active = FALSE;
-
-     /* get the first token */
-     token_found = CNextToken(token, token_buffer, infname, outbuf);
-
-     /* loop through the file */
-     while (token_found) {
-
-         /* obtain the token type */
-         type = CTokenType(token->cur_token);
-
-         /* react on the token type */
-         switch (type) {
-
-             case NOP:
-                 CParseNOP(token, token_buffer, infname, outbuf);
-                 break;
-
-             case Structure:
-             case Enumeration:
-             case Union:
-                 CParseDeclarationStatement(token, token_buffer,
-                                            type, infname, outbuf);
-                 break;
-
-             case TypeDefinition:
-                 CParseTypeDefinition(token, token_buffer, infname, outbuf);
-                 break;
-
-             case Class:
-                 CParseClass(token, token_buffer, infname, outbuf);
-                 break;
-
-             case Extern:
-                 token->extern_active = TRUE;
-                 break;
-
-             case Static:
-                 token->static_active = TRUE;
-                 break;
-
-             default:
-                 /* not reached */
-                 break;
-         }
-
-         /* a storage class only applies to the statement it starts.
-          * CParseNOP() clears the flags itself once it has consumed a
-          * statement, so NOP tokens are left alone here. */
-         if (type != Extern &&
-             type != Static &&
-             type != NOP) {
-             token->extern_active = FALSE;
-             token->static_active = FALSE;
-         }
-
-         /* swap state variables and get the next token */
-         CTokenSwap(token);
-         token_found = CNextToken(token, token_buffer, infname, outbuf);
-     }
-
-     free(token);
-     free(token_buffer);
- }
-
-
- /*----------------------------------------------------------------------------
-  *
-  * tag_suffix_c(), tag_suffix_h() and tag_suffix_e() are recognized procedure
-  * names to the tags package in Epsilon and will be called automatically when
-  * tagging needs to happen for these extensions.  These are replacement names
-  * for the routines of the same name defined in tags.e.  The other
-  * tag_suffix_ routines in this file forward to tag_suffix_c() as well.
-  *
-  ---------------------------------------------------------------------------*/
-
- tag_suffix_c()
- {
-     /* The third parameter, the output buffer name, is not actually used by
-      * anyone but is left here for a time when this information may be
-      * needed.  The current algorithm is to let the function add_tag()
-      * decide the buffer name to send the output to.  As a little more than
-      * coincidence, the name used here is the same used in add_tag()
-      * defined in tags.e */
-     CTags(bufname, filename, "-tags");
- }
-
- tag_suffix_h()
- {
-     /* header files are tagged exactly like C source files */
-     tag_suffix_c();
- }
-
- tag_suffix_e()
- {
-     /* .e files are tagged exactly like C source files */
-     tag_suffix_c();
- }
-
- tag_suffix_cc()
- {
-     /* .cc files are tagged exactly like C source files */
-     tag_suffix_c();
- }
-
- tag_suffix_cpp()
- {
-     /* .cpp files are tagged exactly like C source files */
-     tag_suffix_c();
- }
-
- /* Rebuild the default character maps.
-  *
-  * Both case maps are first reset so that every one of the 256 character
-  * codes maps to itself.  The ASCII letters are then registered as
-  * upper/lower pairs, codes 131-153 and 160-163 are classed as lower case,
-  * and eight accented letter pairs are registered.
-  *
-  * The accented letters are given by their numeric codes in IBM code page
-  * 437, the same numbering the ranges above use, so the result does not
-  * depend on how this source file happens to be encoded.
-  * NOTE(review): presumably called by Epsilon when this file is loaded;
-  * confirm against the EEL documentation. */
- when_loading()
- {
- #define UCLC(up, low) def_char_class[low] = C_LOWER, \
-     def_char_class[up] = C_UPPER, \
-     def_srch_case_map[up] = low, \
-     def_case_map[low] = up, \
-     def_case_map[up] = low
-
-     int i, j;
-
-     /* start from identity maps */
-     for (i = 0; i < 256; i++)
-         def_case_map[i] = def_srch_case_map[i] = i;
-
-     /* plain ASCII letters */
-     for (i = 'A', j = 'a'; i <= 'Z'; i++, j++)
-         UCLC(i, j);
-
-     /* accented letters; the upper case codes inside the first range are
-      * reclassified by the UCLC() calls below */
-     for (i = 131; i < 154; i++)
-         def_char_class[i] = C_LOWER;
-     for (i = 160; i < 164; i++)
-         def_char_class[i] = C_LOWER;
-
-     UCLC(128, 135);             /* C with cedilla */
-     UCLC(142, 132);             /* A with diaeresis */
-     UCLC(143, 134);             /* A with ring */
-     UCLC(144, 130);             /* E with acute */
-     UCLC(146, 145);             /* AE ligature */
-     UCLC(153, 148);             /* O with diaeresis */
-     UCLC(154, 129);             /* U with diaeresis */
-     UCLC(165, 164);             /* N with tilde */
- }
-