home *** CD-ROM | disk | FTP | other *** search
- /*
- * Bawk C actions compiler
- */
- #include <stdio.h>
- #include "bawk.h"
-
- act_compile( actbuf )
- char *actbuf; /* where tokenized actions are compiled into */
- {
- Where = ACTION;
- return stmt_compile( actbuf );
- }
-
- pat_compile( actbuf )
- char *actbuf; /* where tokenized actions are compiled into */
- {
- Where = PATTERN;
- return stmt_compile( actbuf );
- }
-
- stmt_compile( actbuf )
- char *actbuf; /* where tokenized actions are compiled into */
- {
- /*
- * Read and tokenize C actions from current input file into the
- * action buffer. Strip out comments and whitespace in the
- * process.
- */
- char *actptr, /* actbuf pointer */
- *cp, /* work pointer */
- buf[MAXLINELEN];/* string buffer */
- int braces, /* counts '{}' pairs - return when 0 */
- parens, /* counts '()' pairs */
- i, /* temp */
- c; /* current input character */
-
- braces = parens = 0;
- actptr = actbuf;
- while ( (c = getcharacter()) != -1 )
- {
- /*
- * Skip over spaces, tabs and newlines
- */
- if ( c==' ' || c=='\t' || c=='\n' )
- continue;
- if ( c=='#' )
- {
- /*
- * Skip comments. Comments start with a '#' and
- * end at the next newline.
- */
- while ( (c = getcharacter()) != -1 && c!='\n' )
- ;
- continue;
- }
-
- if ( c=='{' )
- {
- if ( Where==PATTERN )
- {
- /*
- * We're compiling a pattern. The '{' marks
- * the beginning of an action statement.
- * Push the character back and return.
- */
- ungetcharacter( '{' );
- break;
- }
- else
- {
- /*
- * We must be compiling an action statement.
- * '{'s mark beginning of action or compound
- * statements.
- */
- ++braces;
- *actptr++ = T_LBRACE;
- }
- }
- else if ( c=='}' )
- {
- *actptr++ = T_RBRACE;
- if ( ! --braces )
- /*
- * Found the end of the action string
- */
- break;
- }
- else if ( c=='(' )
- {
- ++parens;
- *actptr++ = T_LPAREN;
- }
- else if ( c==')' )
- {
- if ( --parens < 0 )
- error( "mismatched '()'", ACT_ERROR );
- *actptr++ = T_RPAREN;
- }
- else if ( c==',' && !braces && !parens && Where==PATTERN )
- {
- /*
- * found a comma outside of any braces or parens-
- * this must be a regular expression seperator.
- */
- ungetcharacter( ',' );
- break;
- }
-
- /*
- * Check if it's a regular expression:
- */
- else if ( c=='/' )
- {
- /*
- * A '/' inside a pattern string starts a regular
- * expression. Inside action strings, a '/' is
- * the division operator.
- */
- if ( Where == PATTERN )
- goto dopattern;
- else
- *actptr++ = T_DIV;
- }
- else if ( c=='@' )
- {
- dopattern:
- /*
- * Within action strings, only the '@' may be used to
- * delimit regular expressions
- */
- *actptr++ = T_REGEXP;
- ungetcharacter( c );
- actptr += re_compile( actptr );
- }
-
- /*
- * symbol, string or constant:
- */
- else if ( alpha( c ) )
- {
- /*
- * It's a symbol reference. Copy the symbol into
- * string buffer.
- */
- cp = buf;
- do
- *cp++ = c;
- while ( (c=getcharacter()) != -1 && alphanum( c ) );
- ungetcharacter( c );
- *cp = 0;
- /*
- * Check if a keyword, builtin function or variable.
- */
- if ( c = iskeyword( buf ) )
- *actptr++ = c;
- else if ( i = isfunction( buf ) )
- {
- *actptr++ = T_FUNCTION;
- storeint( actptr, i );
- actptr += sizeof( i );
- }
- else
- {
- /*
- * It's a symbol name.
- */
- *actptr++ = T_VARIABLE;
- if ( !(cp = findvar( buf )) )
- cp = addvar( buf );
- storeptr( actptr, cp );
- actptr += sizeof( cp );
- }
- }
-
- else if ( c == '"' )
- {
- /*
- * It's a string constant
- */
- *actptr++ = T_STRING;
- actptr = str_compile( actptr, '"' );
- }
- else if ( c == '\'' )
- {
- /*
- * It's a character constant
- */
- *actptr++ = T_CONSTANT;
- str_compile( buf, '\'' );
- storeint( actptr, *buf );
- actptr += sizeof( i );
- }
-
- else if ( num( c ) )
- {
- /*
- * It's a numeric constant
- */
- *actptr++ = T_CONSTANT;
- cp = buf;
- do
- *cp++ = c;
- while ( (c=getcharacter()) != -1 && num(c) );
- ungetcharacter( c );
- *cp = 0;
- storeint( actptr, atoi( buf ) );
- actptr += sizeof( i );
- }
-
- /*
- * unary operator:
- */
- else if ( c == '$' )
- *actptr++ = T_DOLLAR;
-
- /*
- * or binary operator:
- */
- else if ( c == '=' )
- {
- if ( (c=getcharacter()) == '=' )
- *actptr++ = T_EQ;
- else
- {
- ungetcharacter( c );
- *actptr++ = T_ASSIGN;
- }
- }
-
- else if ( c == '!' )
- {
- if ( (c=getcharacter()) == '=' )
- *actptr++ = T_NE;
- else
- {
- ungetcharacter( c );
- *actptr++ = T_LNOT;
- }
- }
-
- else if ( c == '<' )
- {
- if ( (c=getcharacter()) == '<' )
- *actptr++ = T_SHL;
- else if ( c == '=' )
- *actptr++ = T_LE;
- else
- {
- ungetcharacter( c );
- *actptr++ = T_LT;
- }
- }
-
- else if ( c == '>' )
- {
- if ( (c=getcharacter()) == '>' )
- *actptr++ = T_SHR;
- else if ( c == '=' )
- *actptr++ = T_GE;
- else
- {
- ungetcharacter( c );
- *actptr++ = T_GT;
- }
- }
-
- else if ( c == '&' )
- {
- if ( (c=getcharacter()) == '&' )
- *actptr++ = T_LAND;
- else
- {
- ungetcharacter( c );
- *actptr++ = T_AND;
- }
- }
-
- else if ( c == '|' )
- {
- if ( (c=getcharacter()) == '|' )
- *actptr++ = T_LIOR;
- else
- {
- ungetcharacter( c );
- *actptr++ = T_IOR;
- }
- }
- else if ( c == '+' )
- {
- if ( (c=getcharacter()) == '+' )
- *actptr++ = T_INCR;
- else
- {
- ungetcharacter( c );
- *actptr++ = T_ADD;
- }
- }
-
- else if ( c == '-' )
- {
- if ( (c=getcharacter()) == '-' )
- *actptr++ = T_DECR;
- else
- {
- ungetcharacter( c );
- *actptr++ = T_SUB;
- }
- }
-
- /*
- * punctuation
- */
- else if ( instr( c, "[](),;*/%+-^~" ) )
- *actptr++ = c;
-
- else
- {
- /*
- * Bad character in input line
- */
- error( "lexical error", ACT_ERROR );
- }
-
- if ( actptr >= Workbuf + MAXWORKBUFLEN )
- error( "action too long", MEM_ERROR );
- }
- if ( braces || parens )
- error( "mismatched '{}' or '()'", ACT_ERROR );
-
- *actptr++ = T_EOF;
-
- return actptr - actbuf;
- }
-
- str_compile( str, delim )
- char *str, delim;
- {
- /*
- * Compile a string from current input file into the given string
- * buffer. Stop when input character is the delimiter in "delim".
- * Returns a pointer to the first character after the string.
- */
- int c;
- char buf[ MAXLINELEN ];
-
- while ( (c = getcharacter()) != -1 && c != delim)
- {
- if ( c == '\\' )
- {
- switch ( c = getcharacter() )
- {
- case -1: goto err;
- case 'b': c = '\b'; break;
- case 'n': c = '\n'; break;
- case 't': c = '\t'; break;
- case 'f': c = '\f'; break;
- case 'r': c = '\r'; break;
- case '0':
- case '1':
- case '2':
- case '3':
- *buf = c;
- for ( c=1; c<3; ++c )
- {
- if ( (buf[c]=getcharacter()) == -1 )
- goto err;
- }
- buf[c] = 0;
- sscanf( buf, "%o", &c );
- break;
- case '\n':
- if ( getcharacter() == -1 )
- goto err;
- default:
- if ( (c = getcharacter()) == -1 )
- goto err;
- }
- }
- *str++ = c;
- }
- *str++ = 0;
-
- return str;
- err:
- sprintf( buf, "missing %c delimiter", delim );
- error( buf, 4 );
- }
-
/*
 * storeint - store the int i at the address ip and return i.
 *
 * The tokenizer calls this with addresses inside a byte stream, which are
 * not necessarily aligned for an int, so the value is copied one byte at
 * a time instead of being assigned through the int pointer.
 */
int
storeint( ip, i )
int *ip, i;
{
	char *dst = (char *) ip;
	char *src = (char *) &i;
	unsigned n;

	for ( n = 0; n < sizeof( i ); ++n )
		dst[n] = src[n];
	return i;
}
-
/*
 * storeptr - store the pointer p at the address pp.
 *
 * The tokenizer calls this with addresses inside a byte stream, which are
 * not necessarily aligned for a pointer, so the value is copied one byte
 * at a time instead of being assigned through pp.
 *
 * The function has always returned int; the stored pointer converted to
 * int is returned as before.  That value is only meaningful where an int
 * can hold a pointer - stmt_compile(), the caller in this file, ignores it.
 */
int
storeptr( pp, p )
char **pp, *p;
{
	char *dst = (char *) pp;
	char *src = (char *) &p;
	unsigned n;

	for ( n = 0; n < sizeof( p ); ++n )
		dst[n] = src[n];
	return (int) (long) p;
}
-
/*
 * fetchint - return the int stored at the address ip.
 *
 * Counterpart of storeint(): the address may lie inside a byte stream and
 * need not be aligned for an int, so the value is assembled one byte at a
 * time instead of being read through the int pointer.
 */
int
fetchint( ip )
int *ip;
{
	int value;
	char *dst = (char *) &value;
	char *src = (char *) ip;
	unsigned n;

	for ( n = 0; n < sizeof( value ); ++n )
		dst[n] = src[n];
	return value;
}
-
/*
 * fetchptr - return the pointer stored at the address pp.
 *
 * Counterpart of storeptr(): the address may lie inside a byte stream and
 * need not be aligned for a pointer, so the value is assembled one byte
 * at a time instead of being read through pp.
 */
char *
fetchptr( pp )
char **pp;
{
	char *value;
	char *dst = (char *) &value;
	char *src = (char *) pp;
	unsigned n;

	for ( n = 0; n < sizeof( value ); ++n )
		dst[n] = src[n];
	return value;
}
-
- getoken()
- {
- char *cp;
- int i;
-
- switch ( Token = *Actptr++ )
- {
- case T_STRING:
- case T_REGEXP:
- Value.dptr = Actptr;
- Actptr += strlen( Actptr ) + 1;
- break;
- case T_VARIABLE:
- Value.dptr = fetchptr( Actptr );
- Actptr += sizeof( cp );
- break;
- case T_FUNCTION:
- case T_CONSTANT:
- Value.ival = fetchint( Actptr );
- Actptr += sizeof( i );
- break;
- case T_EOF:
- --Actptr;
- default:
- Value.dptr = 0;
- }
-
- #ifdef DEBUG
- if ( Debug > 1 )
- printf( "Token='%c' (0x%x), Value=%d\n",
- Token,Token,Value.ival );
- #endif
-
- return Token;
- }