home *** CD-ROM | disk | FTP | other *** search
Text File | 1988-07-17 | 25.9 KB | 1,054 lines |
-
- /*
- * gawk -- GNU version of awk
- * Copyright (C) 1986 Free Software Foundation
- * Written by Paul Rubin, August 1986
- *
- * Modified by Andrew D. Estes, July 1988
- */
-
- /*
- GAWK is distributed in the hope that it will be useful, but WITHOUT ANY
- WARRANTY. No author or distributor accepts responsibility to anyone
- for the consequences of using it or for whether it serves any
- particular purpose or works at all, unless he says so in writing.
- Refer to the GAWK General Public License for full details.
-
- Everyone is granted permission to copy, modify and redistribute GAWK,
- but only under the conditions described in the GAWK General Public
- License. A copy of this license is supposed to have been given to you
- along with GAWK so you can know your rights and responsibilities. It
- should be in a file named COPYING. Among other things, the copyright
- notice and this notice must be preserved on all copies.
-
- In other words, go ahead and share GAWK, but don't try to stop
- anyone else from sharing it farther. Help stamp out software hoarding!
- */
-
%{
#define YYDEBUG 12

#include <stdio.h>
#include <string.h>
#include "awk.h"

static int yylex ();

/*
 * Feedback flags shared between the grammar actions and yylex().
 *
 * awk spells string concatenation as plain white space.  Rather than
 * thread a white-space token through every grammar rule, yylex() returns
 * CONCAT_OP only where one can really occur: right after a token that
 * may end an operand (a name, a constant or a closing bracket), which it
 * remembers by raising want_concat_token.
 *
 * want_regexp is raised by the regex rule once the opening '/' has been
 * seen, so that the next call to yylex() scans a raw regular expression.
 * want_redirect is raised by the print, printf and getline rules so that
 * '<', '>' and '|' come back as REDIRECT_OP.
 */
static int want_concat_token;
static int want_regexp;
static int want_redirect;

/* Current line of the program text: yylex() counts, yyerror() reports. */
int lineno = 1;

/*
 * Running counts of patterns and actions.  ADE: arguments should not be
 * treated as file names when there is no pattern/action pair to perform.
 */
int patterns = 0;
int actions = 0;

/* While a gawk program is being parsed, the next character to scan. */
char *lexptr;
/* Left-hand limit used by yyerror() when it looks for the current line;
   presumably the start of the program text (it is set outside this file). */
char *lexptr_begin;
%}
-
/* Semantic value of a token or nonterminal. */
%union {
	long		lval;		/* integer-valued tokens */
	AWKNUM		fval;		/* value of a NUMBER */
	NODE		*nodeval;	/* parse-tree node built by a rule */
	NODETYPE	nodetypeval;	/* node type carried by an operator or keyword */
	char		*sval;		/* text of a NAME, REGEXP or YSTRING */
	NODE		*(*ptrval)();	/* function behind a LEX_BUILTIN or LEX_SUB */
}
-
/* Nonterminals that yield a parse-tree node. */
%type <nodeval> start program rule pattern primary_pattern regex action
%type <nodeval> statement compound_statement statement_list expression_statement
%type <nodeval> if_statement iteration_statement output_statement redirection
%type <nodeval> opt_exp opt_argument_expr_list argument_expr_list
%type <nodeval> expr assign_expr cond_expr or_expr and_expr concat_expr
%type <nodeval> arith_expr mult_expr unary_expr postfix_expr primary_expr
%type <nodetypeval> whitespace

/* Tokens, in their original order so that the token numbering is unchanged. */
%token <sval> NAME REGEXP YSTRING
%token <lval> ERROR INCDEC
%token <fval> NUMBER
%token <nodetypeval> ASSIGNOP RELOP MATCHOP NEWLINE REDIRECT_OP CONCAT_OP
%token <nodetypeval> LEX_BEGIN LEX_END LEX_IF LEX_ELSE
%token <nodetypeval> LEX_WHILE LEX_FOR LEX_BREAK LEX_CONTINUE
%token <nodetypeval> LEX_GETLINE LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT
%token LEX_IN
%token <lval> LEX_AND LEX_OR INCREMENT DECREMENT
%token <ptrval> LEX_BUILTIN LEX_SUB
-
- /* these are just yylval numbers */
- /* %token <lval> CHAR JF this isn't used anymore */
-
/* Operator precedence and associativity, weakest binding first. */
%left		','
%left		LEX_OR
%left		LEX_AND
%right		ASSIGNOP
%left		CONCAT_OP
%left		'+' '-'
%left		'*' '/'
%left		'%'
%left		'^'
%right		UNARY
%nonassoc	MATCHOP RELOP INCREMENT DECREMENT
-
- %%
-
/* A whole program, possibly preceded by blank lines.  The finished rule
   list is left in expression_value. */
start
	: optional_newlines program
		{ expression_value = $2; }
	;
-
-
/* One or more rules.  Each rule is wrapped in a Node_rule_list cell and
   later cells are attached with append_right(). */
program
	: rule
		{ $$ = node ($1, Node_rule_list, (NODE *) NULL); }
	| program rule
		/* put the new rule at the tail of the list */
		{ $$ = append_right ($1, node ($2, Node_rule_list, (NODE *) NULL)); }
	;
-
/* A pattern/action pair terminated by at least one NEWLINE.  Every rule
   bumps both the pattern and the action counter. */
rule
	: pattern action NEWLINE optional_newlines
		{
			++patterns;
			++actions;
			$$ = node ($1, Node_rule_node, $2);
		}
	;
-
-
/* The simplest patterns: nothing at all, BEGIN, END or an expression.
   BEGIN takes one off each counter, which cancels the increment made
   when the enclosing rule is reduced. */
primary_pattern
	: /* EMPTY */
		{ $$ = NULL; }
	| LEX_BEGIN
		{
			--patterns;
			--actions;
			$$ = node ((NODE *) NULL, Node_K_BEGIN, (NODE *) NULL);
		}
	| LEX_END
		{ $$ = node ((NODE *) NULL, Node_K_END, (NODE *) NULL); }
	| expr
		{ $$ = $1; }
	;
-
/*
 * Patterns: primary patterns combined with !, &&, ||, parentheses, the
 * match operators and the range operator ','.
 *
 * A bare regex is turned into a match of field 0 against that regex.
 * For the two MATCHOP alternatives the node type comes from the token
 * itself ($2): yylex() delivers Node_match for '~' and Node_nomatch for
 * '!~', so hard-coding Node_match would make '!~' behave like '~'.
 */
pattern
	: primary_pattern
		{ $$ = $1; }
	| regex
		{
			$$ = node (node (make_number ((AWKNUM) 0), Node_field_spec,
					 (NODE *) NULL),
				   Node_match, $1);
		}
	| '!' primary_pattern %prec UNARY
		{ $$ = node ($2, Node_not, (NODE *) NULL); }
	| primary_pattern LEX_AND pattern
		{ $$ = node ($1, Node_and, $3); }
	| primary_pattern LEX_OR pattern
		{ $$ = node ($1, Node_or, $3); }
	| '(' pattern ')'
		{
			$$ = $2;
			want_concat_token = 0;
		}
	| pattern MATCHOP regex
		{ $$ = node ($1, $2, $3); }
	| pattern MATCHOP primary_pattern
		{ $$ = node ($1, $2, $3); }
	| pattern ',' pattern
		{ $$ = mkrangenode (node ($1, Node_cond_pair, $3)); }	/*jfw*/
	;
-
-
/* The action part of a rule.  A missing action takes one off the action
   counter and yields no node. */
action
	: /* EMPTY */
		{
			--actions;
			$$ = NULL;
		}
	| compound_statement
		{ $$ = $1; }
	;
-
-
/* Any single statement; the node of the chosen alternative is passed up. */
statement
	: compound_statement optional_newlines
		{ $$ = $1; }
	| expression_statement
		{ $$ = $1; }
	| if_statement
		{ $$ = $1; }
	| iteration_statement
		{ $$ = $1; }
	| output_statement
		{ $$ = $1; }
	;
-
/* A brace-enclosed statement list, wrapped in one more
   Node_statement_list node. */
compound_statement
	: '{' optional_newlines statement_list '}'
		{ $$ = node ($3, Node_statement_list, (NODE *) NULL); }
	;
-
/* One or more statements, each in its own Node_statement_list cell;
   later cells are attached with append_right(). */
statement_list
	: statement
		{ $$ = node ($1, Node_statement_list, (NODE *) NULL); }
	| statement_list statement
		{
			$$ = append_right ($1,
					   node ($2, Node_statement_list, (NODE *) NULL));
		}
	;
-
/* An empty statement (a lone ';') yields no node; an expression used as
   a statement is wrapped in a Node_statement_list node. */
expression_statement
	: ';' optional_newlines
		{ $$ = (NODE *) NULL; }
	| expr statement_term
		{ $$ = node ($1, Node_statement_list, (NODE *) NULL); }
	;
-
-
/* if (expr) statement [else statement].  The condition is the left child
   of the Node_K_if node; the right child is a Node_if_branches node that
   holds the "then" part and, when present, the "else" part. */
if_statement
	: LEX_IF '(' expr ')' optional_newlines statement
		{
			$$ = node ($3, Node_K_if,
				   node ($6, Node_if_branches, (NODE *) NULL));
		}
	| LEX_IF '(' expr ')' optional_newlines statement
	  LEX_ELSE optional_newlines statement
		{
			$$ = node ($3, Node_K_if,
				   node ($6, Node_if_branches, $9));
		}
	;
-
-
/*
 * Loops and the loop/flow keywords break, continue, next and exit.
 *
 * Each loop header ends with a mid-rule action that clears
 * want_concat_token right after the closing ')'.  That action occupies
 * a position of its own, which is why the loop body is $7, $10 or $11
 * below.
 */
iteration_statement
	: LEX_WHILE '(' expr ')'
		{ want_concat_token = 0; }
	  optional_newlines statement
		{ $$ = node ($3, Node_K_while, $7); }
	| LEX_FOR '(' opt_exp ';' expr ';' opt_exp ')'
		{ want_concat_token = 0; }
	  optional_newlines statement
		{
			$$ = node ($11, Node_K_for,
				   (NODE *) make_for_loop ($3, $5, $7));
		}
	| LEX_FOR '(' opt_exp ';' ';' opt_exp ')'
		{ want_concat_token = 0; }
	  optional_newlines statement
		{
			$$ = node ($10, Node_K_for,
				   (NODE *) make_for_loop ($3, (NODE *) NULL, $6));
		}
	| LEX_FOR '(' NAME CONCAT_OP LEX_IN NAME ')'
		{ want_concat_token = 0; }
	  optional_newlines statement
		{
			$$ = node ($10, Node_K_arrayfor,
				   (NODE *) make_for_loop (variable ($3), (NODE *) NULL,
							   variable ($6)));
		}
	| LEX_BREAK statement_term
		/* for break, maybe we'll have to remember where to break to */
		{ $$ = node ((NODE *) NULL, Node_K_break, (NODE *) NULL); }
	| LEX_CONTINUE statement_term
		{ $$ = node ((NODE *) NULL, Node_K_continue, (NODE *) NULL); }
	| LEX_NEXT statement_term
		{ $$ = node ((NODE *) NULL, Node_K_next, (NODE *) NULL); }
	| LEX_EXIT statement_term
		{ $$ = node ((NODE *) NULL, Node_K_exit, (NODE *) NULL); }
	| LEX_EXIT '(' expr ')' statement_term
		{ $$ = node ($3, Node_K_exit, (NODE *) NULL); }
	;
-
-
/*
 * print and printf statements.
 *
 * The mid-rule action that follows the keyword raises want_redirect, so
 * that yylex() returns '<', '>' and '|' as REDIRECT_OP while the
 * statement is being read, and clears want_concat_token.  The final
 * action lowers want_redirect again.  Every statement inside a mid-rule
 * action needs its terminating ';' because the text is copied verbatim
 * into the generated C parser.
 */
output_statement
	: LEX_PRINT
		{ ++want_redirect; want_concat_token = 0; }
	  opt_argument_expr_list redirection statement_term
		{
			want_redirect = 0;
			/* $4->lnode = NULL; */
			$$ = node ($3, Node_K_print, $4);
		}
	| LEX_PRINTF
		{ ++want_redirect; want_concat_token = 0; }
	  opt_argument_expr_list redirection statement_term
		{
			want_redirect = 0;
			/* $4->lnode = NULL; */
			$$ = node ($3, Node_K_printf, $4);
		}
	| LEX_PRINTF '(' argument_expr_list ')'
		{ ++want_redirect; want_concat_token = 0; }
	  redirection statement_term
		{
			want_redirect = 0;
			$$ = node ($3, Node_K_printf, $6);
		}
	;
-
-
/* Zero or more NEWLINE tokens.  The value is not meaningful. */
optional_newlines
	: /* EMPTY */
	| optional_newlines NEWLINE
		{ $<nodetypeval>$ = Node_illegal; }
	;
-
/* What ends a simple statement: a NEWLINE or a ';', followed by any
   number of further NEWLINEs.  The value is not meaningful. */
statement_term
	: NEWLINE optional_newlines
		{ $<nodetypeval>$ = Node_illegal; }
	| ';' optional_newlines
		{ $<nodetypeval>$ = Node_illegal; }
	;
-
/* A regular expression between slashes.  Raising want_regexp after the
   opening '/' makes the next yylex() call return the raw text as a
   REGEXP token, which is $3 because the mid-rule action is $2. */
regex
	: '/'
		{ ++want_regexp; }
	  REGEXP '/'
		{
			want_regexp = 0;
			$$ = make_regex ($3);
		}
	;
-
/* Optional I/O redirection.  The node type is the one yylex() stored
   with the REDIRECT_OP token. */
redirection
	: /* EMPTY */
		{ $$ = NULL;	/* node (NULL, Node_redirect_nil, NULL); */ }
	| REDIRECT_OP expr
		{ $$ = node ($2, $1, (NODE *) NULL); }
	;
-
-
/* An expression that may be left out, as in the parts of a for loop. */
opt_exp
	: /* EMPTY */
		{ $$ = NULL;	/* node (NULL, Node_builtin, NULL); */ }
	| expr
		{ $$ = $1; }
	;
-
/* An argument list that may be empty. */
opt_argument_expr_list
	: /* EMPTY */
		{ $$ = NULL; }
	| argument_expr_list
		{ $$ = $1; }
	;
-
/*
 * Operands: variables, constants, builtin calls, getline and
 * parenthesised expressions.
 *
 * All three builtin-call alternatives raise want_concat_token so that
 * white space after the call can be taken as concatenation.  (The
 * LEX_SUB alternative used to contain the bare expression
 * "want_concat_token;", a statement with no effect.)
 *
 * getline raises want_redirect while its operand is read so that a
 * following '<' is returned as REDIRECT_OP.
 */
primary_expr
	: NAME
		{ $$ = variable ($1); }
	| NUMBER
		{ $$ = make_number ($1); }
	| YSTRING
		{ $$ = make_string ($1, -1); }
	| LEX_BUILTIN '(' opt_argument_expr_list ')'
		{
			++want_concat_token;
			$$ = snode ($3, Node_builtin, $1);
		}
	| LEX_BUILTIN
		{
			++want_concat_token;
			$$ = snode ((NODE *) NULL, Node_builtin, $1);
		}
	| LEX_SUB '(' regex ',' argument_expr_list ')'
		{
			++want_concat_token;
			$$ = snode (node ($3, Node_expression_list, $5),
				    Node_builtin, $1);
		}
	| LEX_GETLINE
		{ ++want_redirect; }
	  opt_exp redirection
		{
			want_redirect = 0;
			$$ = node ($3, Node_K_getline, $4);
		}
	| '(' expr ')'
		{ $$ = $2; }
	;
-
/* Operands with a subscript or a trailing ++ / --. */
postfix_expr
	: primary_expr
		{ $$ = $1; }
	| NAME '[' expr ']'
		{ $$ = node (variable ($1), Node_subscript, $3); }
	| postfix_expr INCREMENT
		{ $$ = node ($1, Node_postincrement, (NODE *) NULL); }
	| postfix_expr DECREMENT
		{ $$ = node ($1, Node_postdecrement, (NODE *) NULL); }
	;
-
/* A comma-separated list of expressions; newlines are allowed after a
   comma.  Each expression gets its own Node_expression_list cell. */
argument_expr_list
	: assign_expr
		{ $$ = node ($1, Node_expression_list, (NODE *) NULL); }
	| argument_expr_list ',' optional_newlines assign_expr
		{
			$$ = append_right ($1,
					   node ($4, Node_expression_list, (NODE *) NULL));
		}
	;
-
/* Prefix operators: ++, --, unary minus and the field selector '$'. */
unary_expr
	: postfix_expr
		{ $$ = $1; }
	| INCREMENT unary_expr
		{ $$ = node ($2, Node_preincrement, (NODE *) NULL); }
	| DECREMENT unary_expr
		{ $$ = node ($2, Node_predecrement, (NODE *) NULL); }
	| '-' unary_expr %prec UNARY
		{ $$ = node ($2, Node_unary_minus, (NODE *) NULL); }
	| '$' unary_expr %prec UNARY
		{ $$ = node ($2, Node_field_spec, (NODE *) NULL); }
	;
-
/*
 * Exponentiation binds tighter than the multiplicative operators and
 * groups to the right, so 2^3^2 is 2^(3^2).  pow_expr is private to this
 * part of the grammar and has no %type declaration; its value is
 * therefore always accessed with an explicit <nodeval> tag.
 */
pow_expr
	: unary_expr
		{ $<nodeval>$ = $1; }
	| unary_expr '^' pow_expr
		{ $<nodeval>$ = node ($1, Node_pow, $<nodeval>3); }
	;

/*
 * '*', '/' and '%' share one level and group to the left, so a / b * c
 * is (a / b) * c.  (With all four operators on a single right-recursive
 * level it was parsed as a / (b * c), and 2^3*2 as 2^(3*2).)
 */
mult_expr
	: pow_expr
		{ $$ = $<nodeval>1; }
	| mult_expr '%' pow_expr
		{ $$ = node ($1, Node_mod, $<nodeval>3); }
	| mult_expr '*' pow_expr
		{ $$ = node ($1, Node_times, $<nodeval>3); }
	| mult_expr '/' pow_expr
		{ $$ = node ($1, Node_quotient, $<nodeval>3); }
	;
-
/*
 * Addition and subtraction.  The rule is left-recursive so that the
 * operators group to the left: 10 - 4 - 3 is (10 - 4) - 3.  (A
 * right-recursive rule evaluates it as 10 - (4 - 3).)
 */
arith_expr
	: mult_expr
		{ $$ = $1; }
	| arith_expr '+' mult_expr
		{ $$ = node ($1, Node_plus, $3); }
	| arith_expr '-' mult_expr
		{ $$ = node ($1, Node_minus, $3); }
	;
-
/* Concatenation and the relational operators share this level and are
   right-recursive.  For RELOP the node type is the one yylex() stored
   with the token. */
concat_expr
	: arith_expr
		{ $$ = $1; }
	| arith_expr CONCAT_OP concat_expr
		{ $$ = node ($1, Node_concat, $3); }
	| arith_expr RELOP concat_expr
		{ $$ = node ($1, $2, $3); }
	;
-
/* Logical AND of two concat_expr operands.
   NOTE(review): neither operand is an and_expr, so a chain such as
   a && b && c cannot be derived from this rule alone -- confirm whether
   that is intended. */
and_expr
	: concat_expr
		{ $$ = $1; }
	| concat_expr LEX_AND concat_expr
		{ $$ = node ($1, Node_and, $3); }
	;
-
/* Logical OR of two and_expr operands.
   NOTE(review): neither operand is an or_expr, so a chain such as
   a || b || c cannot be derived from this rule alone -- confirm whether
   that is intended. */
or_expr
	: and_expr
		{ $$ = $1; }
	| and_expr LEX_OR and_expr
		{ $$ = node ($1, Node_or, $3); }
	;
-
/* The conditional operator.  The two result expressions hang off an
   auxiliary node of type Node_illegal. */
cond_expr
	: or_expr
		{ $$ = $1; }
	| or_expr '?' or_expr ':' or_expr
		{
			$$ = node ($1, Node_cond_exp,
				   node ($3, Node_illegal, $5));
		}
	;
-
/* Assignment, right-recursive.  The node type is the one yylex() stored
   with the ASSIGNOP token. */
assign_expr
	: cond_expr
		{ $$ = $1; }
	| concat_expr ASSIGNOP assign_expr
		{ $$ = node ($1, $2, $3); }
	;
-
/* A full expression is simply an assignment expression. */
expr
	: assign_expr
		{ $$ = $1; }
	;
-
/* Any run of CONCAT_OP and NEWLINE tokens, possibly empty.  No other
   rule in this grammar refers to it. */
whitespace
	: /* EMPTY */
		{ $$ = Node_illegal; }
	| CONCAT_OP
	| NEWLINE
	| whitespace CONCAT_OP
	| whitespace NEWLINE
	;
-
- %%
-
-
- struct token {
- char *operator;
- NODETYPE value;
- int class;
- NODE *(*ptr)();
- };
-
- #ifndef NULL
-
- #define NULL 0
-
- #endif
-
- NODE *do_atan2(),*do_close(), *do_cos(), *do_exp(), *do_getline(),
- *do_gsub(), *do_index(), *do_length(), *do_log(), *do_match(),
- *do_rand(), *do_sin(), *do_sqrt(),
- *do_srand(), *do_sprintf(), *do_sub(), *do_substr(), *do_system(),
- *do_split(), *do_int();
-
- /* Special functions for debugging */
- #ifndef FAST
- NODE *do_prvars(), *do_bp();
- #endif
-
/* Tokentab is kept in ascending ASCII order because yylex() looks words
   up with a binary search. */

/* END(s) yields a pointer to the last element of the array S.  S must be
   a true array, not a pointer.  For tokentab the last element is the
   closing {NULL, ...} entry. */
#define END(s) ((s) + (sizeof (s) / sizeof ((s)[0]) - 1))
-
- static struct token tokentab[] = {
- {"BEGIN", Node_illegal, LEX_BEGIN, 0},
- {"END", Node_illegal, LEX_END, 0},
- {"atan2", Node_builtin, LEX_BUILTIN, do_atan2},
- #ifndef FAST
- {"bp", Node_builtin, LEX_BUILTIN, do_bp},
- #endif
- {"break", Node_K_break, LEX_BREAK, 0},
- {"close", Node_builtin, LEX_BUILTIN, do_close},
- {"continue", Node_K_continue, LEX_CONTINUE, 0},
- {"cos", Node_builtin, LEX_BUILTIN, do_cos},
- {"else", Node_illegal, LEX_ELSE, 0},
- {"exit", Node_K_exit, LEX_EXIT, 0},
- {"exp", Node_builtin, LEX_BUILTIN, do_exp},
- {"for", Node_K_for, LEX_FOR, 0},
- {"getline", Node_K_getline, LEX_GETLINE, do_getline},
- {"gsub", Node_builtin, LEX_SUB, do_gsub},
- {"if", Node_K_if, LEX_IF, 0},
- {"in", Node_illegal, LEX_IN, 0},
- {"index", Node_builtin, LEX_BUILTIN, do_index},
- {"int", Node_builtin, LEX_BUILTIN, do_int},
- {"length", Node_builtin, LEX_BUILTIN, do_length},
- {"log", Node_builtin, LEX_BUILTIN, do_log},
- {"match", Node_builtin, LEX_BUILTIN, do_match},
- {"next", Node_K_next, LEX_NEXT, 0},
- {"print", Node_K_print, LEX_PRINT, 0},
- {"printf", Node_K_printf, LEX_PRINTF, 0},
- #ifndef FAST
- {"prvars", Node_builtin, LEX_BUILTIN, do_prvars},
- #endif
- {"rand", Node_builtin, LEX_BUILTIN, do_rand},
- {"sin", Node_builtin, LEX_BUILTIN, do_sin},
- {"split", Node_builtin, LEX_BUILTIN, do_split},
- {"sprintf", Node_builtin, LEX_BUILTIN, do_sprintf},
- {"srand", Node_builtin, LEX_BUILTIN, do_srand},
- {"sqrt", Node_builtin, LEX_BUILTIN, do_sqrt},
- {"sub", Node_builtin, LEX_SUB, do_sub},
- {"substr", Node_builtin, LEX_BUILTIN, do_substr},
- {"system", Node_builtin, LEX_BUILTIN, do_system},
- {"while", Node_K_while, LEX_WHILE, 0},
- {NULL, Node_illegal, ERROR, 0}
- };
-
- /* Read one token, getting characters through lexptr. */
-
- static int
- yylex ()
- {
- register int c;
- register int namelen;
- register char *tokstart;
- register struct token *toktab, *low, *high, *mid;
- int dif;
- double atof(); /* JF know what happens if you forget this? */
-
- static did_newline = 0; /* JF the grammar insists that actions end
- with newlines. This was easier than hacking
- the grammar. */
- int do_concat;
-
- int seen_e = 0; /* These are for numbers */
- int seen_point = 0;
-
- retry:
-
- if(!lexptr)
- return 0;
-
- if (want_regexp) {
- want_regexp = 0;
- /* there is a potential bug if a regexp is followed by an equal sign:
- "/foo/=bar" would result in assign_quotient being returned as the
- next token. Nothing is done about it since it is not valid awk,
- but maybe something should be done anyway. */
-
- tokstart = lexptr;
- while (c = *lexptr++) {
- switch (c) {
- case '\\':
- if (*lexptr++ == '\0') {
- yyerror ("unterminated regexp ends with \\");
- return ERROR;
- }
- break;
- case '/': /* end of the regexp */
- lexptr--;
- yylval.sval = tokstart;
- return REGEXP;
- case '\n':
- case '\0':
- yyerror ("unterminated regexp");
- return ERROR;
- }
- }
- }
- do_concat=want_concat_token;
- want_concat_token=0;
-
- if(*lexptr=='\0') {
- lexptr=0;
- return NEWLINE;
- }
-
- /* if lexptr is at white space between two terminal tokens or parens,
- it is a concatenation operator. */
- if(do_concat && (*lexptr==' ' || *lexptr=='\t')) {
- while (*lexptr == ' ' || *lexptr == '\t')
- lexptr++;
- if (isalnum(*lexptr) || *lexptr == '\"' || *lexptr == '('
- || *lexptr == '.' || *lexptr == '$') /* the '.' is for decimal pt */
- return CONCAT_OP;
- }
-
- while (*lexptr == ' ' || *lexptr == '\t')
- lexptr++;
-
- tokstart = lexptr; /* JF */
-
- switch (c = *lexptr++) {
- case 0:
- return 0;
-
- case '\n':
- lineno++;
- return NEWLINE;
-
- case '#': /* it's a comment */
- while (*lexptr != '\n' && *lexptr != '\0')
- lexptr++;
- goto retry;
-
- case '\\':
- if(*lexptr=='\n') {
- lexptr++;
- goto retry;
- } else break;
- case ')':
- case ']':
- ++want_concat_token;
- /* fall through */
- case '(': /* JF these were above, but I don't see why they should turn on concat. . . &*/
- case '[':
-
- case '{':
- case ',': /* JF */
- case '$':
- case ';':
- case ':':
- case '?':
- /* set node type to ILLEGAL because the action should set it to
- the right thing */
- yylval.nodetypeval = Node_illegal;
- return c;
-
- case '^':
- if (*lexptr=='=') {
- yylval.nodetypeval=Node_assign_pow;
- lexptr++;
- return ASSIGNOP;
- }
- yylval.nodetypeval=Node_illegal;
- return c;
-
- case '*':
- if(*lexptr=='=') {
- yylval.nodetypeval=Node_assign_times;
- lexptr++;
- return ASSIGNOP;
- }
- yylval.nodetypeval=Node_illegal;
- return c;
-
- case '/':
- if(*lexptr=='=') {
- yylval.nodetypeval=Node_assign_quotient;
- lexptr++;
- return ASSIGNOP;
- }
- yylval.nodetypeval=Node_illegal;
- return c;
-
- case '%':
- if(*lexptr=='=') {
- yylval.nodetypeval=Node_assign_mod;
- lexptr++;
- return ASSIGNOP;
- }
- yylval.nodetypeval=Node_illegal;
- return c;
-
- case '+':
- if(*lexptr=='=') {
- yylval.nodetypeval=Node_assign_plus;
- lexptr++;
- return ASSIGNOP;
- }
- if(*lexptr=='+') {
- yylval.nodetypeval=Node_illegal;
- lexptr++;
- return INCREMENT;
- }
- yylval.nodetypeval=Node_illegal;
- return c;
-
- case '!':
- if(*lexptr=='=') {
- yylval.nodetypeval=Node_notequal;
- lexptr++;
- return RELOP;
- }
- if(*lexptr=='~') {
- yylval.nodetypeval=Node_nomatch;
- lexptr++;
- return MATCHOP;
- }
- yylval.nodetypeval=Node_illegal;
- return c;
-
- case '<':
- if (want_redirect) {
- yylval.nodetypeval = Node_redirect_input;
- return REDIRECT_OP;
- }
- if(*lexptr=='=') {
- yylval.nodetypeval=Node_leq;
- lexptr++;
- return RELOP;
- }
- yylval.nodetypeval=Node_less;
- return RELOP;
-
- case '=':
- if(*lexptr=='=') {
- yylval.nodetypeval=Node_equal;
- lexptr++;
- return RELOP;
- }
- yylval.nodetypeval=Node_assign;
- return ASSIGNOP;
-
- case '>':
- if(want_redirect) {
- if (*lexptr == '>') {
- yylval.nodetypeval = Node_redirect_append;
- lexptr++;
- } else
- yylval.nodetypeval = Node_redirect_output;
- return REDIRECT_OP;
- }
- if(*lexptr=='=') {
- yylval.nodetypeval=Node_geq;
- lexptr++;
- return RELOP;
- }
- yylval.nodetypeval=Node_greater;
- return RELOP;
-
- case '~':
- yylval.nodetypeval=Node_match;
- return MATCHOP;
-
- case '}':
- if (did_newline)
- {
- did_newline = 0;
- return c;
- }
- did_newline++;
- --lexptr;
- return NEWLINE;
-
- case '"':
- while (*lexptr != '\0') {
- switch (*lexptr++) {
- case '\\':
- if (*lexptr++ != '\0')
- break;
- /* fall through */
- case '\n':
- yyerror ("unterminated string");
- return ERROR;
- case '\"':
- yylval.sval = tokstart + 1; /* JF Skip the doublequote */
- ++want_concat_token;
- return YSTRING;
- }
- }
- return ERROR; /* JF this was one level up, wrong? */
-
- case '-':
- if(*lexptr=='=') {
- yylval.nodetypeval=Node_assign_minus;
- lexptr++;
- return ASSIGNOP;
- }
- if(*lexptr=='-') {
- yylval.nodetypeval=Node_illegal;
- lexptr++;
- return DECREMENT;
- }
- /* JF I think space tab comma and newline are the legal places for
- a UMINUS. Have I missed any? */
- if((!isdigit(*lexptr) && *lexptr!='.') || (lexptr>lexptr_begin+1 &&
- !index(" \t,\n",lexptr[-2]))) {
- /* set node type to ILLEGAL because the action should set it to
- the right thing */
- yylval.nodetypeval = Node_illegal;
- return c;
- }
- /* FALL through into number code */
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- case '.':
- /* It's a number */
- if(c=='-') namelen=1;
- else namelen=0;
- for (; (c = tokstart[namelen]) != '\0'; namelen++) {
- switch (c) {
- case '.':
- if (seen_point)
- goto got_number;
- ++seen_point;
- break;
- case 'e':
- case 'E':
- if (seen_e)
- goto got_number;
- ++seen_e;
- if (tokstart[namelen+1] == '-' || tokstart[namelen+1] == '+')
- namelen++;
- break;
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- break;
- default:
- goto got_number;
- }
- }
-
- /*
- ** There seems to be a bug (feature?) in the Microsoft Large Model
- ** atof function. If the string to convert is too long, atof returns a
- ** zero without bothering to scan the string. The following hack simply
- ** truncates tokstart for the duration of the call. -ADE-
- **/
-
- got_number:
- lexptr = tokstart + namelen;
- *lexptr = '\0';
- yylval.fval = atof(tokstart);
- *lexptr = c;
- ++want_concat_token;
- return NUMBER;
-
- case '&':
- if(*lexptr=='&') {
- yylval.nodetypeval=Node_and;
- lexptr++;
- return LEX_AND;
- }
- return ERROR;
-
- case '|':
- if(want_redirect) {
- lexptr++;
- yylval.nodetypeval = Node_redirect_pipe;
- return REDIRECT_OP;
- }
- if(*lexptr=='|') {
- yylval.nodetypeval=Node_or;
- lexptr++;
- return LEX_OR;
- }
- return ERROR;
- }
-
- if (!(is_identchar(c))) {
- yyerror ("Invalid char '%c' in expression\n", c);
- return ERROR;
- }
-
- /* its some type of name-type-thing. Find its length */
- for (namelen = 0; is_identchar(tokstart[namelen]); namelen++)
- ;
-
-
- /* See if it is a special token. */
-
- low = tokentab;
- high = END(tokentab);
- while (low <= high)
- {
- mid = low + (high-low)/2;
- if(!(dif = strncmp(tokstart,mid->operator,namelen)) &&
- *tokstart==mid->operator[0] && mid->operator[namelen]=='\0')
- {
- lexptr=tokstart+namelen;
- if(mid->class == LEX_BUILTIN || mid->class == LEX_SUB)
- yylval.ptrval = mid->ptr;
- else
- yylval.nodetypeval = mid->value;
- return mid->class;
- }
- else if (dif > 0)
- low = mid+1;
- else
- high = mid-1;
- }
-
- /* for (toktab = tokentab; toktab->operator != NULL; toktab++) {
- ** if(*tokstart==toktab->operator[0] &&
- ** !strncmp(tokstart,toktab->operator,namelen) &&
- ** toktab->operator[namelen]=='\0') {
- ** lexptr=tokstart+namelen;
- ** if(toktab->class == LEX_BUILTIN || toktab->class == LEX_SUB)
- ** yylval.ptrval = toktab->ptr;
- ** else
- ** lexptr=tokstart+namelen;
- ** if(toktab->class == LEX_BUILTIN || toktab->class == LEX_SUB)
- ** yylval.ptrval = toktab->ptr;
- ** else
- ** yylval.nodetypeval = toktab->value;
- ** return toktab->class;
- ** }
- ** }
- /*
- /* It's a name. See how long it is. */
- yylval.sval = tokstart;
- lexptr = tokstart+namelen;
- ++want_concat_token;
- return NAME;
- }
-
- /*VARARGS1*/
- void
- yyerror (mesg,a1,a2,a3,a4,a5,a6,a7,a8)
- char *mesg;
- {
- register char *ptr,*beg;
-
- /* Find the current line in the input file */
- if(!lexptr) {
- beg="(END OF FILE)";
- ptr=beg+13;
- } else {
- if (*lexptr == '\n' && lexptr!=lexptr_begin)
- --lexptr;
- for (beg = lexptr;beg!=lexptr_begin && *beg != '\n';--beg)
- ;
- for (ptr = lexptr;*ptr && *ptr != '\n';ptr++) /*jfw: NL isn't guaranteed*/
- ;
- if(beg!=lexptr_begin)
- beg++;
- }
- fprintf (stderr, "Error near line %d, '%.*s'\n",lineno, ptr-beg, beg);
- /* figure out line number, etc. later */
- fprintf (stderr, mesg, a1, a2, a3, a4, a5, a6, a7, a8);
- fprintf (stderr,"\n");
- exit (1);
- }
-
/* Parse a C escape sequence.  STRING_PTR points to a variable
   containing a pointer to the string to parse, positioned just after
   the backslash.  That pointer is updated past the characters we use.
   The value of the escape sequence is returned.

   A return value of -2 means the sequence \ newline was seen, which is
   supposed to be equivalent to nothing at all.

   A return value of -1 means the string ended in the middle of the
   escape (right after the backslash, or right after \^); the string
   pointer is then left pointing at the null character.

   If \ is followed by 000, we return 0 and leave the string pointer
   after the zeros.  A value of 0 does not mean end of string.

   \^X yields the control character that corresponds to X, and \^?
   yields 0177.  X may itself be written as an escape sequence; a
   negative result of that inner sequence is passed on unchanged.  */

static int
parse_escape (string_ptr)
char **string_ptr;
{
	register int c = *(*string_ptr)++;

	switch (c) {
	case 'a':
		return '\a';
	case 'b':
		return '\b';
	case 'e':
		return 033;
	case 'f':
		return '\f';
	case 'n':
		return '\n';
	case 'r':
		return '\r';
	case 't':
		return '\t';
	case 'v':
		return '\v';
	case '\n':
		return -2;
	case 0:
		/* Negative, so that the caller can tell the end of the string
		   from the escape \000. */
		(*string_ptr)--;
		return -1;
	case '^':
		c = *(*string_ptr)++;
		if (c == 0) {
			/* do not walk off the end of the string */
			(*string_ptr)--;
			return -1;
		}
		if (c == '\\') {
			c = parse_escape (string_ptr);
			if (c < 0)
				return c;
		}
		if (c == '?')
			return 0177;
		return (c & 0200) | (c & 037);

	case '0':
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
		{
			/* up to three octal digits, the first already in C */
			register int i = c - '0';
			register int count = 0;

			while (++count < 3) {
				if ((c = *(*string_ptr)++) >= '0' && c <= '7') {
					i *= 8;
					i += c - '0';
				} else {
					(*string_ptr)--;
					break;
				}
			}
			return i;
		}
	default:
		return c;
	}
}
-