home *** CD-ROM | disk | FTP | other *** search
- /**
- * $Revision: 1.1 $
- * $Log: C:/AWK/AWK1.C_V $
- *
- * Rev 1.1 09 Sep 1988 18:29:00 vince
- * MC 5.1 version
- *
- * Rev 1.0 09 Sep 1988 18:02:52 vince
- * Original source
- *
- *
- * awk1 -- Expression tree constructors and main program for gawk.
- *
- * Copyright (C) 1986 Free Software Foundation
- * Written by Paul Rubin, August 1986
- *
- * Modifications by Andrew D. Estes, July 1988
- */
-
- /**
- * GAWK is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY. No author or distributor accepts responsibility to anyone
- * for the consequences of using it or for whether it serves any
- * particular purpose or works at all, unless he says so in writing.
- * Refer to the GAWK General Public License for full details.
- *
- * Everyone is granted permission to copy, modify and redistribute GAWK,
- * but only under the conditions described in the GAWK General Public
- * License. A copy of this license is supposed to have been given to you
- * along with GAWK so you can know your rights and responsibilities. It
- * should be in a file named COPYING. Among other things, the copyright
- * notice and this notice must be preserved on all copies.
- *
- * In other words, go ahead and share GAWK, but don't try to stop
- * anyone else from sharing it farther. Help stamp out software hoarding!
- */
-
- #include <stdio.h>
- #include <string.h>
- #include "regex.h"
- #include "awk.h"
-
- extern char *index(); /* Let's be honest here */
-
- /**
- * Temporary nodes are stored here. ob_dummy is a dummy object used to keep
- * the obstack library from free()ing up the entire stack.
- */
- struct obstack temp_strings;
- char *ob_dummy;
-
- /*
- * The parse tree and field nodes are stored here. Parse_end is a dummy item
- * used to free up unneeded fields without freeing the program being run
- */
- struct obstack other_stack;
- char *parse_end;
-
- /* The global null string */
- NODE *Nnull_string;
-
- /* The special variable that contains the name of the current input file */
- extern NODE *FILENAME_node;
-
- /* The name the program was invoked under, for error messages */
- char *myname;
-
- /* A block of gAWK code to be run before running the program */
- NODE *begin_block = 0;
-
- /* A block of gAWK code to be run after the last input file */
- NODE *end_block = 0;
-
- FILE *input_file; /* Where to read from */
-
- #ifndef FAST
- /* non-zero means in debugging is enabled. Probably not very useful */
- int debugging;
- #endif
-
- main(argc, argv)
- int argc;
- char **argv;
- {
- register int i;
- register NODE *tmp;
- int max_argc;
- char **do_vars;
- #ifndef FAST
- /* Print out the parse tree. For debugging */
- register int dotree = 0;
- extern int yydebug;
- #endif
- extern char *lexptr;
- extern char *lexptr_begin;
- extern int patterns, actions; /* ADE */
- extern char *get_argv(); /* ADE */
- FILE *fp, *fopen();
-
- do_vars = argv; /* ADE */
- max_argc = argc; /* ADE */
- --argc;
- myname = *argv++;
- if (!argc)
- usage();
-
- /* Tell the regex routines how they should work. . . */
- re_set_syntax(RE_NO_BK_PARENS | RE_NO_BK_VBAR);
-
- /* Set up the stack for temporary strings */
- obstack_init(&temp_strings);
- ob_dummy = obstack_alloc(&temp_strings, 0);
-
- /* Set up the other stack for other things */
- obstack_init(&other_stack);
- /* initialize the null string */
- Nnull_string = make_string("", 0);
- /* This was to keep Nnull_string from ever being free()d It didn't work */
- /* Nnull_string->stref=32000; */
- /* Set up the special variables */
- /*
- * Note that this must be done BEFORE arg parsing else -R and -F break horribly
- */
- init_vars();
-
-
- for (; *argv && **argv == '-'; argc--, argv++)
- {
- switch (argv[0][1])
- {
- #ifndef FAST
- case 'd':
- debugging++;
- dotree++;
- break;
-
- case 'D':
- debugging++;
- yydebug = 2;
- break;
- #endif
- /* This feature isn't in un*x awk, but might be useful */
- case 'R':
- set_rs(&argv[0][2]);
- break;
-
- case 'F':
- set_fs(&argv[0][2]);
- break;
-
- /*
- * It would be better to read the input file in as we parse it.
- * Its done this way for hysterical reasons. Feel free to fix it.
- */
-
- /*
- * I don't know if this is useful or not but a -f followed by a
- * '-' will allow the awk program to be read from stdin. This
- * gets around the DOS/OS|2 line length limitation -ADE-
- */
-
- case 'f':
- if (lexptr)
- panic("Can only use one -f option");
- if (!strcmp(argv[1], "-"))
- fp = stdin;
- else
- fp = fopen(argv[1], "r");
- if (fp == NULL)
- er_panic(argv[1]);
- else
- {
- char *curptr;
- int siz, nread;
-
- curptr = lexptr = malloc(2000);
- if (curptr == NULL)
- panic("Memory exhausted"); /* jfw: instead of abort() */
- siz = 2000;
- i = siz - 1;
- while ((nread = fread(curptr, sizeof(char), i, fp)) > 0)
- {
- curptr += nread;
- i -= nread;
- if (i == 0)
- {
- lexptr = realloc(lexptr, siz * 2);
- if (lexptr == NULL)
- panic("Memory exhausted"); /* jfw: instead of abort() */
- curptr = lexptr + siz - 1;
- i = siz;
- siz *= 2;
- }
- }
- *curptr = '\0';
- if (fp != stdin)
- fclose(fp);
- }
- argc--;
- argv++;
- break;
-
- case '\0': /* A file */
- break;
-
- default:
- panic("Unknown option %s", argv[0]);
- }
- }
- #ifndef FAST
- if (debugging)
- setbuf(stdout, 0); /* jfw: make debugging easier */
- #endif
- /* No -f option, use next arg */
- if (!lexptr)
- {
- if (!argc)
- usage();
- lexptr = *argv++;
- --argc;
- }
- /* This must be done after finding program ADE */
- init_args(max_argc - argc, max_argc, do_vars);
-
- /* Read in the program */
- lexptr_begin = lexptr;
- (void) yyparse();
-
- /* Free up the space used for reading in the program -ADE- */
-
- free(lexptr_begin);
-
- /*
- * Anything allocated on the other_stack after here will be freed when
- * the next input line is read.
- */
- parse_end = obstack_alloc(&other_stack, 0);
-
- #ifndef FAST
- if (dotree)
- print_parse_tree(expression_value);
- #endif
- /* Set up the field variables */
- init_fields();
-
- /* Look for BEGIN and END blocks. Only one of each allowed */
- for (tmp = expression_value; tmp; tmp = tmp->rnode)
- {
- if (!tmp->lnode || !tmp->lnode->lnode)
- continue;
- if (tmp->lnode->lnode->type == Node_K_BEGIN)
- begin_block = tmp->lnode->rnode;
- else
- if (tmp->lnode->lnode->type == Node_K_END)
- end_block = tmp->lnode->rnode;
- }
- if (begin_block && interpret(begin_block) == 0)
- exit(0); /* jfw */
-
- do_vars = argv;
- /*
- * We want to get the command line args from the builtin variables * ARGC and ARGV[]. -ade-
- */
- while (get_argc() > 0 && index(get_argv(), '='))
- {
- inc_argv();
- ++argv;
- get_argc_dec();
- }
- if (do_vars == argv)
- do_vars = 0;
- if (argc == 0)
- {
- static char *dumb[2] = {"-", 0};
-
- set_argc(1); /* ade */
- set_argv("-"); /* ade */
- }
- /* no need to open files if there is nothing to do -ADE- */
- while (get_argc_dec() && (patterns > 0 || actions > 0))
- {
- if (!strcmp(get_argv(), "-"))
- {
- input_file = stdin;
- FILENAME_node->var_value = Nnull_string;
- ADD_ONE_REFERENCE(Nnull_string);
- }
- else
- {
- extern NODE *deref;
-
- input_file = fopen(get_argv(), "r"); /* ade */
- /* This should print the error message from errno */
- if (!input_file)
- er_panic(get_argv()); /* ade */
- /* This is a kludge. */
- deref = FILENAME_node->var_value;
- do_deref();
- FILENAME_node->var_value = make_string(get_argv(), strlen(get_argv()));
- set_fnr(0);
- }
- /* This is where it spends all its time. The infamous MAIN LOOP */
- if (inrec(0, input_file) == 0)
- {
- if (do_vars)
- {
- while (do_vars != argv && *do_vars)
- {
- char *cp;
-
- cp = index(*do_vars, '=');
- *cp++ = '\0';
- variable(*do_vars)->var_value = make_string(cp, strlen(cp));
- do_vars++;
- }
- do_vars = 0;
- }
- do
- obstack_free(&temp_strings, ob_dummy);
- while (interpret(expression_value) && inrec(0, input_file) == 0);
- }
- if (input_file != stdin)
- fclose(input_file);
- inc_argv(); /* ade */
- }
- if (end_block)
- (void) interpret(end_block);
- exit(0);
- }
-
- /* These exit values are arbitrary */
- panic(str, arg)
- char *str;
- {
- fprintf(stderr, "%s: ", myname);
- fprintf(stderr, str, arg);
- fprintf(stderr, "\n");
- exit(12);
- }
-
- er_panic(str)
- char *str;
- {
- fprintf(stderr, "%s: ", myname);
- perror(str);
- exit(15);
- }
-
- usage()
- {
- fprintf(stderr, "%s: usage: %s {-f progfile | program } [-F{c} -R{c}] file . . .\n", myname, myname);
- exit(11);
- }
-
- /*
- * This allocates a new node of type ty.
- * Note that this node will not go away unless freed, so don't use it for tmp storage
- */
- NODE *newnode(ty)
- NODETYPE ty;
- {
- register NODE *r;
-
- r = (NODE *) malloc(sizeof(NODE));
- if (r == NULL)
- panic("Memory exhausted"); /* -ade-: instead of abort() */
- r->type = ty;
- return r;
- }
-
-
- /*
- * Duplicate a node. (For global strings, "duplicate" means crank up
- * the reference count.) This creates global nodes. . .
- */
- NODE *dupnode(n)
- NODE *n;
- {
- register NODE *r;
-
- if (n->type == Node_string)
- {
- n->stref++;
- return n;
- }
- else if (n->type == Node_temp_string)
- {
- r = newnode(Node_string);
- r->stlen = n->stlen;
- r->stref = 1;
- r->stptr = malloc(n->stlen + 1);
- if (r->stptr == NULL)
- panic("Memory exhausted"); /* -ade-: instead of abort() */
- bcopy(n->stptr, r->stptr, n->stlen);
- r->stptr[r->stlen] = '\0'; /* JF for hackval */
- return r;
- }
- else
- {
- r = newnode(Node_illegal);
- *r = *n;
- return r;
- }
- }
-
- /* This allocates a node with defined lnode and rnode. */
- /*
- * This should only be used by yyparse+co while reading in the program
- */
- NODE *node(left, op, right)
- NODE *left, *right;
- NODETYPE op;
- {
- register NODE *r;
-
- r = (NODE *) obstack_alloc(&other_stack, sizeof(NODE));
- r->type = op;
- r->lnode = left;
- r->rnode = right;
- return r;
- }
-
- /* This allocates a node with defined subnode and proc */
- /* Otherwise like node() */
- NODE *snode(subn, op, procp)
- NODETYPE op;
- NODE *(*procp) ();
- NODE *subn;
- {
- register NODE *r;
-
- r = (NODE *) obstack_alloc(&other_stack, sizeof(NODE));
- r->type = op;
- r->subnode = subn;
- r->proc = procp;
- return r;
- }
-
- /*
- * (jfw) This allocates a Node_line_range node with defined condpair and
- * zeroes the trigger word to avoid the temptation of assuming that calling
- * 'node( foo, Node_line_range, 0)' will properly initialize 'triggered'.
- */
- /* Otherwise like node() */
- NODE *mkrangenode(cpair)
- NODE *cpair;
- {
- register NODE *r;
-
- r = (NODE *) obstack_alloc(&other_stack, sizeof(NODE));
- r->type = Node_line_range;
- r->condpair = cpair;
- r->triggered = 0;
- return r;
- }
-
- /* this allocates a node with defined numbr */
- /* This creates global nodes! */
- NODE *make_number(x)
- AWKNUM x;
- {
- register NODE *r;
-
- r = newnode(Node_number);
- r->numbr = x;
- return r;
- }
-
- /*
- * This creates temporary nodes. They go away quite quickly, so don't use
- * them for anything important
- */
- #ifndef FAST
- NODE *tmp_number(x)
- AWKNUM x;
- {
- #ifdef DONTDEF
- return make_number(x);
- #endif
- NODE *r;
-
- r = (NODE *) obstack_alloc(&temp_strings, sizeof(NODE));
- r->type = Node_number;
- r->numbr = x;
- return r;
- }
- #endif
-
- /*
- * Make a string node. If len==0, the string passed in S is supposed to end
- * with a double quote, but have had the beginning double quote already
- * stripped off by yylex. If LEN!=0, we don't care what s ends with.
- * This creates a global node
- */
- NODE *make_string(s, len)
- char *s;
- {
- register NODE *r;
- register char *pf, *pt;
- register int c;
-
- /*
- * the aborts are impossible because yylex is supposed to have already checked for unterminated strings
- */
- if (len == -1)
- { /* Called from yyparse, find our own len */
- #ifndef FAST
- if (s[-1] != '\"') /* Didn't start with " */
- abort();
- #endif
-
- for (pf = pt = s; *pf != '\0' && *pf != '\"';)
- {
- c = *pf++;
- switch (c)
- {
- #ifndef FAST
- case '\0':
- abort();
- #endif
-
- case '\\':
- #ifndef FAST
- if (*pf == '\0')
- abort();
- #endif
-
- c = *pf++;
- switch (c)
- {
- case '\\': /* no massagary needed */
- case '\'':
- case '\"':
- break;
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- c -= '0';
- while (*pf && *pf >= '0' && *pf <= '7')
- {
- c = c * 8 + *pf++ - '0';
- }
- break;
- case 'b':
- c = '\b';
- break;
- case 'f':
- c = '\f';
- break;
- case 'n':
- c = '\n';
- break;
- case 'r':
- c = '\r';
- break;
- case 't':
- c = '\t';
- break;
- case 'v':
- c = '\v';
- break;
- default:
- *pt++ = '\\';
- break;
- }
- /* FALL THROUGH */
- default:
- *pt++ = c;
- break;
- }
- }
- #ifndef FAST
- if (*pf == '\0')
- abort(); /* JF hit the end of the buf */
- #endif
- len = pt - s; /* JF was p - s - 1 */
- }
-
- r = newnode(Node_string);
- r->stptr = (char *) malloc(len + 1);
- if (r->stptr == 0)
- panic("Memory exhausted"); /* -ade- was abort() */
- r->type = Node_string;
- r->stlen = len;
- r->stref = 1;
- bcopy(s, r->stptr, len);
- r->stptr[len] = '\0'; /* JF a hack */
-
- return r;
- }
-
- /* #ifndef FAST */
- /* This should be a macro for speed, but the C compiler chokes. */
- /* Read the warning under tmp_number */
- NODE *tmp_string(s, len)
- char *s;
- {
- register NODE *r;
-
- #ifdef DONTDEF
- return make_string(s, len);
- #endif
- r = (NODE *) obstack_alloc(&temp_strings, sizeof(NODE));
- r->stptr = (char *) obstack_alloc(&temp_strings, len + 1);
- r->type = Node_temp_string;
- r->stlen = len;
- r->stref = 1;
- bcopy(s, r->stptr, len);
- r->stptr[len] = '\0'; /* JF a hack */
-
- return r;
- }
- /* #endif */
-
- /* Generate compiled regular expressions */
-
- /* like make_regexp but returns a NODE* ADE */
-
- NODE *make_regex(s)
- char *s;
- {
- register NODE *r;
-
- r = newnode(Node_regex);
- r->rereg = make_regexp(s);
- return r;
- }
-
- /* make a re_pattern_buffer on the fly from a node ADE */
-
- struct re_pattern_buffer *make_regexp_n(tree)
- NODE *tree;
- {
- return (make_regexp(tree->stptr));
- }
-
- struct re_pattern_buffer *make_regexp(s)
- char *s;
- {
- typedef struct re_pattern_buffer RPAT;
- RPAT *rp;
- char *p, *err;
-
- rp = (RPAT *) obstack_alloc(&other_stack, sizeof(RPAT));
- bzero((char *) rp, sizeof(RPAT));
- rp->buffer = (char *) malloc(8); /* JF I'd obstack allocate it, except the regex routines try to realloc() it, which fails. */
- /* Note that this means it may never be freed. Someone fix, please? */
-
- rp->allocated = 8;
- rp->fastmap = (char *) obstack_alloc(&other_stack, 256);
-
- for (p = s; *p != '\0'; p++)
- {
- if (*p == '\\')
- p++;
- else if (*p == '/')
- break;
- }
- #ifndef FAST
- /*
- * commented out so it will not abort when
- * passed an expression -ADE-
- * if (*p != '/')
- * abort ();
- */
- #endif
-
- /*
- * JF was re_compile_pattern, but that mishandles ( ) and |, so I had to write my own front end. Sigh.
- */
-
- if ((err = re_compile_pattern(s, p - s, rp)) != NULL)
- {
- fprintf(stderr, "illegal regexp: ");
- yyerror(err); /* fatal */
- }
-
- return rp;
- }
-
- /* Build a for loop */
- FOR_LOOP_HEADER *make_for_loop(init, cond, incr)
- NODE *init, *cond, *incr;
- {
- register FOR_LOOP_HEADER *r;
-
- r = (FOR_LOOP_HEADER *) obstack_alloc(&other_stack, sizeof(FOR_LOOP_HEADER));
- r->init = init;
- r->cond = cond;
- r->incr = incr;
- return r;
- }
-
- /* Name points to a variable name. Make sure its in the symbol table */
- NODE *variable(name)
- char *name;
- {
- register NODE *r;
- NODE *lookup(), *install();
-
- if ((r = lookup(variables, name)) == NULL)
- {
- r = install(variables, name, node(Nnull_string, Node_var, (NODE *) NULL));
- /* JF make_number (0.0) is WRONG */
- }
- return r;
- }
-
- /* Create a special variable */
- NODE *spc_var(name, value)
- char *name;
- NODE *value;
- {
- register NODE *r;
- NODE *lookup(), *install();
-
- if ((r = lookup(variables, name)) == NULL)
- r = install(variables, name, node(value, Node_var, (NODE *) NULL));
- return r;
- }
-
- /*
- * Install a name in the hash table specified, even if it is already there.
- * Name stops with first non alphanumeric. Caller must check against
- * redefinition if that is desired.
- */
- NODE *install(table, name, value)
- HASHNODE **table;
- char *name;
- NODE *value;
- {
- register HASHNODE *hp;
- register int i, len, bucket;
- register char *p;
-
- len = 0;
- p = name;
- while (is_identchar(*p))
- p++;
- len = p - name;
-
- i = sizeof(HASHNODE) + len + 1;
- hp = (HASHNODE *) obstack_alloc(&other_stack, i);
- bucket = hashf(name, len, HASHSIZE);
- hp->next = table[bucket];
- table[bucket] = hp;
- hp->length = len;
- hp->value = value;
- hp->name = ((char *) hp) + sizeof(HASHNODE);
- hp->length = len;
- bcopy(name, hp->name, len);
- return hp->value;
- }
-
- /*
- * find the most recent hash node for name name (ending with first non-
- * identifier char) installed by install
- */
- NODE *lookup(table, name)
- HASHNODE **table;
- char *name;
- {
- register char *bp;
- register HASHNODE *bucket;
- register int len;
-
- for (bp = name; is_identchar(*bp); bp++)
- ;
- len = bp - name;
- bucket = table[hashf(name, len, HASHSIZE)];
- while (bucket)
- {
- if (bucket->length == len && strncmp(bucket->name, name, len) == 0)
- return bucket->value;
- bucket = bucket->next;
- }
- return NULL;
- }
-
/* One step of the hash: shift the running value left and add the next char */
#define HASHSTEP(old, c) (((old) << 1) + (c))
/* Clear bit 31 so that the value cannot be negative */
#define MAKE_POS(v) ((v) & ~0x80000000)

/*
 * hashf -- return the hash of the first len characters of name, reduced
 * to the range 0 .. hashsize-1.  hashsize must be positive.
 *
 * Must be compatible with the one computed a step at a time, elsewhere
 * (JF: Where? I can't find it!)
 *
 * The running value is kept in an unsigned int: with a signed one the
 * left shift overflows on long names, which is undefined behaviour.  It
 * is converted back to int before masking so that the result is the same
 * as the wrapped signed value gives, whatever the width of int.
 */
int hashf(name, len, hashsize)
register char *name;
register int len;
int hashsize;
{
    register unsigned int r = 0;

    while (len-- > 0)
        r = HASHSTEP(r, (unsigned int) *name++);

    return MAKE_POS((int) r) % hashsize;
}
-
- /*
- * Add new to the rightmost branch of LIST. This uses n^2 time, but doesn't
- * get used enough to make optimizing worth it. . .
- */
- /* You don't believe me? Profile it yourself! */
-
- NODE *append_right(list, new)
- NODE *list, *new;
- {
- register NODE *oldlist;
-
- oldlist = list;
- while (list->rnode != NULL)
- list = list->rnode;
- list->rnode = new;
- return oldlist;
- }