home *** CD-ROM | disk | FTP | other *** search
- /*
- * Bawk main program
- */
- #define MAIN 1
- #include <stdio.h>
- #include "bawk.h"
-
- /*
- * Main program
- */
- main( argc, argv )
- int argc;
- char **argv;
- {
- char gotrules, didfile, getstdin;
-
- getstdin =
- didfile =
- gotrules = 0;
-
- /*
- * Initialize global variables:
- */
- Beginact =
- Endact =
- Rules =
- Rulep =
- #ifdef DEBUG
- Debug =
- #endif
- Filename =
- Linecount =
- Saw_break = 0;
- Stackptr = Stackbtm - 1;
- Stacktop = Stackbtm + MAXSTACKSZ;
- Nextvar = Vartab;
-
- strcpy( Fieldsep, " \t" );
- strcpy( Recordsep, "\n" );
-
- /*
- * Parse command line
- */
- while ( --argc )
- {
- if ( **(++argv) == '-' )
- {
- /*
- * Process dash options.
- */
- switch ( tolower( *(++(*argv)) ) )
- {
- #ifdef DEBUG
- case 'd':
- ++Debug;
- break;
- #endif
- case 0:
- ++getstdin;
- --argv;
- goto dosomething;
- break;
- default: usage();
- }
- }
- else
- {
- dosomething:
- if ( gotrules )
- {
- /*
- * Already read rules file - assume this is
- * is a text file for processing.
- */
- if ( ++didfile == 1 && Beginact )
- doaction( Beginact );
- if ( getstdin )
- {
- --getstdin;
- newfile( 0 );
- }
- else
- newfile( *argv );
- process();
- }
- else
- {
- /*
- * First file name argument on command line
- * is assumed to be a rules file - attempt to
- * compile it.
- */
- if ( getstdin )
- {
- --getstdin;
- newfile( 0 );
- }
- else
- newfile( *argv );
- compile();
- gotrules = 1;
- }
- }
- }
- if ( !gotrules )
- usage();
-
- if ( ! didfile )
- {
- /*
- * Didn't process any files yet - process stdin.
- */
- newfile( 0 );
- if ( Beginact )
- doaction( Beginact );
- process();
- }
- if ( Endact )
- doaction( Endact );
- }
-
- /*
- * Regular expression/action file compilation routines.
- */
- compile()
- {
- /*
- * Compile regular expressions and C actions into Rules struct,
- * reading from current input file "Fileptr".
- */
- int c, len;
-
- #ifdef DEBUG
- if ( Debug )
- error( "compiling...", 0 );
- #endif
-
- while ( (c = getcharacter()) != -1 )
- {
- if ( c==' ' || c=='\t' || c=='\n' )
- /* swallow whitespace */
- ;
- else if ( c=='#' )
- {
- /*
- * Swallow comments
- */
- while ( (c=getcharacter()) != -1 && c!='\n' )
- ;
- }
- else if ( c=='{' )
- {
- #ifdef DEBUG
- if ( Debug )
- error( "action", 0 );
- #endif
- /*
- * Compile (tokenize) the action string into our
- * global work buffer, then allocate some memory
- * for it and copy it over.
- */
- ungetcharacter( '{' );
- len = act_compile( Workbuf );
-
- if ( Rulep && Rulep->action )
- {
- Rulep->nextrule = getmem( sizeof( *Rulep ) );
- Rulep = Rulep->nextrule;
- fillmem( Rulep, sizeof( *Rulep ), 0 );
- }
- if ( !Rulep )
- {
- /*
- * This is the first action encountered.
- * Allocate the first Rules structure and
- * initialize it
- */
- Rules = Rulep = getmem( sizeof( *Rulep ) );
- fillmem( Rulep, sizeof( *Rulep ), 0 );
- }
- Rulep->action = getmem( len );
- movemem( Workbuf, Rulep->action, len );
- }
- else if ( c==',' )
- {
- #ifdef DEBUG
- if ( Debug )
- error( "stop pattern", 0 );
- #endif
- /*
- * It's (hopefully) the second part of a two-part
- * pattern string. Swallow the comma and start
- * compiling an action string.
- */
- if ( !Rulep || !Rulep->pattern.start )
- error( "stop pattern without a start",
- RE_ERROR );
- if ( Rulep->pattern.stop )
- error( "already have a stop pattern",
- RE_ERROR );
- len = pat_compile( Workbuf );
- Rulep->pattern.stop = getmem( len );
- movemem( Workbuf, Rulep->pattern.stop, len );
- }
- else
- {
- /*
- * Assume it's a regular expression pattern
- */
- #ifdef DEBUG
- if ( Debug )
- error( "start pattern", 0 );
- #endif
-
- ungetcharacter( c );
- len = pat_compile( Workbuf );
-
- if ( *Workbuf == T_BEGIN )
- {
- /*
- * Saw a "BEGIN" keyword - compile following
- * action into special "Beginact" buffer.
- */
- len = act_compile( Workbuf );
- Beginact = getmem( len );
- movemem( Workbuf, Beginact, len );
- continue;
- }
- if ( *Workbuf == T_END )
- {
- /*
- * Saw an "END" keyword - compile following
- * action into special "Endact" buffer.
- */
- len = act_compile( Workbuf );
- Endact = getmem( len );
- movemem( Workbuf, Endact, len );
- continue;
- }
- if ( Rulep )
- {
- /*
- * Already saw a pattern/action - link in
- * another Rules structure.
- */
- Rulep->nextrule = getmem( sizeof( *Rulep ) );
- Rulep = Rulep->nextrule;
- fillmem( Rulep, sizeof( *Rulep ), 0 );
- }
- if ( !Rulep )
- {
- /*
- * This is the first pattern encountered.
- * Allocate the first Rules structure and
- * initialize it
- */
- Rules = Rulep = getmem( sizeof( *Rulep ) );
- fillmem( Rulep, sizeof( *Rulep ), 0 );
- }
- if ( Rulep->pattern.start )
- error( "already have a start pattern",
- RE_ERROR );
-
- Rulep->pattern.start = getmem( len );
- movemem( Workbuf, Rulep->pattern.start, len );
- }
- }
- endfile();
- }
-
- /*
- * Text file main processing loop.
- */
- process()
- {
- /*
- * Read a line at a time from current input file at "Fileptr",
- * then apply each rule in the Rules chain to the input line.
- */
- int i;
-
- #ifdef DEBUG
- if ( Debug )
- error( "processing...", 0 );
- #endif
-
- Recordcount = 0;
-
- while ( getline() )
- {
- /*
- * Parse the input line.
- */
- Fieldcount = parse( Linebuf, Fields, Fieldsep );
- #ifdef DEBUG
- if ( Debug>1 )
- {
- printf( "parsed %d words:\n", Fieldcount );
- for(i=0; i<Fieldcount; ++i )
- printf( "<%s>\n", Fields[i] );
- }
- #endif
-
- Rulep = Rules;
- do
- {
- if ( ! Rulep->pattern.start )
- {
- /*
- * No pattern given - perform action on
- * every input line.
- */
- doaction( Rulep->action );
- }
- else if ( Rulep->pattern.startseen )
- {
- /*
- * Start pattern already found - perform
- * action then check if line matches
- * stop pattern.
- */
- doaction( Rulep->action );
- if ( dopattern( Rulep->pattern.stop ) )
- Rulep->pattern.startseen = 0;
- }
- else if ( dopattern( Rulep->pattern.start ) )
- {
- /*
- * Matched start pattern - perform action.
- * If a stop pattern was given, set "start
- * pattern seen" flag and process every input
- * line until stop pattern found.
- */
- doaction( Rulep->action );
- if ( Rulep->pattern.stop )
- Rulep->pattern.startseen = 1;
- }
- }
- while ( Rulep = Rulep->nextrule );
-
- /*
- * Release memory allocated by parse().
- */
- while ( Fieldcount )
- free( Fields[ --Fieldcount ] );
- }
- }
-
- /*
- * Miscellaneous functions
- */
- parse( str, wrdlst, delim )
- char *str;
- char *wrdlst[];
- char *delim;
- {
- /*
- * Parse the string of words in "str" into the word list at "wrdlst".
- * A "word" is a sequence of characters delimited by one or more
- * of the characters found in the string "delim".
- * Returns the number of words parsed.
- * CAUTION: the memory for the words in "wrdlst" is allocated
- * by malloc() and should eventually be returned by free()...
- */
- int wrdcnt, wrdlen;
- char wrdbuf[ MAXLINELEN ], c;
-
- wrdcnt = 0;
- while ( *str )
- {
- while ( instr( *str, delim ) )
- ++str;
- if ( !*str )
- break;
- wrdlen = 0;
- while ( (c = *str) && !instr( c, delim ) )
- {
- wrdbuf[ wrdlen++ ] = c;
- ++str;
- }
- wrdbuf[ wrdlen++ ] = 0;
- /*
- * NOTE: allocate a MAXLINELEN sized buffer for every
- * word, just in case user wants to copy a larger string
- * into a field.
- */
- wrdlst[ wrdcnt ] = getmem( MAXLINELEN );
- strcpy( wrdlst[ wrdcnt++ ], wrdbuf );
- }
-
- return wrdcnt;
- }
-
- unparse( wrdlst, wrdcnt, str, delim )
- char *wrdlst[];
- int wrdcnt;
- char *str;
- char *delim;
- {
- /*
- * Replace all the words in "str" with the words in "wrdlst",
- * maintaining the same word seperation distance as found in
- * the string.
- * A "word" is a sequence of characters delimited by one or more
- * of the characters found in the string "delim".
- */
- int wc;
- char strbuf[ MAXLINELEN ], *sp, *wp, *start;
-
- wc = 0; /* next word in "wrdlst" */
- sp = strbuf; /* points to our local string */
- start = str; /* save start address of "str" for later... */
- while ( *str )
- {
- /*
- * Copy the field delimiters from the original string to
- * our local version.
- */
- while ( instr( *str, delim ) )
- *sp++ = *str++;
- if ( !*str )
- break;
- /*
- * Skip over the field in the original string and...
- */
- while ( *str && !instr( *str, delim ) )
- ++str;
-
- if ( wc < wrdcnt )
- {
- /*
- * ...copy in the field in the wordlist instead.
- */
- wp = wrdlst[ wc++ ];
- while ( *wp )
- *sp++ = *wp++;
- }
- }
- /*
- * Tie off the local string, then copy it back to caller's string.
- */
- *sp = 0;
- strcpy( start, strbuf );
- }
-
int
instr( c, s )
char c, *s;
{
    /*
     * Report whether character "c" occurs in the zero-terminated
     * string "s".  Returns 1 if it does, 0 if not.  The terminating
     * zero of "s" is never matched.
     */
    for ( ; *s; ++s )
    {
        if ( *s == c )
            return 1;
    }
    return 0;
}
-
- char *
- getmem( len )
- unsigned len;
- {
- char *cp;
-
- if ( cp=malloc( len ) )
- return cp;
- error( "out of memory", MEM_ERROR );
- }
-
- char *
- newfile( s )
- char *s;
- {
- Linecount = 0;
- if ( Filename = s )
- {
- #ifdef BDS_C
- if ( fopen( s, Fileptr = Curfbuf ) == -1 )
- #else
- if ( !(Fileptr = fopen( s, "r" )) )
- #endif
- error( "file not found", FILE_ERROR );
- }
- else
- {
- /*
- * No file name given - process standard input.
- */
- Fileptr = stdin;
- Filename = "standard input";
- }
- }
-
- getline()
- {
- /*
- * Read a line of text from current input file. Strip off
- * trailing record seperator (newline).
- */
- int rtn, len;
-
- for ( len=0; len<MAXLINELEN; ++len )
- {
- if ( (rtn = getcharacter()) == *Recordsep || rtn == -1 )
- break;
- Linebuf[ len ] = rtn;
- }
- Linebuf[ len ] = 0;
-
- if ( rtn == -1 )
- {
- endfile();
- return 0;
- }
- return 1;
- }
-
- getcharacter()
- {
- /*
- * Read a character from curren input file.
- * WARNING: your getc() must convert lines that end with CR+LF
- * to LF and CP/M's EOF character (^Z) to a -1.
- * Also, getc() must return a -1 when attempting to read from
- * an unopened file.
- */
- int c;
-
- #ifdef BDS_C
- /*
- * BDS C doesn't do CR+LF to LF and ^Z to -1 conversions <gag>
- */
- if ( (c = getc( Fileptr )) == '\r' )
- {
- if ( (c = getc( Fileptr )) != '\n' )
- {
- ungetc( c );
- c = '\r';
- }
- }
- else if ( c == 26 ) /* ^Z */
- c = -1;
- #else
- c = getc( Fileptr );
- #endif
-
- if ( c == *Recordsep )
- ++Recordcount;
- if ( c=='\n' )
- ++Linecount;
-
- return c;
- }
-
- ungetcharacter( c )
- {
- /*
- * Push a character back into the input stream.
- * If the character is a record seperator, or a newline character,
- * the record and line counters are adjusted appropriately.
- */
- if ( c == *Recordsep )
- --Recordcount;
- if ( c=='\n' )
- --Linecount;
- return ungetc( c, Fileptr );
- }
-
- endfile()
- {
- fclose( Fileptr );
- Filename = Linecount = 0;
- }
-
- error( s, severe )
- char *s;
- int severe;
- {
- char *cp, *errat;
-
- if ( Filename )
- fprintf( stderr, "%s:", Filename );
-
- if ( Linecount )
- fprintf( stderr, " line %d:", Linecount );
-
- fprintf( stderr, " %s\n", s );
- if ( severe )
- exit( severe );
- }
-
- usage()
- {
- error( "Usage: bawk <actfile> [<file> ...]\n", USAGE_ERROR );
- }
-
int
movemem( from, to, count )
char *from, *to;
int count;
{
    /*
     * Copy "count" bytes from "from" to "to", lowest address first.
     * Nothing is copied when "count" is zero or negative.
     */
    int i;

    for ( i = 0; i < count; ++i )
        to[ i ] = from[ i ];
}
-
int
fillmem( array, count, value )
char *array, value;
int count;
{
    /*
     * Store "value" in each of the first "count" bytes of "array".
     * Nothing is written when "count" is zero or negative.
     */
    int i;

    for ( i = 0; i < count; ++i )
        array[ i ] = value;
}
-
int
strncmp( s, t, n )
char *s, *t;
int n;
{
    /*
     * Compare at most "n" characters of the strings "s" and "t".
     * Returns zero when they are equal over that length, otherwise
     * the difference between the first pair of characters that
     * differ (negative when "s" sorts first, positive when "t" does).
     * A count of zero or less compares nothing and returns zero.
     */
    if ( n <= 0 )
        return 0;

    /* advance while both strings agree and characters remain */
    while ( --n > 0 && *s && *t && *s == *t )
    {
        ++s;
        ++t;
    }
    return *s - *t;
}
-
int
num( c )
char c;
{
    /*
     * Returns 1 when "c" is one of the characters '0' to '9',
     * 0 otherwise.
     */
    if ( c < '0' )
        return 0;
    if ( c > '9' )
        return 0;
    return 1;
}
-
int
alpha( c )
char c;
{
    /*
     * Returns 1 when "c" is a letter 'a'-'z' or 'A'-'Z' or an
     * underscore, 0 otherwise.
     */
    if ( c == '_' )
        return 1;
    if ( 'a' <= c && c <= 'z' )
        return 1;
    if ( 'A' <= c && c <= 'Z' )
        return 1;
    return 0;
}
-
int
alphanum( c )
char c;
{
    /*
     * Returns 1 when "c" is accepted by alpha() or by num(),
     * 0 otherwise.  num() is consulted only when alpha() rejects
     * the character.
     */
    if ( alpha( c ) )
        return 1;
    if ( num( c ) )
        return 1;
    return 0;
}