home *** CD-ROM | disk | FTP | other *** search
- /*
- * usage: a_pattrn file_name key [ /x ] [ bytes_before ] > report
- * "/x" = include hex, show only 16 bytes instead of 40
- * A_PATTRN List every occurrence of a key character or string in a file.
- * Show 3 (or "bytes_before", range 0 to 15) bytes prior to the
- * key each time. Normally show a total of 40 bytes each time
- * the key is found; if the "/x" argument is set, show only 16
- * bytes, but in hex and ASCII both. The key may be from 1 to
- * 16 characters. Within the key, any non-printing characters,
- * characters which may confuse DOS (> or < or |), linefeeds,
- * blanks, backslash, etc. must be shown in hex form... a
- * backslash and 2 hex digits. Examples:
- * a_pattrn herfile \8E > herfile.8e
- * a_pattrn yourfile * 7 > yourfile.ast
- * a_pattrn myfile Mother
- * a_pattrn hisfile \94\05ke\ff 0 > 5char.pat
- *
- * input: Any file whatsoever.
- *
- * output: One line for each occurrence of the target byte(s) in the file.
- * Sort the result to make patterns show up more clearly.
- *
- * writeup: MIR TUTORIAL ONE, topic 5
- *
- * Written: Douglas Lowry Jan 07 92
- * Modified: Douglas Lowry Apr 03 92 Modify arguments
- * Copyright (C) 1992 Marpex Inc.
- *
- * The MIR (Mass Indexing and Retrieval) Tutorials explain detailed
- * usage and co-ordination of the MIR family of programs to analyze,
- * prepare and index databases (small through gigabyte size), and
- * how to build integrated retrieval software around the MIR search
- * engine. The fifth of the five MIR tutorial series explains how
- * to extend indexing capability into leading edge search-related
- * technologies. For more information, GO IBMPRO on CompuServe;
- * MIR files are in the DBMS library. The same files are on the
- * Canada Remote Systems BBS. A diskette copy of the Introduction
- * is available by mail ($10 US... check, Visa or Mastercard);
- * diskettes with Introduction, Tutorial ONE software and the
- * shareware Tutorial ONE text cost $29. Shareware registration
- * for a tutorial is also $29.
- *
- * E-mail...
- * Compuserve 71431,1337
- * Internet doug.lowry%canrem.com
- * UUCP canrem!doug.lowry
- * Others: doug.lowry@canrem.uucp
- *
- * FAX... 416 963-5677
- *
- * "Snail mail"... Douglas Lowry, Ph.D.
- * Marpex Inc.
- * 5334 Yonge Street, #1102
- * North York, Ontario
- * Canada M2N 6M2
- *
- * Related database consultation and preparation services are
- * available through:
- * Innotech Inc., 2001 Sheppard Avenue E., Suite #118,
- * North York, Ontario Canada M2J 4Z7
- * Tel. 416 492-3838 FAX 416 492-3843
- *
- * This program is free software; you may redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * (file 05LICENS) along with this program; if not, write to the
- * Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
- * USA.
- */
-
- #include <stdio.h>
- #include <stdlib.h>
- #include <dos.h>
- #include <ctype.h>
- #include <direct.h>
-
- #define BIGBUF 2048
- #define repeat for(;;)
-
/*
 * Two-state flag type used throughout this file.
 * FALSE is 0 and TRUE is 1, so a Bool can be tested directly in a condition.
 */
typedef enum _bool
{
    FALSE,
    TRUE
} Bool ;
/*
 * Forward declarations for the functions defined later in this file.
 * They are deliberately left without parameter lists to match the
 * old-style (K&R) definitions below.
 */

void                process( ) ;
void                Usage_( ) ;
void                line_out( ) ;
unsigned long int   xtol_n( ) ;

/*
 * CMDNAME_  Returns the program name shown at the start of the usage text.
 */
char *
Cmdname_( )
{
    return "a_pattrn" ;
}
-
- /*
- * MAIN -
- */
-
- main( argc, argv )
- int argc;
- char **argv;
- {
- FILE *fp_in ;
- Bool ascii ; /* ASCII display only requested */
- int len, i ;
- unsigned long int l_test ;
- unsigned char key[16]; /* character requested by user */
- short int precede, /* bytes before key to be shown */
- key_len,
- pt ; /* pointer within "key" argument*/
-
- /* usage: a_pattrn file_name key [ -x ] [ bytes_before ] > report*/
-
- if( argc < 3 || argc > 5 )
- Usage_();
-
- if(( fp_in = fopen( argv[1], "r+b" )) == NULL )
- {
- fprintf( stderr, "Unable to open input file %s\n", argv[1] );
- Usage_() ;
- }
-
- pt = 0 ;
- for( key_len = 0 ; key_len < 16 ; key_len++ )
- {
- if( !argv[2][pt] )
- break ;
- if( argv[2][pt] != 0x5c ) /* not backslash = printable */
- {
- key[ key_len ] = ( unsigned char ) argv[2][pt] ;
- pt++ ;
- }
- else
- {
- l_test = xtol_n( &argv[2][pt+1], 2 ) ;
- if( l_test > 0xff )
- Usage_();
- key[ key_len ] = ( unsigned char ) l_test ;
- pt += 3 ;
- }
- }
-
- precede = 3 ;
- ascii = TRUE ;
-
- for( i = 3; i < argc ; i++ )
- {
- if( islower( argv[i][1] ))
- argv[i][1] = toupper( argv[i][1] ) ;
- if( argv[i][1] == 'X' && ( argv[i][0] == '/' || argv[i][0] == '-' ))
- ascii = FALSE ;
- else
- {
- precede = atoi( argv[i] );
- if( precede > 16 - key_len )
- precede = 16 - key_len ;
- if( precede < 0 )
- precede = 0 ;
- }
- }
-
- process( key, key_len, ascii, precede, fp_in );
-
- fclose( fp_in );
-
- exit( 0 );
- }
/*
 * USAGE_   Write the usage summary to stderr and terminate the program
 *          with exit status 1. Does not return.
 *
 *          The text is built from adjacent string literals; every
 *          backslash that must appear in the output is written as "\\".
 */
void
Usage_( )
{
    fprintf( stderr,
        "usage: %s file_name key [ /x ] [ bytes_before ] > report\n"
        "\"/x\" = include hex, show only 16 bytes instead of 40\n"
        "List every occurrence of a key character or string in a file.\n"
        "Show 3 (or \"bytes_before\", range 0 to 15) bytes prior to the\n",
        Cmdname_() );
    fprintf( stderr,
        " key each time. Normally show a total of 40 bytes each time\n"
        "the key is found; if the \"/x\" argument is set, show only 16\n"
        "bytes, but in hex and ASCII both. The key may be from 1 to\n"
        "16 characters. Within the key, any non-printing characters,\n" );
    fprintf( stderr,
        " characters which may confuse DOS (> or < or |), linefeeds,\n"
        "blanks, backslash, etc. must be shown in hex form... a\n"
        "backslash and 2 hex digits. Examples:\n"
        "a_pattrn herfile \\8E > herfile.8e\n"
        "a_pattrn yourfile * 7 > yourfile.ast\n" ) ;
    fprintf( stderr,
        " a_pattrn myfile Mother\n"
        "a_pattrn hisfile \\94\\05ke\\ff 0 > 5char.pat\n\n"
        "input: Any file whatsoever.\n\n"
        "output: One line for each occurrence of the target byte(s) in the file.\n"
        "Sort the result to make patterns show up more clearly.\n\n"
        "writeup: MIR TUTORIAL ONE, topic 5\n" ) ;
    exit( 1 );
}
/*
 * XTOL_N   Convert the first "bytes" characters of a hexadecimal string
 *          (upper or lower case digits) to an unsigned long integer.
 *
 *          Returns 0 when "bytes" is zero, and 0xffffffff when "bytes"
 *          is greater than 8 or when any character inside the count is
 *          not a hex digit (a terminating NUL included).
 */
unsigned long int
xtol_n( string, bytes )
    unsigned char   string[] ;
    short int       bytes ;
{
    unsigned long int   value ;     /* result accumulated so far    */
    unsigned long int   nibble ;    /* value of the current digit   */
    unsigned char       ch ;
    int                 idx ;

    if( bytes == 0 )
        return( 0 ) ;
    if( bytes > 8 )
        return( 0xffffffff );

    value = 0 ;
    idx = 0 ;
    while( idx < bytes )
    {
        ch = string[ idx ] ;
        if( !isxdigit( ch ))
            return( 0xffffffff );

        if( isdigit( ch ))
            nibble = ( unsigned long ) ( ch - '0' ) ;
        else if( isupper( ch ))
            nibble = ( unsigned long ) ( ch - 'A' + 10 ) ;
        else
            nibble = ( unsigned long ) ( ch - 'a' + 10 ) ;

        /* Make room for one more hex digit, then drop it in. */
        value = ( value << 4 ) | nibble ;
        idx++ ;
    }

    return( value );
}
- /*
- * PROCESS - Passes through 1 file looking for key, outputting
- * [preceding and] following characters when found.
- */
-
- void
- process( key, key_len, ascii, precede, fp_in )
- unsigned char key[16]; /* characters requested by user */
- short int key_len,
- precede; /* bytes before key to be shown */
- Bool ascii ; /* 40 bytes ASCII, no hex display*/
- FILE *fp_in ;
- {
- unsigned char buffer[ BIGBUF ];
- Bool need_data,
- good_key ; /* found a match */
- long int cum_byt; /* cumulative bytes into file */
- int length, /* of buffer contents */
- pt, /* current byte in buffer */
- display, /* 16 or 40 bytes long */
- adjust,
- i, j, pt2 ;
-
- cum_byt = adjust = 0;
- need_data = TRUE ;
- display = 16 ;
- if( ascii )
- display = 40 ;
-
- repeat
- {
- if( need_data )
- {
- length = fread( &buffer[ adjust ], sizeof( char ),
- ( BIGBUF - adjust ), fp_in );
- length += adjust;
- if( !length )
- break ;
- if( adjust )
- pt = precede;
- else
- pt = 0 ;
- adjust = 0 ;
- need_data = FALSE ;
-
- /* After the end of last buffer in the file, */
- /* reduce any trailing bytes to NULLs. */
-
- if( length < BIGBUF )
- {
- for( i= length, j= 0 ; ( i < BIGBUF && j < display ) ;
- i++, j++ )
- buffer[i] = '\0' ;
- }
- }
-
- if( length < BIGBUF && pt == length )
- break ; /* Normal exit */
-
- if( pt + display + 1 > length && length == BIGBUF )
- {
- for( adjust = 0, j = pt - precede ; j < length ; j++,
- adjust++ )
- buffer[ adjust ] = buffer[ j ] ;
- need_data = TRUE ;
- continue ;
- }
-
- if( buffer[pt] != key[0] )
- {
- pt++ ;
- cum_byt++ ;
- continue; /* the most typical action */
- }
-
- good_key = TRUE ;
- for( pt2 = pt + 1, i = 1 ; i < key_len ; pt2++, i++ )
- {
- if( buffer[pt2] != key[i] )
- {
- good_key = FALSE ;
- break ;
- }
- }
- if( good_key )
- line_out( cum_byt, &buffer[ pt - precede ], ascii ) ;
- pt++ ;
- cum_byt++ ;
- }
- return;
- }
- /*
- * LINE_OUT Output an offset followed by 16 bytes, first in
- * hexadecimal, then in printable form, with periods
- * substituting for non-printable characters. Where
- * ASCII only is requested, 40 bytes are output.
- */
- void
- line_out( offset, buf, ascii )
- long int offset ;
- unsigned char *buf ;
- Bool ascii ;
- {
- int display,
- i ;
-
- display = 16 ;
- if( ascii )
- display = 40 ;
-
- printf( "%08ld: ", offset );
- if( !ascii )
- {
- for( i = 0 ; i < 16 ; i++ )
- printf( "%02x ", buf[ i ] );
- }
- printf( " " );
- for( i = 0 ; i < display ; i++ )
- {
- if( isprint( buf[i] ))
- putchar( buf[i] );
- else
- putchar( '.' );
- }
-
- /* If we test the output once per line, */
- /* that will catch any write errors. */
-
- if( putchar( '\n' ) != '\n' )
- {
- fprintf( stderr, "FATAL... Unable to write output.\n\n" );
- exit( 1 );
- }
-
- return ;
- }