home *** CD-ROM | disk | FTP | other *** search
- /*
- * Usage - dump file_name [/a] [ from_byte [ to_byte ] ] > report
- *
- * DUMP - Lists the contents of a specified portion of any file,
- * reporting 16 bytes per line. "/a" causes accented high
- * bit characters to be printed.
- *
- * input: Any file whatsoever.
- *
- * output: Printable ASCII report, listing offset, then 16 bytes
- * in hexadecimal format, with printable ASCII on the
- * right; periods substitute for non-printable bytes.
- *
- * writeup: MIR TUTORIAL ONE, topic 5
- *
- * Written: Douglas Lowry Jan 09 92
- * Modified: Douglas Lowry Feb 28 92 Add /a argument
- * Copyright (C) 1992 Marpex Inc.
- *
- * The MIR (Mass Indexing and Retrieval) Tutorials explain detailed
- * usage and co-ordination of the MIR family of programs to analyze,
- * prepare and index databases (small through gigabyte size), and
- * how to build integrated retrieval software around the MIR search
- * engine. The fifth of the five MIR tutorial series explains how
- * to extend indexing capability into leading edge search-related
- * technologies. For more information, GO IBMPRO on CompuServe;
- * MIR files are in the DBMS library. The same files are on the
- * Canada Remote Systems BBS. A diskette copy of the Introduction
- * is available by mail ($10 US... check, Visa or Mastercard);
- * diskettes with Introduction, Tutorial ONE software and the
- * shareware Tutorial ONE text cost $29. Shareware registration
- * for a tutorial is also $29.
- *
- * E-mail...
- * Compuserve 71431,1337
- * Internet doug.lowry%canrem.com
- * UUCP canrem!doug.lowry
- * Others: doug.lowry@canrem.uucp
- *
- * FAX... 416 963-5677
- *
- * "Snail mail"... Douglas Lowry, Ph.D.
- * Marpex Inc.
- * 5334 Yonge Street, #1102
- * North York, Ontario
- * Canada M2N 6M2
- *
- * Related database consultation and preparation services are
- * available through:
- * Innotech Inc., 2001 Sheppard Avenue E., Suite #118,
- * North York, Ontario Canada M2J 4Z7
- * Tel. 416 492-3838 FAX 416 492-3843
- *
- * This program is free software; you may redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * (file 05LICENS) along with this program; if not, write to the
- * Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
- * USA.
- */
-
- #include <stdio.h>
- #include <stdlib.h>
- #include <ctype.h>
-
- #define BIGBUF 2048 /* must be a multiple of 16 */
- #define repeat for(;;)
-
- /*
- * declarations
- */
-
/*
 * Two-valued truth type used for the program's flags.
 * FALSE is 0 and TRUE is 1, so a Bool can be tested directly
 * in a condition.
 */
typedef enum _bool {
    FALSE = 0,
    TRUE  = 1
} Bool;
-
/*
 * Forward declarations.  These are old-style declarations (no
 * parameter lists), matching the old-style definitions further on.
 */
void    Usage_() ;
void    process() ;
void    dump_line() ;

/*
 * CMDNAME_ - Returns the program name that the usage message prints.
 */
char *
Cmdname_()
{
    return "dump" ;
}
-
- /*
- * MAIN
- */
-
- main( argc, argv )
- int argc;
- char **argv;
- {
- FILE *fp ;
- Bool accent, /* user wants accented letters to print */
- got_low ; /* already read in a from byte arg */
- char c, c10 ;
- long fr_byte, to_byte; /* byte range */
- int i ;
-
- /* Usage - dump file_name [/a] [ from_byte [ to_byte ] ] > report */
-
- c10 = argv[1][0] ;
- if( argc < 2 || argc > 5 || c10 == '-' || c10 == '/' || c10 == '?' )
- Usage_() ;
-
- if(( fp = fopen( argv[1], "rb" )) == NULL )
- {
- fprintf( stderr, "\nUnable to open file %s.\n", argv[1] );
- Usage_();
- }
-
- fr_byte = 0 ;
- to_byte = 0x0fffffff;
- accent = got_low = FALSE ;
- for( i = 2 ; i < argc ; i++ )
- {
- if( argv[i][0] == '-' && ( argv[i][1] == 'a' || argv[i][1] == 'A' ))
- accent = TRUE ;
- else if( got_low )
- to_byte = atol( argv[i] );
- else
- {
- fr_byte = atol( argv[i] );
- got_low = TRUE ;
- }
- }
-
- if( fr_byte )
- {
- if( fseek( fp, fr_byte, SEEK_SET ))
- {
- fprintf( stderr, "Unable to position %s to %ld\n",
- argv[1], fr_byte );
- Usage_() ;
- }
- }
-
- process( fp, fr_byte, to_byte, accent ) ;
-
- fclose( fp );
- exit( 0 );
- }
/*
 *  USAGE_ - Writes the help text to stderr, then terminates the
 *           program with exit status 1.  Does not return.
 */
void
Usage_()
{
    /* First part: the command name is substituted for %s. */
    fprintf( stderr,
        "\nUsage: %s file_name [/a] [ from_byte [ to_byte ] ] > report\n\n"
        "Lists the contents of a specified portion of any file,\n"
        "reporting 16 bytes per line. \"/a\" causes accented high\n"
        "bit characters to be printed.\n\n"
        "input: Any file whatsoever.\n\n",
        Cmdname_() );

    /* Second part: fixed text, no substitutions. */
    fputs(
        "Output: Printable ASCII report, listing offset, then 16 bytes\n"
        "in hexadecimal format, with printable ASCII on the\n"
        "right; periods substitute for non-printable bytes.\n\n"
        "writeup: MIR TUTORIAL ONE, topic 5\n\n",
        stderr );

    exit( 1 ) ;
}
- /*
- * PROCESS - Passes through file from starting position,
- * displaying 16 bytes per line.
- */
- void
- process( fp, fr_byte, to_byte, accent )
- FILE *fp ;
- long int fr_byte, /* beginning offset */
- to_byte ; /* ending offset */
- Bool accent; /* user wants accented letters to print */
- {
- unsigned char buffer[ BIGBUF ];
- long int offset; /* cumulative bytes into file */
- int length, /* of buffer contents */
- i, j, pt ;
-
- offset = fr_byte ;
-
- repeat
- {
- if( offset > to_byte )
- break ;
- length = fread( buffer, sizeof( char ), BIGBUF, fp );
- if( !length )
- break ;
-
- /* After the end of last buffer in the file, */
- /* reduce any trailing bytes to NULLs. */
-
- if( length < BIGBUF )
- {
- for( i= length, j= 0 ; ( i < BIGBUF && j < 16 ) ; i++, j++ )
- buffer[i] = '\0' ;
- }
-
- for( pt = 0 ; pt < length ; pt += 16 )
- {
- dump_line( stdout, offset, &buffer[ pt ], accent ) ;
- offset += 16 ;
- if( offset > to_byte )
- break ;
- }
- }
- return;
- }
- /*
- * DUMP_LINE Output an offset followed by 16 bytes, first in
- * hexadecimal, then in printable form, with periods
- * substituting for non-printable characters
- */
- void
- dump_line( fp_out, offset, buf, accent )
- FILE *fp_out ;
- long int offset ;
- unsigned char *buf ;
- Bool accent; /* user wants accented letters to print */
- {
-
- #define NON_PRINT 0
- #define WHITE_SPACE 1
- #define PUNCTUATION 2
- #define DIGIT 3
- #define CONSONANT 4
- #define VOWEL 5
- #define HI_CONSONANT 6
- #define HI_VOWEL 7
- #define TYPE_CT 8 /* count of above types */
-
- static unsigned char table[256] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, /* ctls */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* ctls */
- /* bl ! " # $ % & ' ( ) * + , - . / */
- 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5, 2,
- /* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2,
- /* @ A B C D E F G H I J K L M N O */
- 4, 5, 4, 4, 4, 5, 4, 4, 4, 5, 4, 4, 4, 4, 4, 5,
- /* P Q R S T U V W X Y Z [ \ ] ^ _ */
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4, 2, 2, 2, 2, 2,
- /* ` a b c d e f g h i j k l m n o */
- 2, 5, 4, 4, 4, 5, 4, 4, 4, 5, 4, 4, 4, 4, 4, 5,
- /* p q r s t u v w x y z { | } ~ NULL */
- 4, 4, 4, 4, 4, 5, 4, 4, 4, 5, 4, 2, 2, 2, 2, 0,
- /* Ç ü é â ä à å ç ê ë è ï î ì Ä Å */
- 6, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 7, 7, 7, 7, 7,
- /* É æ Æ ô ö ò û ù ÿ Ö Ü ¢ £ ¥ ₧ ƒ */
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0,
- /* á í ó ú ñ Ñ ª º ¿ ⌐ ¬ ½ ¼ ¡ « » */
- 7, 7, 7, 7, 6, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- };
-
- int type, /* of character per above table */
- i ;
- unsigned char c ;
-
- fprintf( fp_out, "%08ld: ", offset );
- for( i = 0 ; i < 16 ; i++ )
- fprintf( fp_out, "%02x ", buf[ i ] );
- fprintf( fp_out, " " );
- for( i = 0 ; i < 16 ; i++ )
- {
- c = buf[ i ] ;
- type = table[ c ] ;
- if( type == WHITE_SPACE )
- c = ' ' ;
- if( type == NON_PRINT )
- c = '.' ;
- if( !accent )
- {
- if( type == HI_CONSONANT || type == HI_VOWEL )
- c = '.' ;
- }
- fputc( c, fp_out );
- }
-
- /* If we test the output once per line, */
- /* that will catch any write errors. */
-
- if( fputc( '\n', fp_out ) != '\n' )
- {
- fprintf( stderr, "FATAL... Unable to write output.\n\n" );
- exit( 1 );
- }
-
- return ;
- }