home *** CD-ROM | disk | FTP | other *** search
- /*
- * Usage - head file_name [ line_count ] [/a][/t] > text
- *
- * HEAD Displays in printable format the first line_count lines
- * within a file; the default is 10 lines. This clone of
- * the UNIX HEAD and TAIL utilities provides a quick check on
- * the likely contents of a file. If the "/a" option is used,
- * accented characters are treated as printable text. If
- * "/t" is specified, the display is of the TAIL of the
- * file, the LAST line_count lines.
- *
- * input: Normally an ASCII text file.
- *
- * output: The specified number of lines is either displayed on the
- * screen or sent to a file. Each non-printable character is
- * replaced by an ^ symbol. If any line length exceeds 120
- * characters, a warning is issued. If any line length exceeds
- * 1024 or the file includes null bytes, the program advises
- * that the target file is not ASCII text.
- *
- * writeup: MIR TUTORIAL ONE, topic 5
- *
- * Written: Douglas Lowry Jan 10 92
- * Modified: Douglas Lowry May 11 92 Correct re small files
- * Copyright (C) 1992 Marpex Inc.
- *
- * The MIR (Mass Indexing and Retrieval) Tutorials explain detailed
- * usage and co-ordination of the MIR family of programs to analyze,
- * prepare and index databases (small through gigabyte size), and
- * how to build integrated retrieval software around the MIR search
- * engine. The fifth of the five MIR tutorial series explains how
- * to extend indexing capability into leading edge search-related
- * technologies. For more information, GO IBMPRO on CompuServe;
- * MIR files are in the DBMS library. The same files are on the
- * Canada Remote Systems BBS. A diskette copy of the Introduction
- * is available by mail ($10 US... check, Visa or Mastercard);
- * diskettes with Introduction, Tutorial ONE software and the
- * shareware Tutorial ONE text cost $29. Shareware registration
- * for a tutorial is also $29.
- *
- * E-mail...
- * Compuserve 71431,1337
- * Internet doug.lowry%canrem.com
- * UUCP canrem!doug.lowry
- * Others: doug.lowry@canrem.uucp
- *
- * FAX... 416 963-5677
- *
- * "Snail mail"... Douglas Lowry, Ph.D.
- * Marpex Inc.
- * 5334 Yonge Street, #1102
- * North York, Ontario
- * Canada M2N 6M2
- *
- * Related database consultation and preparation services are
- * available through:
- * Innotech Inc., 2001 Sheppard Avenue E., Suite #118,
- * North York, Ontario Canada M2J 4Z7
- * Tel. 416 492-3838 FAX 416 492-3843
- *
- * This program is free software; you may redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License (file 05LICENS) along with this program; if not,
- * write to the Free Software Foundation, Inc., 675 Mass Ave,
- * Cambridge, MA 02139, USA.
- */
-
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <io.h>
-
/* Size of the line buffer in process() and the read buffer in locate(). */
#define     MAX_BYTES   1024

/* Spelling for an endless loop; not used by the code in this file. */
#define     repeat      for(;;)

/* Two-valued truth type used for flags throughout the program. */
typedef enum _bool
{
    FALSE = 0,
    TRUE  = 1
} Bool;

/*
 *  Forward declarations (old-style, no parameter lists) for the
 *  functions defined further down in this file.
 */

void    Usage_();
void    process();
Bool    locate();
/* Program name inserted into the usage message printed by Usage_(). */
char *
Cmdname_()
{
    return "head" ;
}
-
- /*
- * MAIN
- */
-
- main( argc, argv )
- int argc;
- char **argv;
- {
- char c10 ; /* argv[1][0] */
- FILE *fp ;
- Bool accent, /* user wants accented chars to show */
- tail, /* show last lines, not first lines */
- tag ; /* argument is a tag (-.. or /..) */
- int i ;
- long int line_ct ;
-
- /* Usage - head file_name [ line_count ] [/a][/t] > text */
-
- c10 = argv[1][0] ;
- if( argc < 2 || argc > 5 || c10 == '-' || c10 == '/' || c10 == '?' )
- Usage_();
-
- if(( fp = fopen( argv[1], "rb" )) == NULL )
- {
- fprintf( stderr, "Can't open file %s\n", argv[1] ) ;
- Usage_() ;
- }
-
- line_ct = 10 ;
- accent = tail = FALSE ;
- for( i = 2 ; i < argc ; i++ )
- {
- tag = FALSE ;
- if( argv[i][0] == '-' || argv[i][0] == '/' )
- tag = TRUE ;
- if( islower( argv[i][1] ))
- argv[i][1] = toupper( argv[i][1] ) ;
- if( tag && argv[i][1] == 'A' )
- accent = TRUE ;
- else if( tag && argv[i][1] == 'T' )
- tail = TRUE ;
- else
- {
- line_ct = atol( argv[i] );
- if( line_ct < 1 )
- line_ct = 10 ;
- }
- }
-
- process( fp, line_ct, accent, tail );
-
- fclose( fp ) ;
- exit( 0 );
- }
/*
 *  Usage
 *
 *  Writes the help text to stderr and terminates the program with
 *  exit status 1; it does not return to the caller.
 *
 *  The text is assembled from adjacent string literals, one per output
 *  line, so that the indentation of the message is spelled out inside
 *  the literals and does not depend on how this source file happens
 *  to be indented.
 */
void
Usage_()
{
    fprintf( stderr,
        "usage: %s file_name [ line_count ] [/a][/t] > text\n\n"
        "        Displays in printable format the first line_count lines\n"
        "        within a file; the default is 10 lines. This clone of\n"
        "        the UNIX HEAD and TAIL utilities provides a quick check on\n",
        Cmdname_() ) ;
    fprintf( stderr,
        "        the likely contents of a file. If the \"/a\" option is used,\n"
        "        accented characters are treated as printable text. If\n"
        "        \"/t\" is specified, the display is of the TAIL of the\n"
        "        file, the LAST line_count lines.\n\n" ) ;
    fprintf( stderr,
        "input: Normally an ASCII text file.\n\n"
        "output: The specified number of lines is either displayed on the\n"
        "        screen or sent to a file. Each non-printable character is\n"
        "        replaced by an ^ symbol. If any line length exceeds 120\n"
        "        characters, a warning is issued. If any line length exceeds\n" ) ;
    fprintf( stderr,
        "        1024 or the file includes null bytes, the program advises\n"
        "        that the target file is not ASCII text.\n\n"
        "writeup: MIR TUTORIAL ONE, topic 5\n\n" ) ;
    exit( 1 ) ;
}
-
/*
 *  Character classes stored in the 256-entry lookup table inside
 *  process().  Only NON_PRINT is tested by the code in this file.
 */
#define     NON_PRINT       0
#define     WHITE_SPACE     1
#define     PUNCTUATION     2
#define     DIGIT           3
#define     CONSONANT       4
#define     VOWEL           5
#define     HI_CONSONANT    6
#define     HI_VOWEL        7
#define     TYPE_CT         8   /* count of above types */
-
- /*
- * PROCESS
- */
- void
- process( fp, line_ct, accent, tail )
- FILE *fp ;
- long int line_ct ;
- Bool accent, /* user wants accented chars to show */
- tail ; /* show last lines, not first lines */
- {
- unsigned char table[256] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, /* ctls */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* ctls */
- /* bl ! " # $ % & ' ( ) * + , - . / */
- 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5, 2,
- /* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2,
- /* @ A B C D E F G H I J K L M N O */
- 4, 5, 4, 4, 4, 5, 4, 4, 4, 5, 4, 4, 4, 4, 4, 5,
- /* P Q R S T U V W X Y Z [ \ ] ^ _ */
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4, 2, 2, 2, 2, 2,
- /* ` a b c d e f g h i j k l m n o */
- 2, 5, 4, 4, 4, 5, 4, 4, 4, 5, 4, 4, 4, 4, 4, 5,
- /* p q r s t u v w x y z { | } ~ NULL */
- 4, 4, 4, 4, 4, 5, 4, 4, 4, 5, 4, 2, 2, 2, 2, 0,
- /* Ç ü é â ä à å ç ê ë è ï î ì Ä Å */
- 6, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 7, 7, 7, 7, 7,
- /* É æ Æ ô ö ò û ù ÿ Ö Ü ¢ £ ¥ ₧ ƒ */
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0,
- /* á í ó ú ñ Ñ ª º ¿ ⌐ ¬ ½ ¼ ¡ « » */
- 7, 7, 7, 7, 6, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- };
- unsigned char line_in[ MAX_BYTES ] ;
- Bool printable, /* file is ASCII text */
- at_eof ; /* at end of file */
- long int over_120, /* To warn re line length */
- line_no;
- int ch, /* one character */
- len, i ;
-
- over_120 = 0 ;
- printable = TRUE ;
- at_eof = FALSE ;
-
- if( tail )
- {
- if( !locate( fp, line_ct ))
- return ;
- }
-
- for( line_no = 0 ; line_no < line_ct ; line_no++ )
- {
- for( len = 0 ; len < MAX_BYTES ; len++ )
- {
- if(( ch = fgetc( fp )) == EOF )
- {
- if( feof( fp ))
- {
- at_eof = TRUE ;
- break ;
- }
- }
- if( isprint( ch ) || ch == '\t' )
- line_in[ len ] = ch ;
- else if( !ch )
- {
- printable = FALSE ;
- break ;
- }
- else if( ch == '\n' )
- break ;
- else if( ch == '\015' || ch == '\032' )
- len-- ; /* delete carriage return, EOF */
- else if( !accent )
- line_in[ len ] = '^' ;
- else if( table[ch] == NON_PRINT )
- line_in[ len ] = '^' ;
- else
- line_in[ len ] = ch ;
- }
- line_in[ len ] = '\0' ;
-
- if( len > MAX_BYTES - 2 || !printable )
- {
- fprintf( stderr,
- "\nNot printable ASCII. Use f_print filter for display.\n" ) ;
- fclose( fp ) ;
- Usage_();
- }
- if( len > 120 )
- over_120++ ;
-
- if( puts( line_in ))
- {
- fprintf( stderr, "Unable to write... FATAL.\n\n" ) ;
- exit( 1 ) ;
- }
-
- if( at_eof )
- break ;
- }
-
- if( over_120 )
- fprintf( stderr, "\n*** %d LINES OVER 120 BYTES LONG ***\n\n",
- over_120 ) ;
- return ;
- }
- /*
- * LOCATE - Find beginning point for the last line_ct lines
- */
- Bool
- locate( fp, line_ct )
- FILE *fp ;
- long int line_ct ;
- {
- unsigned char buf[ MAX_BYTES ] ;
- Bool gotcha ; /* found desired offset */
- long int line_no,
- buf_len,
- fil_len,
- bgn_at ; /* offset of buffer start */
- int i ;
-
- fil_len = filelength( fileno( fp )) ;
- if( fil_len < 1 )
- return( FALSE ) ;
- bgn_at = fil_len ;
- line_no = 0 ;
- gotcha = FALSE ;
-
- while( !gotcha )
- {
- bgn_at -= MAX_BYTES ;
- if( bgn_at < 0 )
- bgn_at = 0 ;
- if( fseek( fp, bgn_at, SEEK_SET ))
- {
- fprintf( stderr, "Unable to position file. FATAL!\n\n" );
- return( FALSE ) ;
- }
- buf_len = fread( buf, sizeof( char ), MAX_BYTES, fp ) ;
- if( fil_len >= MAX_BYTES && buf_len < MAX_BYTES )
- {
- fprintf( stderr, "Trouble reading back in file. FATAL!\n\n" );
- return( FALSE ) ;
- }
-
- for( i = buf_len - 1 ; i > -1 ; i-- )
- {
- if( buf[i] == '\n' )
- {
- if( ++line_no > line_ct )
- {
- bgn_at += ( i + 1 ) ;
- gotcha = TRUE ;
- break ;
- }
- }
- if( !buf[i] ) /* null byte */
- {
- fprintf( stderr,
- "\nNot printable ASCII. Use f_print filter for display.\n" ) ;
- fclose( fp ) ;
- Usage_();
- }
- if( !bgn_at )
- {
- gotcha = TRUE ;
- break ; /* Must start at beginning */
- }
- }
- }
-
- if( fseek( fp, bgn_at, SEEK_SET ))
- {
- fprintf( stderr, "Unable to position file. FATAL!\n\n" );
- return( FALSE ) ;
- }
-
- return( TRUE ) ;
- }