home *** CD-ROM | disk | FTP | other *** search
- /*
- * usage: p_fixed control_file fixed_length_input > ASCII_output
- *
- * P_FIXED Converts a fixed record length file to ASCII with field
- * numbers. A control file governs field lengths and
- * handling of empty data.
- *
- * input: [1] A control file as in P_FIXED.CTL (also appears at
- * end of source code).
- * [2] The fixed length records data
- *
- * output: ASCII output with one or more lines per field. New records
- * are signalled by a line containing 000; all other lines
- * begin with a three digit field number. Non-printable
- * characters are shown in hex format with leading backslash.
- * Additional processing may be needed to bring individual
- * fields into production indexing format.
- *
- * writeup: MIR TUTORIAL ONE, topic 9
- *
- * Written: Douglas Lowry May 11 92
- * Copyright (C) 1992 Marpex Inc.
- *
- * The MIR (Mass Indexing and Retrieval) Tutorials explain detailed
- * usage and co-ordination of the MIR family of programs to analyze,
- * prepare and index databases (small through gigabyte size), and
- * how to build integrated retrieval software around the MIR search
- * engine. The fifth of the five MIR tutorial series explains how
- * to extend indexing capability into leading edge search-related
- * technologies. For more information, GO IBMPRO on CompuServe;
- * MIR files are in the DBMS library. The same files are on the
- * Canada Remote Systems BBS. A diskette copy of the Introduction
- * is available by mail ($10 US... check, Visa or Mastercard);
- * diskettes with Introduction, Tutorial ONE software and the
- * shareware Tutorial ONE text cost $29. Shareware registration
- * for a tutorial is also $29.
- *
- * E-mail...
- * Compuserve 71431,1337
- * Internet doug.lowry%canrem.com
- * UUCP canrem!doug.lowry
- * Others: doug.lowry@canrem.uucp
- *
- * FAX... 416 963-5677
- *
- * "Snail mail"... Douglas Lowry, Ph.D.
- * Marpex Inc.
- * 5334 Yonge Street, #1102
- * North York, Ontario
- * Canada M2N 6M2
- *
- * Related database consultation and preparation services are
- * available through:
- * Innotech Inc., 2001 Sheppard Avenue E., Suite #118,
- * North York, Ontario Canada M2J 4Z7
- * Tel. 416 492-3838 FAX 416 492-3843
- *
- * This program is free software; you may redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * (file 05LICENS) along with this program; if not, write to the
- * Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
- * USA.
- */
-
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-
-
/* Endless loop; leave it with break or return. */
#define repeat      for(;;)

/* Size in bytes of the buffer that holds one fixed length record. */
#define MAX_BYTES   2048

/*
 * Option bits kept per field in flag[].  The first six are switched on
 * by the B, Z, N, LB, LZ and TB codes of a control file line; the last
 * two by an R or L suffix on the start or end byte number.
 */
#define BLANKS      0x01    /* keep the field when it is blank      */
#define ZEROS       0x02    /* keep the field when it is zeros      */
#define NULLS       0x04    /* keep the field when it is nulls      */
#define L_BLANKS    0x08    /* keep leading blanks in the field     */
#define L_ZEROS     0x10    /* keep leading zeros in the field      */
#define T_BLANKS    0x20    /* keep trailing blanks in the field    */
#define S_RIGHT     0x40    /* start byte: right nybble only        */
#define E_LEFT      0x80    /* end byte: left nybble only           */
-
- /*
- * declarations
- */
-
/* Two-valued truth type used for function results and line state. */
typedef enum _bool
    { FALSE = 0, TRUE = 1 } Bool;

/*
 * Full prototypes, so that the compiler checks the arguments of every
 * call.  They stay compatible with the old-style definitions below
 * because no parameter type is changed by the default promotions.
 */
void    Usage_( void ) ;
void    process( FILE *fp ) ;
Bool    load_ctl( FILE *fp ) ;
Bool    isolate( char buf[], int len, int pt, int *from, int *upto ) ;
/* Cmdname_ - returns the program name as a constant string. */
char *
Cmdname_()
{
    char    *name = "p_fixed" ;

    return( name ) ;
}
-
/*
 *  Global variables.  The three arrays are indexed by field number.
 */
static unsigned char flag[ 1000 ] ;     /* option bits of each field, built
                                           from the defines above; entry 0
                                           is unused */
static short int     rec_len ;          /* bytes in one fixed length record */
static short int     bgn[ 1000 ] ;      /* starting byte of each field */
static short int     fin[ 1000 ] ;      /* ending byte of each field; entry
                                           0 is used for the record length */
static short int     high_fld ;         /* highest active field */
/*
 *  MAIN
 *
 *  usage: p_fixed control_file fixed_length_input > ASCII_output
 *
 *  argv[1] names the control file and argv[2] the data file.  A wrong
 *  argument count, or a first argument that looks like a help switch,
 *  prints the usage text; Usage_ ends the program with status 1.
 *  Exits with 1 when the control file is rejected, otherwise converts
 *  the data file to standard output and returns 0.
 */

int
main( argc, argv )
int     argc ;
char    **argv ;
{
    FILE    *fp ;
    char    c10 ;

    if( argc != 3 )
        Usage_() ;

    /*
     *  "-x", "?" and the DOS style "/x" ask for help.  A longer
     *  argument that starts with '/' is taken as a file name, so that
     *  absolute path names can be used for the control file.
     */
    c10 = argv[1][0] ;
    if( c10 == '-' || c10 == '?' ||
        ( c10 == '/' && ( argv[1][1] == '\0' || argv[1][2] == '\0' )))
        Usage_() ;

    if(( fp = fopen( argv[1], "r" )) == NULL )
    {
        fprintf( stderr, "\nUnable to open control file %s.\n", argv[1] );
        Usage_();
    }

    if( load_ctl( fp ))
    {
        fprintf( stderr, "FATAL errors in control file %s.\n", argv[1] );
        fclose( fp ) ;
        exit( 1 ) ;
    }
    fclose( fp ) ;

    /* The data is read as raw bytes, hence binary mode. */
    if(( fp = fopen( argv[2], "rb" )) == NULL )
    {
        fprintf( stderr, "\nUnable to open data file %s.\n", argv[2] );
        Usage_();
    }

    process( fp ) ;

    fclose( fp );
    return( 0 ) ;
}
/*
 *  Usage_ - write the usage summary to stderr, then end the program
 *  with exit status 1.  Does not return.
 */
void
Usage_()
{
    char    *prog ;

    prog = Cmdname_() ;

    fprintf( stderr,
        "\nUsage: %s control_file fixed_length_input > ASCII_output\n\n"
        "Converts a fixed record length file to ASCII with field\n"
        "numbers. A control file governs field lengths and\n"
        "handling of empty data.\n\n"
        "input: [1] A control file as in P_FIXED.CTL (also appears at\n"
        "end of source code).\n",
        prog ) ;

    fputs(
        " [2] The fixed length records data\n\n"
        "output: ASCII output with one or more lines per field. New records\n"
        "are signalled by a line containing 000; all other lines\n"
        "begin with a three digit field number. Non-printable\n",
        stderr ) ;

    fputs(
        " characters are shown in hex format with leading backslash.\n"
        "Additional processing may be needed to bring individual\n"
        "fields into production indexing format.\n\n"
        "writeup: MIR TUTORIAL ONE, topic 9\n\n",
        stderr ) ;

    exit( 1 ) ;
}
- /*
- * LOAD_CTL
- */
- Bool
- load_ctl( fp )
- FILE *fp ;
- {
- char buf[ 90 ] ;
- Bool line_okay ;
- int line_item, /* token # within one line */
- from, upto, /* limits of a token */
- foul_ups, /* count of bad lines */
- fld, /* field number */
- len, pt, i ;
-
- high_fld = foul_ups = rec_len = 0 ;
- for( i = 0 ; i < 1000 ; i++ )
- {
- flag[ i ] = 0 ;
- bgn[ i ] = fin[ i ] = -1 ;
- }
-
- while( fgets( buf, 80, fp ) != NULL )
- {
- len = strlen( buf ) - 1 ;
- while( isspace( buf[ len -1 ] ))
- len-- ;
- buf[ len ] = '\0' ;
- line_okay = TRUE ;
- if( buf[0] == '\032' )
- break ;
- if( !len || buf[0] == '#' )
- continue ; /* empty, or comment line */
-
- for( i = 0 ; i < len ; i++ )
- {
- if( islower( buf[ i ] ))
- buf[ i ] = toupper( buf[ i ] ) ;
- }
- pt = -1 ;
- line_item = 0 ;
-
- while( !isolate( buf, len, pt, &from, &upto ))
- {
- if( !line_item )
- {
- fld = atoi( &buf[ from ] ) ;
- if( fld < 0 || fld > 999 )
- {
- fprintf( stderr,
- "FATAL... Control line should start with a field number between\n\
- 1 and 999. Check your control file.\n\n" ) ;
- return( 1 ) ;
- }
- }
- else if( line_item == 1 )
- {
- bgn[ fld ] = atoi( &buf[ from ] ) ;
- if( buf[ upto ] == 'R' )
- flag[ fld ] |= S_RIGHT ;
- }
- else if( line_item == 2 )
- {
- fin[ fld ] = atoi( &buf[ from ] ) ;
- if( buf[ upto ] == 'L' )
- flag[ fld ] |= E_LEFT ;
- if( bgn[ fld ] > fin[ fld ] )
- line_okay = FALSE ;
- }
- else if( from == upto )
- {
- if( buf[ from ] == 'B' )
- flag[ fld ] |= BLANKS ;
- else if( buf[ from ] == 'Z' )
- flag[ fld ] |= ZEROS ;
- else if( buf[ from ] == 'N' )
- flag[ fld ] |= NULLS ;
- else
- line_okay = FALSE ;
- }
- else if( upto == ( from + 1 ) && buf[ from ] == 'L' )
- {
- if( buf[ upto ] == 'B' )
- flag[ fld ] |= L_BLANKS ;
- else if( buf[ upto ] == 'Z' )
- flag[ fld ] |= L_ZEROS ;
- else
- line_okay = FALSE ;
- }
- else if( upto == ( from + 1 ) && buf[ from ] == 'T' )
- {
- if( buf[ upto ] == 'B' )
- flag[ fld ] |= T_BLANKS ;
- else
- line_okay = FALSE ;
- }
- else
- line_okay = FALSE ;
-
- if( !line_okay )
- {
- fprintf( stderr,
- "Unexpected line in control file...\n\t%s\n", buf );
- if( foul_ups++ > 4 )
- return( 1 ) ;
- break ;
- }
- pt = upto ;
- line_item++ ;
- }
-
- /* Finish off input line */
-
- if( fld > high_fld )
- high_fld = fld ;
- }
-
- rec_len = fin[ 0 ] ;
- if( rec_len > MAX_BYTES )
- {
- fprintf( stderr, "Record length exceeds %d bytes... recompile.\n",
- MAX_BYTES ) ;
- return( 1 ) ;
- }
- if( !rec_len )
- {
- fprintf( stderr, "Record length not specified... FATAL.\n" ) ;
- return( 1 ) ;
- }
-
- if( foul_ups )
- return( 1 ) ;
- return( 0 ) ;
- }
- /*
- * ISOLATE - Find limits of next token in a buffer
- */
- Bool
- isolate( buf, len, pt, from, upto )
- char buf[] ;
- int pt, len,
- *from, *upto ;
- {
- int local_pt ;
-
- local_pt = pt + 1 ;
- while( isspace( buf[ local_pt ] ) && buf[ local_pt ] )
- local_pt++ ;
-
- if( local_pt > len - 1 )
- return( 1 ) ;
-
- *from = local_pt ;
-
- while( !isspace( buf[ local_pt ] ) && buf[ local_pt ] )
- local_pt++ ;
-
- *upto = local_pt - 1 ;
-
- return( 0 ) ;
- }
- /*
- * PROCESS
- */
- void
- process( fp )
- FILE *fp ;
- {
- unsigned char buf[ MAX_BYTES ],
- uc,
- first_save, last_save ;
- long int records ;
- int from, upto,
- fld, /* field number */
- len, i ;
-
- records = 0 ;
-
- while(( len = fread( buf, sizeof( char ), rec_len, fp )) > 0 )
- {
- if( len < rec_len )
- break ; /* assuming end */
-
- records++ ;
-
- if( !printf( "000 \n" ))
- {
- fprintf( stderr, "FATAL write error.\n\n" ) ;
- exit( 1 ) ; /* error check once per record is enough */
- }
-
- for( fld = 1 ; fld <= high_fld ; fld++ )
- {
- if( bgn[ fld ] == -1 )
- continue ; /* no such field */
-
- if( flag[ fld ] & S_RIGHT )
- {
- first_save = buf[ bgn[ fld ] ] ;
- buf[ bgn[ fld ] ] &= 0x0f ;
- }
- if( flag[ fld ] & E_LEFT )
- {
- last_save = buf[ fin[ fld ] ] ;
- buf[ fin[ fld ] ] >>= 4 ;
- }
- from = upto = -1 ;
- for( i = bgn[ fld ] ; i <= fin[ fld ] ; i++ )
- {
- uc = buf[ i ] ;
- if( uc == '\0' )
- continue ;
- if( uc == ' ' && !( flag[fld] & L_BLANKS ))
- continue;
- if( uc == '0' && !( flag[fld] & L_ZEROS ))
- continue;
- from = i ;
- break ;
- }
-
- if( from > -1 )
- {
- upto = fin[ fld ] ;
- if( !( flag[fld] & T_BLANKS ))
- {
- while( buf[ upto ] == ' ' )
- upto-- ;
- }
- }
- else /* no data found */
- {
- uc = buf[ bgn[ fld ] ] ;
- if( (( flag[fld] & BLANKS ) && uc == ' ' ) ||
- (( flag[fld] & ZEROS ) && uc == '0' ) ||
- (( flag[fld] & NULLS ) && uc == '\0' ) )
- from = upto = bgn[ fld ] ; /* 1st char only */
- }
-
- /* print out result if data or retention */
-
- if( from > -1 )
- {
- printf( "%03d ", fld ) ;
- for( i = from ; i <= upto ; i++ )
- {
- if( isprint( buf[ i ] ) && buf[i] != '\\' )
- putchar( buf[ i ] ) ;
- else
- printf( "\\%02x", buf[i] ) ;
- }
- putchar( '\n' ) ;
- }
-
- if( flag[ fld ] & S_RIGHT )
- buf[ bgn[ fld ] ] = first_save ;
- if( flag[ fld ] & E_LEFT )
- buf[ fin[ fld ] ] = last_save ;
- }
- }
-
- putchar( '\032' ) ;
-
- fprintf( stderr, "Processed %ld records.\n\n", records ) ;
- return ;
- }
- /* P_FIXED.CTL TEMPLATE May 11, 1992
- # Edit a copy of this file to use with P_FIXED.EXE in order
- # to break out fixed length records. Each line consists of
- # three numbers and zero or more codes; each element is separated
- # by one or more blanks. The numbers are:
- # field number
- # start byte (followed by R if right half of byte only)
- # end byte (followed by L if left half of byte only)
- # A special line must be included with field number 0, begin byte
- # 0, and end byte = last byte of record (i.e., record length - 1 ).
- #
- # Comment lines may be included. Each must start with #
- #
- # The codes that follow the three numbers are:
- # B retain field if blank
- # Z retain field if zeros
- # N retain field if nulls
- # LB retain leading blanks in field
- # LZ retain leading zeros in field
- # TB retain trailing blanks in field
- #
- 0 0 345
- 1 0 11 B
- 2 12 15
- 3 16 26 TB
- etc.
- */