home *** CD-ROM | disk | FTP | other *** search
/*
 *  SEE MIR TUTORIAL ONE, TOPIC 9.5 ... THIS IS OLD CODE THAT
 *  HAS NOT BEEN UPGRADED FOR LACK OF SAMPLE DATA.
 *
 *  P_MARC - preprocesses MARC records to standard format.
 *           See document "MARC_REC" dated June 13, 1986 by
 *           Doug Lowry for detail.
 *
 *  written:   dblowry 31 july 86
 *  modified:  dblowry 04 march 87
 *  Copyright (c) 1987 Innotech Inc.
 *
 *  usage:  p_marc [2] filename(s)
 *     or   p_marc [2]          which takes input from stdin
 *          The option "2" is used to specify MARC II format,
 *          which is the same as MARC except that the block and
 *          record size values are omitted.
 *
 *  input:  mrc_fields containing client fld identifiers
 *          and Reteaco fld nums & inv controls
 *          stdin, or files
 *
 *  output: filename.orig, or stdout if using stdin for input
 *
 *  note:   this version makes use of the 4th column to specify how
 *          specific fields are to be inverted and displayed.
 */
-
- #include <local.h>
-
- #ifndef STDIO_INCLUDED
- #include <stdio.h>
- #endif
-
- #ifndef CTYPE_INCLUDED
- #include <ctype.h>
- #endif
-
- /*
- * declarations
- */
-
- void load_inv_controls(), pre_process(), parse_directory();
- void proc_field();
- int get_data();
-
- #define NUM_FLDS 999 /* max number of client fields */
- #define NUM_CTLS 5 /* a safe (?) upper limit */
- #define CLI_FLD_LEN_MAX 4 /* length of client supplied fld*/
-
- #define RET_FLD_LEN 4 /* length of Reteaco fld num */
- #define CTL_COL 3 /* Reteaco control column */
- #define MAX_DAT_BYTES 9999 /* max bytes in record data */
- #define MAX_DIR_ENT 500 /* max entries in directory */
-
- /*
- * global variables
- */
-
- long file_location; /* cumulative bytes read, this file */
- Bool marcII; /* MARC II format */
-
- struct _mrc_direc {
- int mrc_field_num;
- int mrc_field_len;
- int mrc_field_offset;
- } direc[ MAX_DIR_ENT ];
- int direc_entries;
-
- struct _inv_ctl {
- char client_fld_id[ CLI_FLD_LEN_MAX + 1 ];
- char first_four[ NUM_CTLS ][ 4 ];
- } ctl[ NUM_FLDS ];
-
- int num_fields;
-
- char data_buf[ MAX_DAT_BYTES ];
-
- /*
- * MAIN -
- */
-
- main( argc, argv )
- int argc;
- char **argv;
- {
- int file, bgn, num_files;
- char fname[ BUFSIZ ];
- FILE *fp_in, *fp_out;
-
- marcII = FALSE;
- if( argc > 1 && strlen( argv[ 1 ] ) == 1 && argv[1][0] == '2'
- )
- marcII = TRUE;
- num_files = argc - 1;
- if( marcII )
- num_files--;
-
- load_inv_controls();
- /* case of no files */
- if( !num_files )
- {
- fp_in = stdin;
- fp_out = stdout;
- file_location= 0;
-
- pre_process( fp_out, fp_in );
-
- exit( 0 );
- }
-
- /* altenately do each file on command line */
- bgn = 1;
- if( marcII && num_files )
- bgn = 2;
- for( file = bgn; file < argc; ++file )
- {
- if ((fp_in = fopen( argv[ file ], "r" )) == NULL )
- {
- warning("%s: file \"%s\" does not exist\n",
- argv[ 0 ], argv[ file ] );
- continue;
- }
- sprintf( fname, "%s.orig", argv[ file ] );
- fp_out = fopen( fname, "w" );
- file_location= 0;
-
- pre_process( fp_out, fp_in );
-
- fclose( fp_in );
- fclose( fp_out );
- }
-
- exit( 0 );
- }
-
- /*
- * LOAD_INV_CONTROLS - loads in the Reteaco field identifiers and
- inversion
- * control characters corresponding to client's
- * field identifiers. The info is assumed to be in
- * the file "mrc_fields".
- */
-
- static void
- load_inv_controls()
- {
- FILE *fp;
- char buf[ BUFSIZ ];
- int i, j, col, fld;
-
- fp = e_fopen( "mrc_fields", "r" );
-
- for( fld = 0; fgets( buf, BUFSIZ, fp ) > 0; ++fld )
- {
- /* client's fld id */
- for( col= 0; isalnum( buf[ col ] ) && col < CLI_FLD_LEN_MAX
- ;
- ++col )
- ctl[ fld ].client_fld_id[ col ] = buf[ col ];
- ctl[ fld ].client_fld_id[ col ] = '\0';
-
- for( j = col ; isspace( buf[j] ); ++j )
- ;
- /* Reteaco's fld num & ctl */
- for( i = 0; buf[j] != '\t'; j += RET_FLD_LEN, ++i )
- {
- if ( buf[ j+3 ] == '0' ) buf[ j+3 ] = ' ';
- strncpy( ctl[fld].first_four[i], &buf[j], RET_FLD_LEN
- );
- ctl[fld].first_four[ i ][ RET_FLD_LEN ] = '\0';
- }
- if( i < NUM_CTLS ) ctl[fld].first_four[ i ][ 0 ] = '\0';
- }
-
- num_fields = fld;
- fclose( fp );
- return;
- }
-
- /*
- * PRE_PROCESS - Preprocesses text from the input file and writes
- * result to supplied output file.
- */
-
- static void
- pre_process( fp_out, fp_in )
- FILE *fp_out, *fp_in;
- {
- int field;
-
- while( get_data( fp_in ) != EOF )
- {
- fputs( "000 \n", fp_out );
- for( field= 0; field < direc_entries; ++field)
- proc_field( field, fp_out );
- }
- return;
- }
-
- /*
- * GET_DATA - Pulls in block header (if needed), then record
- header
- * and contents.
- */
-
- #define BLK_SIZ 80
-
- static int
- get_data( fp )
- FILE *fp;
- {
- static short latest_block; /* most recent block size
- read */
- static short block_bytes; /* cumulative bytes this
- block */
- static short record_bytes; /* total bytes in record */
- short direc_bytes; /* bytes in record directory */
- short data_bytes; /* bytes in record data fields */
- char leader[24];
- int four0, /* 4 if MARC, 0 if MARC II */
- i;
- short zilch;
-
- four0 = 4;
- if( marcII )
- four0 = 0;
- if ( file_location == 0 )
- {
- record_bytes= 0; /* initialize once per file */
- latest_block= 0;
- block_bytes= 0;
- }
- block_bytes += record_bytes;
-
- /* ...If block header due to be read, attempt to evaluate it */
-
- if ( !marcII && block_bytes == latest_block)
- {
- if(( fread( &latest_block, sizeof(short), 1, fp ) < 1 ) ||
- ( fread( &zilch, sizeof(short), 1, fp ) < 1 ))
- return( EOF );
- if ( zilch )
- error( "Block size out of sync at byte %D\n",
- file_location );
- block_bytes= 4;
- file_location += 4;
- }
-
- /* ...Read in the record size */
-
- if( !marcII )
- {
- if(( fread( &record_bytes, sizeof(short), 1, fp ) < 1 ) ||
- ( fread( &zilch, sizeof(short), 1, fp ) < 1 ))
- error( "Failure reading record size starting\
- at byte %D\n", file_location );
- if ( zilch )
- error( "Record size out of sync at byte %D\n",
- file_location );
- file_location += 4;
- }
-
- /* Get and analyze the 24 byte marc record leader */
- /* ...Activate the commented out items below when needed. */
-
- for ( i= 0; i < 24 ; i++ )
- {
- leader[i]= fgetc( fp );
- if ( leader[i] == EOF )
- return( EOF );
- }
- /* rec_status= leader[5]; */
- /* rec_type= leader[6]; */
- /* biblio_cat= leader[7]; */
- leader[5]= ' ';
- if( marcII )
- record_bytes = atoi( leader );
- else if ((atoi( &leader[0] ) + four0 ) != record_bytes)
- error( "Record length inconsistent at byte %D\n",
- file_location );
- /* indicator_count= leader[10] - '0'; */
- leader[17]= ' ';
- direc_bytes= atoi( &leader[12] ) - 24;
- file_location += 24;
-
- /* Now load the directory */
-
- if ( direc_bytes > MAX_DIR_ENT * 12 ||
- direc_bytes > MAX_DAT_BYTES )
- error ( "Oversize directory at byte %D\n", file_location);
- if ( fread( data_buf, sizeof(char), direc_bytes, fp ) <
- direc_bytes )
- error( "Failure reading directory starting at byte %D\n",
- file_location );
- file_location += direc_bytes;
-
- /* ... and place directory into field structure */
-
- if ( direc_bytes % 12 != 1 )
- error( "Directory size not 12*N + 1 at byte %D\n",
- file_location );
- direc_entries= direc_bytes/12;
- parse_directory();
-
- /* Now load the data stream */
-
- data_bytes= record_bytes - four0 - 24 - direc_bytes;
- if ( data_bytes > MAX_DAT_BYTES )
- error ( "Oversize record data at byte %D\n",
- file_location);
- if ( fread( data_buf, sizeof(char), data_bytes, fp ) <
- data_bytes )
- error( "Failure reading record data starting at byte %D\n",
- file_location );
- file_location += data_bytes;
-
- return( 0 );
- }
-
- /*
- * PARSE_DIRECTORY - Place ASCII of record directory into
- structure
- */
-
- static void
- parse_directory()
- {
- int entry,i;
- int off; /* offset within directory */
- char num[4],len[5],offset[6];
-
- off= 0;
- num[3] = len[4] = offset[5] = ' ';
-
- for( entry= 0; entry < direc_entries ; ++entry )
- {
- for( i= 0; i < 3 ; ++i )
- num[ i ] = data_buf[ off++ ];
- direc[entry].mrc_field_num = atoi( num );
- for( i= 0; i < 4 ; ++i )
- len[ i ] = data_buf[ off++ ];
- direc[entry].mrc_field_len = atoi( len );
- for( i= 0; i < 5 ; ++i )
- offset[ i ] = data_buf[ off++ ];
- direc[entry].mrc_field_offset = atoi( offset );
- }
- return;
- }
- /*
- * PROC_FIELD - Preprocess a single field
- */
-
- static void
- proc_field( field, fpo )
- int field;
- FILE *fpo;
- {
- int low,high; /* limits within data stream */
- int ctli, col, num, i;
- char c, nxt, four[5];
-
- low= direc[ field ].mrc_field_offset;
- high= low + direc[ field ].mrc_field_len - 1;
- num= direc[ field ].mrc_field_num - 1;
- four[4] = '\0';
-
- for ( ctli= 0; ctli < NUM_CTLS ; ++ctli )
- {
- if ( !ctl[ field ].first_four[ ctli ][ 0 ] ) break;
- strncpy( four, ctl[num].first_four[ctli], 4 );
- if ( four[3] == 'w' ) break; /* w= no invert, no display
- */
- fputs( four, fpo );
- col= 4;
- for ( i= low; i <= high; ++i)
- {
- if (( c= data_buf[i] ) == 037 ) /* begin sub-field */
- {
- i++; /* swallow next char */
- if ( col <= 4 )
- continue;
- fputc( '\n', fpo );
- col= 0;
- continue;
- }
- /* ...If at end of sub-field or line getting too long, set for new
- line */
- if ( c == 036 || ( col > 70 && isspace( c )) )
- {
- fputc( '\n', fpo );
- col= 0;
- continue;
- }
- /* ...Swallow other non-printing characters, beginning semi-colon
- */
- if ( !isprint( c ))
- continue;
- if ( col == 4 && ( c == ';' || c == ' ' ))
- continue;
- /* ...Prior to writing, set up lead 4 columns if needed */
- if ( col < 4 )
- {
- fputs( " ", fpo );
- fputc( four[3], fpo );
- col= 4;
- }
- if( c == ' ' )
- {
- nxt = data_buf[i+1];
- if( nxt == ' ' || !isprint( nxt ) )
- continue;
- }
- fputc( c, fpo );
- col++;
- }
- }
- if( c == 035 )
- fputc( '\n', fpo );
- return;
- }