home *** CD-ROM | disk | FTP | other *** search
- /*
- * usage: replace1 input output [ replacement_table ]
- *
- * REPLACE1 Replaces each byte in an input file with exactly one
- * alternative ASCII character. The default replacement table
- * built into the program is one that replaces high-bit-set
- * graphic characters with reasonable equivalents. An alternate
- * table may be used simply by naming it as an argument. This
- * high speed 1 for 1 replacement can be used for anything from
- * EBCDIC/ASCII conversion to a crude encryption.
- *
- * input: [1] Any file
- * [2] The optional replacement table consists of ASCII lines in
- * any order. Each line has two columns separated by white space.
- * The first column in each line is the incoming character or a
- * range of characters separated by only a hyphen; the second
- * column is the replacement. Non-printable characters are shown
- * by a backslash followed by two hex digits (for example, \08 for
- * backspace). Backslash itself is shown by \5C. Any characters
- * not in the replacement table are retained.
- *
- * output: File same size as input with some/all characters replaced.
- *
- * writeup: MIR TUTORIAL TWO, topic 6
- *
- * Written: Douglas Lowry Mar 06 92
- * Copyright (C) 1992 Marpex Inc.
- *
- * The MIR (Mass Indexing and Retrieval) Tutorials explain detailed
- * usage and co-ordination of the MIR family of programs to analyze,
- * prepare and index databases (small through gigabyte size), and
- * how to build integrated retrieval software around the MIR search
- * engine. The fifth of the five MIR tutorial series explains how
- * to extend indexing capability into leading edge search-related
- * technologies. For more information, GO IBMPRO on CompuServe;
- * MIR files are in the DBMS library. The same files are on the
- * Canada Remote Systems BBS. A diskette copy of the Introduction
- * is available by mail ($10 US... check, Visa or Mastercard);
- * diskettes with Introduction, Tutorial ONE software and the
- * shareware Tutorial ONE text cost $29. Shareware registration
- * for a tutorial is also $29.
- *
- * E-mail...
- * Compuserve 71431,1337
- * Internet doug.lowry%canrem.com
- * UUCP canrem!doug.lowry
- * Others: doug.lowry@canrem.uucp
- *
- * FAX... 416 963-5677
- *
- * "Snail mail"... Douglas Lowry, Ph.D.
- * Marpex Inc.
- * 5334 Yonge Street, #1102
- * North York, Ontario
- * Canada M2N 6M2
- *
- * Related database consultation and preparation services are
- * available through:
- * Innotech Inc., 2001 Sheppard Avenue E., Suite #118,
- * North York, Ontario Canada M2J 4Z7
- * Tel. 416 492-3838 FAX 416 492-3843
- *
- * This program is free software; you may redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * (file 05LICENS) along with this program; if not, write to the
- * Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
- * USA.
- */
-
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-
#define     repeat      for(;;)     /* endless loop; leave with break/return */
#define     MAX_BYTES   2048        /* bytes handled per chunk by process() */

/*
 *  declarations
 */

typedef enum    _bool
            { FALSE = 0, TRUE = 1 }  Bool;

/*
 *  Full prototypes (rather than empty parameter lists) so the compiler
 *  checks the arguments of every call.
 */
void            Usage_( void );
void            process( FILE *fp_in, FILE *fp_out, unsigned char table[256] );
void            load_table( FILE *fp, unsigned char table[256] );
unsigned char   get_hex( unsigned char buf[], Bool *foul_up );
/* Cmdname_ - returns the program name shown in the usage message. */
char    *Cmdname_( void ) { return( "replace1" ); }
-
/*
 *  MAIN
 *
 *  usage:  replace1 input output [ replacement_table ]
 *
 *  Copies argv[1] to argv[2], passing every byte through a 256-entry
 *  replacement table.  The built-in table below is used unless argv[3]
 *  names a table file, in which case load_table() rebuilds the table
 *  from that file.
 *
 *  Exit status is 0 on success.  A wrong argument count, a first
 *  argument starting with '-', '/' or '?', or a file that cannot be
 *  opened all end in Usage_(), which exits with status 1.  A failed
 *  write (including the final flush of the output) also exits with 1.
 */

int
main( int argc, char **argv )
{
    /* This table strips accents, turns box characters to asterisks, bar
    characters to equals, pointers to o, and all else from hex 7f up
    to blanks */

    unsigned char table[256] = {
      0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
     16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    ' ','!','"','#','$','%','&', 39,'(',')','*','+',',','-','.','/',
    '0','1','2','3','4','5','6','7','8','9',':',';','<','=','>','?',
    '@','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O',
    'P','Q','R','S','T','U','V','W','X','Y','Z','[', 92,']','^','_',
    '`','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o',
    'p','q','r','s','t','u','v','w','x','y','z','{','|','}','~', 32,
    /* Ç   ü   é   â   ä   à   å   ç   ê   ë   è   ï   î   ì   Ä   Å */
    'c','u','e','a','a','a','a','c','e','e','e','i','i','i','A','A',
    /* É   æ   Æ   ô   ö   ò   û   ù   ÿ   Ö   Ü   ¢   £   ¥   ₧   ƒ */
    /* Ö and Ü keep their upper case, like every other accented capital */
    'E','a','A','o','o','o','u','u','y','O','U', 32, 32, 32, 32, 32,
    /* á   í   ó   ú   ñ   Ñ   ª   º   ¿   ⌐   ¬   ½   ¼   ¡   «   » */
    'a','i','o','u','n','N','a','o', 32, 32, 32, 32, 32, 32,'o','o',
    /* ░   ▒   ▓   │   ┤   ╡   ╢   ╖   ╕   ╣   ║   ╗   ╝   ╜   ╛   ┐ */
    '=','=','=', 32, 32, 32, 32, 32, 32, 32,'*','*','*', 32, 32, 32,
    /* └   ┴   ┬   ├   ─   ┼   ╞   ╟   ╚   ╔   ╩   ╦   ╠   ═   ╬   ╧ */
     32, 32, 32, 32, 32, 32, 32, 32,'*','*', 32, 32, 32,'*', 32, 32,
    /* ╨   ╤   ╥   ╙   ╘   ╒   ╓   ╫   ╪   ┘   ┌   █   ▄   ▌   ▐   ▀ */
     32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
    /* α   ß   Γ   π   Σ   σ   µ   τ   Φ   Θ   Ω   δ   ∞   φ   ε   ∩ */
     32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
    /* ≡   ±   ≥   ≤   ⌠   ⌡   ÷   ≈   °   ∙   ·   √   ⁿ   ²   ■     */
     32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
    };

    FILE    *fp_table, *fp_in, *fp_out ;
    char    first ;         /* first character of the first argument */

    /*  usage:  replace1 input output [ replacement_table ]  */

    if( argc < 3 || argc > 4 )
        Usage_() ;
    first = argv[1][0] ;
    if( first == '-' || first == '/' || first == '?' )
        Usage_() ;

    /* An optional third argument names a user-supplied table. */

    if( argc == 4 )
    {
        if(( fp_table = fopen( argv[3], "r" )) == NULL )
        {
            fprintf( stderr, "\nUnable to open file %s.\n", argv[3] );
            Usage_();
        }
        load_table( fp_table, table ) ;
        fclose( fp_table ) ;
    }

    /* Both data files are opened in binary mode: any byte may occur. */

    if(( fp_in = fopen( argv[1], "rb" )) == NULL )
    {
        fprintf( stderr, "\nUnable to open file %s.\n", argv[1] );
        Usage_();
    }

    if(( fp_out = fopen( argv[2], "wb" )) == NULL )
    {
        fprintf( stderr, "\nUnable to open file %s.\n", argv[2] );
        Usage_();
    }

    process( fp_in, fp_out, table ) ;

    fclose( fp_in );

    /* Buffered output is only known to be written once fclose succeeds. */
    if( fclose( fp_out ) != 0 )
    {
        fprintf( stderr, "FATAL... Unable to write.\n\n" ) ;
        exit( 1 ) ;
    }

    return( 0 ) ;
}
/*
 *  Usage
 *
 *  Writes the usage summary to stderr and terminates the program with
 *  exit status 1; it never returns to the caller.
 *
 *  The text is built from adjacent string literals, one per output
 *  line, so what is printed does not depend on how the source lines
 *  are indented.  It is still sent in several pieces, as before.
 */
void
Usage_( void )
{
    fprintf( stderr,
        "Usage: %s input output [ replacement_table ]\n\n"
        "Replaces each byte in an input file with exactly one\n"
        "alternative ASCII character. The default replacement table\n"
        "built into the program is one that replaces high-bit-set\n",
        Cmdname_() );
    fprintf( stderr,
        " graphic characters with reasonable equivalents. An alternate\n"
        "table may be used simply by naming it as an argument. This\n"
        "high speed 1 for 1 replacement can be used for anything from\n"
        "EBCDIC/ASCII conversion to a crude encryption.\n"
        "input: [1] Any file\n" ) ;
    fprintf( stderr,
        " [2] The optional replacement table consists of ASCII lines in\n"
        "any order. Each line has two columns separated by white space.\n"
        "The first column in each line is the incoming character or a\n"
        "range of characters separated by only a hyphen; the second\n" ) ;
    fprintf( stderr,
        " column is the replacement. Non-printable characters are shown\n"
        "by a backslash followed by two hex digits (for example, \\08 for\n"
        "backspace). Backslash itself is shown by \\5C. Any characters\n"
        "not in the replacement table are retained.\n"
        "output: File same size as input with some/all characters replaced.\n" ) ;
    fprintf( stderr, "writeup: MIR TUTORIAL TWO, topic 6\n\n" ) ;

    exit( 1 ) ;
}
- /*
- * PROCESS
- */
- void
- process( fp_in, fp_out, table )
- FILE *fp_in, *fp_out ;
- unsigned char table[256] ;
- {
-
- unsigned char buf[ MAX_BYTES ],
- out;
- int len, i ;
-
- while(( len = fread( buf, sizeof( char ), MAX_BYTES, fp_in )) > 0 )
- {
- for( i = 0; i < len ; i++ )
- {
- out = table[ buf[ i ] ];
- if( fputc( out, fp_out ) != out )
- {
- fprintf( stderr, "FATAL... Unable to write.\n\n" ) ;
- exit( 1 ) ;
- }
- }
- }
-
- return ;
- }
- /*
- * LOAD_TABLE
- */
- void
- load_table( fp, table )
- FILE *fp ;
- unsigned char table[256] ;
- {
- unsigned char buf[ 120 ],
- replace, /* replacement value */
- from, to ;
- Bool foul_up ;
- int len, pt, i ;
-
- for( i = 0 ; i < 256 ; i++ )
- table[i] = ( unsigned char ) i ;
- foul_up = FALSE ;
-
- while( fgets( buf, 120, fp ) != NULL )
- {
- len = strlen( buf ) ;
- if( len > 110 )
- foul_up = TRUE ;
- pt = 0 ;
- while( isspace( buf[ pt ] ))
- pt++ ;
-
- /* Get first column */
-
- if( buf[pt] == '\\' )
- {
- from = get_hex( &buf[pt+1], &foul_up );
- pt += 3 ;
- }
- else
- from = buf[ pt++ ] ;
-
- /* Check if a range in first column */
-
- if( buf[pt] == '-' )
- {
- pt++ ;
- if( buf[pt] == '\\' )
- {
- to = get_hex( &buf[pt+1], &foul_up );
- pt += 3 ;
- }
- else
- to = buf[ pt++ ] ;
- }
- else
- to = from ;
-
- /* Get the second column = replacement character */
-
- while( isspace( buf[ pt ] ))
- pt++ ;
- if( buf[pt] == '\\' )
- {
- replace = get_hex( &buf[pt+1], &foul_up );
- pt += 3 ;
- }
- else
- replace = buf[ pt++ ] ;
-
- for( i = from ; i < to + 1 ; i++ )
- table[ i ] = replace ;
-
- if( foul_up )
- {
- fprintf( stderr, "Bad line in table...\n\t%s\n", buf ) ;
- Usage_() ;
- }
- }
-
- return ;
- }
- /*
- * GET_HEX - Get the value of two hex digits
- */
- unsigned char
- get_hex( buf, foul_up )
- unsigned char buf[] ;
- Bool *foul_up ;
- {
- int i ;
- unsigned char nib[2],
- c ;
-
- for( i = 0 ; i < 2 ; i++ )
- {
- nib[ i ] = -1;
- c = buf[ i ] ;
- if( c >= 0x30 && c <= 0x39 ) /* 0...9*/
- nib[ i ] = c - 0x30;
- else if( c > 0x40 && c < 0x47 ) /* A...F*/
- nib[ i ] = 9 + c - 0x40;
- else if( c > 0x60 && c < 0x67 ) /* a...f*/
- nib[ i ] = 9 + c - 0x60;
-
- if( nib[ i ] == -1 )
- *foul_up = TRUE ;
- }
-
- c = ( nib[ 0 ] << 4 ) | nib[ 1 ] ;
-
- return( c ) ;
- }