home *** CD-ROM | disk | FTP | other *** search
- /*
- * Usage - wp_gener < ASCII_file > generic_file
- *
- * WP_GENER Prepares an ASCII file so that it may be used as input for
- * almost any word processing program. This program replaces
- * a hard return with a blank between lines of continuous text.
- *
- * input: An ASCII text file with lines of 80 bytes or less.
- *
- * output: Same file with selected line ends replaced by blanks.
- *
- * writeup: MIR Tutorial TWO, topic ..
- *
- * Written: Douglas Lowry Apr 24 92
- * Copyright (C) 1992 Marpex Inc.
- *
- * The MIR (Mass Indexing and Retrieval) Tutorials explain detailed
- * usage and co-ordination of the MIR family of programs to analyze,
- * prepare and index databases (small through gigabyte size), and
- * how to build integrated retrieval software around the MIR search
- * engine. The fifth of the five MIR tutorial series explains how
- * to extend indexing capability into leading edge search-related
- * technologies. For more information, GO IBMPRO on CompuServe;
- * MIR files are in the DBMS library. The same files are on the
- * Canada Remote Systems BBS. A diskette copy of the Introduction
- * is available by mail ($10 US... check, Visa or Mastercard);
- * diskettes with Introduction, Tutorial ONE software and the
- * shareware Tutorial ONE text cost $29. Shareware registration
- * for a tutorial is also $29.
- *
- * E-mail...
- * Compuserve 71431,1337
- * Internet doug.lowry%canrem.com
- * UUCP canrem!doug.lowry
- * Others: doug.lowry@canrem.uucp
- *
- * FAX... 416 963-5677
- *
- * "Snail mail"... Douglas Lowry, Ph.D.
- * Marpex Inc.
- * 5334 Yonge Street, #1102
- * North York, Ontario
- * Canada M2N 6M2
- *
- * Related database consultation and preparation services are
- * available through:
- * Innotech Inc., 2001 Sheppard Avenue E., Suite #118,
- * North York, Ontario Canada M2J 4Z7
- * Tel. 416 492-3838 FAX 416 492-3843
- *
- * This program is free software; you may redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * (file 05LICENS) along with this program; if not, write to the
- * Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
- * USA.
- */
-
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-
- #define repeat for(;;)
-
- /*
- * declarations
- */
-
/* Two-valued flag type used for yes/no state in this file. */
typedef enum _bool
{
    FALSE = 0,
    TRUE = 1
} Bool;

/* Forward declarations of the routines defined further down. */
void Usage_();
void process();
int classify();

/* Returns the program name shown in the usage message. */
char *
Cmdname_()
{
    return "wp_gener";
}
-
/*
 *  MAIN - Entry point.  The program is a pure filter: it takes no
 *  command line arguments, reads stdin and writes stdout.  Any
 *  argument at all prints the usage text (Usage_ does not return).
 *
 *  Returns 0 once process() has completed.
 */
int
main( int argc, char **argv )
{
    (void) argv ;           /* arguments are never examined */

    if( argc > 1 )
        Usage_() ;

    process() ;

    return 0 ;
}
/*
 *  USAGE - Write the command synopsis to stderr, then terminate the
 *  program with exit status 1.  Control never returns to the caller.
 */
void
Usage_()
{
    /* First half carries the one %s slot, filled with the program name. */
    fprintf( stderr,
        "\nUsage: %s < ASCII_file > generic_file\n\n"
        "Prepares an ASCII file so that it may be used as input for\n"
        "almost any word processing program. This program replaces\n"
        "a hard return with a blank between lines of continuous text.\n\n"
        "input: An ASCII text file with lines of 80 bytes or less.\n\n",
        Cmdname_() ) ;

    /* Second half is fixed text, so no formatting is required. */
    fputs(
        "output: Same file with selected line ends replaced by blanks.\n\n"
        "writeup: MIR Tutorial TWO, topic ..\n\n",
        stderr ) ;

    exit( 1 ) ;
}
/* Line classifications used by process() and classify(). */
enum
{
    EMPTY      = 0,     /* line of zero length                      */
    FIXED      = 1,     /* line whose layout must be kept as is     */
    LEADER     = 2,     /* line that starts with white space        */
    CONTINUOUS = 3,     /* ordinary running text                    */
    BEGIN_FILE = 4      /* initial state: no line has been read yet */
} ;
-
- /*
- * PROCESS - Two adjacent lines are continuous if both contain
- * no tabs or sequences of 3 or more blanks. One exception
- * is allowed: A paragraph leader may have leading white
- * space. EMPTY and FIXED lines are fully disqualified. A
- * LEADER is part of continuous text only if followed by a
- * line that is CONTINUOUS.
- */
- void
- process( )
- {
- unsigned char buf[ 90 ],
- c ;
- Bool ctl_z ; /* found an end of file marker */
- int type,prev, /* current and previous lines classified */
- /* as FIXED, LEADER or CONTINUOUS text */
- len, i ;
-
- type = BEGIN_FILE ;
- ctl_z = FALSE ;
-
- while( fgets( buf, 90, stdin ) != NULL )
- {
- if( ctl_z )
- {
- fprintf( stderr, "FATAL... Text continues after a CTL-Z\n" );
- fprintf( stderr, "\tIs the input really a text file?\n\n" ) ;
- exit( 1 ) ;
- }
- len = strlen( buf ) - 1 ;
- repeat
- {
- c = buf[ len - 1 ] ;
- if( c == '\032' )
- {
- ctl_z = TRUE ;
- break ;
- }
- if( c == '\n' || c == '\015' )
- len-- ;
- else
- break ;
- }
- if( ctl_z )
- continue ; /* Try for more input */
-
- if( len > 80 )
- {
- fprintf( stderr,"FATAL... line length exceeds 80 bytes.\n");
- Usage_() ;
- }
- buf[ len ] = '\0' ;
-
- prev = type ;
- type = classify( buf, len ) ;
-
- if( type == CONTINUOUS && ( prev == LEADER || prev == CONTINUOUS ))
- putchar( ' ' ) ;
- else if( prev != BEGIN_FILE )
- putchar( '\n' ) ;
-
- for( i = 0 ; i < len ; i++ )
- {
- if( putchar( buf[i] ) != buf[i] )
- {
- fprintf( stderr, "FATAL... Unable to write.\n\n" ) ;
- exit( 1 ) ;
- }
- }
- }
-
- putchar( '\n' ) ;
- putchar( '\032' );
- return ;
- }
- /*
- * CLASSIFY Classify line type as EMPTY, FIXED, LEADER or CONTINOUS
- */
- int
- classify( buf, len )
- char buf[90];
- int len ;
- {
- int txt_bgn, /* byte at which text starts */
- blank_ct, /* # of blanks in a row */
- pt ;
-
- if( !len )
- return( EMPTY ) ;
-
- for( pt = 0 ; pt < len ; pt++ )
- {
- if( !isspace( buf[pt] ))
- break ;
- }
-
- txt_bgn = pt ;
- blank_ct = 0 ;
- for( ; pt < len ; pt++ )
- {
- if( buf[ pt ] == '\t' )
- return( FIXED ) ;
- if( buf[ pt ] == ' ' )
- blank_ct++ ;
- else if( blank_ct ) /* any other character */
- {
- if( blank_ct > 2 )
- return( FIXED ) ;
- blank_ct = 0 ;
- }
- }
-
- if( blank_ct > 1 ) /* ...at end of line */
- return( FIXED ) ; /* else converting an end to blank */
- /* will create a third blank */
- if( txt_bgn )
- return( LEADER ) ;
-
- return( CONTINUOUS ) ;
- }