home *** CD-ROM | disk | FTP | other *** search
- /* main.c 1989 december 28 [gh]
- +-----------------------------------------------------------------------------
- | Abstract:
- | General purpose filter and file cleaning program. 9 out 10 hackers
- | prefer pep to any other soap. It is named after an excellent Norwegian
- | detergent.
- |
- | Authorship:
- | Copyright (c) 1988, 1989 Gisle Hannemyr.
- | Permission is granted to hack, make and distribute copies of this program
- | as long as this notice and the copyright notices are not removed.
- | If you intend to distribute changed versions of this program, please make
- | an entry in the "history" log (below) and mark the hacked lines with your
- | initials. I maintain the program, and shall appreciate copies of bug
- | fixes and new versions.
- | Flames, bug reports, comments and improvements to:
- | snail: Gisle Hannemyr, Brageveien 3A, 0452 Oslo, Norway
- | email: EAN: gisle@nr.uninett
- | Inet: gisle@ifi.uio.no
- | UUCP: ...!mcvax!ifi!gisle
- | (and several BBS mailboxes in the Oslo area).
- |
- | Acknowledgments:
- | SYS V.2 rename courtesy of Robert Andersson (ra@isncr.is.se)
- | VMS rename courtesy of Bjorn Larsen.
- | Thanks to Inge Arnesen for finding & fixing a bug, (and to Nils-Eivind
- | Naas for bringing it to my attention).
- |
- | History:
- | 2.1 29 dec 89 [gh] Fixed pipe bug, added -v option, misc. speedups
- | 2.0 22 jan 89 [gh] Made it a filter, environment lookup, fixed pathbug.
- | 1.6 7 nov 88 [gh] Added ANSI interpretation.
- | 1.5 6 aug 88 [gh] Hacked it to not clobber original date.
- | 1.4 7 jul 88 [gh] Added general purpose conversion table.
- | 1.3 13 nov 87 [gh] Fixed find first so it works on true blue too.
- | 1.2 11 nov 87 [gh] Compensated for Turbo-C bug (isspace > 128 is bogus)
- | 1.1 31 aug 87 [gh] Added VMS.
- | 1.0 30 aug 87 [gh] Wrote it.
- |
- | Portability:
- | So far, PEP has been tested under CP/M, MS-DOS, BSD and VMS.
- | The implementation dependencies are:
- | * How the compiler identifies itself and the operating system.
- | * How microcomputer compilers simulate the UNIX text line terminator.
- | * How operating systems fold command lines (i.e. CP/M folds).
- | * How operating systems expand command line wildcards (i.e. UNIX does).
- |
- | These dependencies are implemented using IFDEFs. You should make sure that
- | exactly one of the following symbols (macro names) is defined:
- | * __CPM86__ -- For CP/M, MP/M, C-DOS and derivatives
- | * __MSDOS__ -- For MS-DOS and derivatives
- | * __UNIX__ -- For BSD UNIX (SYS V.3 and generic too?)
- | * __VMS__ -- For VMS
- |
- | In addition, the following symbols may be twiddled if desired:
- | * STRICMP -- Define this if linker complains about missing "stricmp"
- | * SYSV2 -- For SYS V.2 UNIX (if no "rename" in standard lib)
- | * __TURBOC__ -- For Borlands TURBOC (undefine it if you want PEP to
- | change the date on the files it filters).
- | * VMSV1 -- For VAX C V.1.x VMS (if no "rename" in standard lib)
- |
- | Most compilers already predefine a macro that identifies the target oper-
- | ating system. Unfortunately, different vendors use slightly different
- | symbols. Please add to the section headlined "canonize predefined
- | macroes" (below) if your compiler requires it.
- |
- | Note: PEP makes some assumptions about standard headers. See the file
- | "header.txt" for details.
- |
- | Btw. -- these are the compilers I have used to make pep:
- | * MS-DOS, Microsoft C ver. 5.1
- | Turbo C ver. 2.0
- | * BSD UNIX, SunOS Release 4.0
- | * SVID UNIX, NCR Tower System V.2
- | * VMS, VAX C release 2.4
- |
- | Environment:
- | PEP -- should point to directory with conversion tables.
- |
- | Bugs:
- | * I do not recompile PEP on all operating systems at each release.
- | Software rot may cause a particular version to need some tweaking.
- | Please mail me if you experience this -- but please read the enclosed
- | file "header.txt" first.
- +---------------------------------------------------------------------------*/
-
-
- /*---( Includes )-----------------------------------------------------------*/
-
- #define MAIN
-
- #include <stdio.h>
- #include "pep.h"
- #include <string.h>
- #include <ctype.h>
- #ifdef __VMS__
- #include <types.h>
- #include <stat.h>
- #else
- #include <sys/types.h>
- #include <sys/stat.h>
- #endif
- #if __CPM86__ || __MSDOS__
- #include <dos.h>
- #include "bdmg.h"
- #endif
- #ifdef __MSDOS__
- #include <fcntl.h>
- #include <io.h>
- #endif
- #ifdef __UNIX__
- #include <malloc.h>
- #else
- #include <stdlib.h>
- #endif
- #ifdef VMSV1
- #include <descrip.h>
- #endif
-
-
- /*---( defines )------------------------------------------------------------*/
-
- #define VERSION "2.1" /* Version number, printed in the copyright banner. */
- #define TEMPFIL "PEP.TMP" /* Tempfile: output is written here and then renamed over the input file when writing to named files. */
-
-
- /*---( constants )----------------------------------------------------------*/
-
- static char about1[] = "\n\
- Pep is a program that converts almost anything into plain text files.\n\
- Permission is granted to make and distribute copies of this program as\n\
- long as the copyright and this notice appears; and that all the files\n\
- in the distribution is included. ( The files in the distribution is\n\
- documented in the release note file \"aaread.me\". )\n\n"; /* First half of the text printed by the -a option. */
-
- static char about2[] = "\
- Bug reports, improvements, comments, suggestions and flames to:\n\
- snail: Gisle Hannemyr, Brageveien 3A, 0452 Oslo, Norway\n\
- email: EAN: gisle@nr.uninett;\n\
- Inet: gisle@ifi.uio.no;\n\
- UUCP: ...!mcvax!ifi!gisle\n\
- (and several BBS mailboxes).\n"; /* Second half of the -a text: contact addresses. */
-
- static char usage1[] = " Usage: pep [options] [filename ...]\n\
- Valid options:\n\
- \t-a -- about pep\n\
- \t-b -- remove non ASCII-codes\n\
- \t-c[size] -- compress spaces to tabs\n\
- \t-d+/- -- convert to/from DEC 8 bit charset\n\
- \t-e[guard] -- interprete ANSI escape sequences\n\
- \t-g<file> -- get conversion table from file\n\
- \t-h -- print this quick summary\n\
- \t-i+/- -- convert to/from IBM-PC charset\n\
- \t-k+/- -- convert to/from \"Kman\" charset\n\
- \t-m+/- -- convert to/from Macintosh charset\n"; /* Usage summary, options a..m; printed for any unrecognised option letter. */
-
- char usage2[] = "\
- \t-o[b] -- write output to named files\n\
- \t-p -- display transformations and pause\n\
- \t-s[size] -- extract strings\n\
- \t-t[size] -- expand tabstops\n\
- \t-u<term> -- use special line terminator\n\
- \t-v -- terminate only paragraphs\n\
- \t-w+/- -- convert to/from WS document mode\n\
- \t-x -- expand non printing chars\n\
- \t-z -- zero parity bit\n"; /* Usage summary, options o..z, printed right after usage1. NOTE(review): unlike usage1 this is not static -- confirm whether another file references it. */
-
-
- /*---( variables )----------------------------------------------------------*/
-
- static int cright = TRUE; /* Flag copyright undisplayed */
- static int guardl = 0; /* ANSI overwrite guard level */
-
- long LCount = 0L; /* Global line count */
- int LineXx = 0; /* Horisontal position on line. */
- int ITabSz = 8; /* Input tabulator size. */
- int OTabSz = 8; /* Output tabulator size. */
- int StrSiz = 4; /* String size for strings. */
- #if __UNIX__ || __VMS__
- int EndOLn = '\n'; /* Under UNIX, default is LF; */
- #else
- int EndOLn = -1; /* else, the default is CRLF. */
- #endif
-
- static int backup = FALSE; /* Keep backup copy. */
- int bflagb = FALSE; /* Binary wash. */
- int cflagc = FALSE; /* Compress */
- int dflagd = FALSE; /* DEC character set. */
- static int eflage = FALSE; /* ANSI escape sequences. */
- int gflagg = FALSE; /* General fold table */
- int iflagi = FALSE; /* IBM character set. */
- int kflagk = FALSE; /* Kman character set. */
- int mflagm = FALSE; /* MAC character set. */
- #ifdef __VMS__
- static int oflago = TRUE; /* VMS has no pipes. */
- #else
- static int oflago = FALSE; /* Write output on files. */
- #endif
- static int pflagp = FALSE; /* Pause. */
- int sflags = FALSE; /* String extraction. */
- int tflagt = FALSE; /* Tab expansion */
- static int uflagu = FALSE; /* Use special line terminator. */
- int vflagv = FALSE; /* Terminate only paragraphs. */
- int wflag0 = FALSE; /* From WS doc. mode to 7-bit. */
- int wflag1 = FALSE; /* From 7-bit to WS doc. mode. */
- int xflagx = FALSE; /* Expand non printing chars (-x). */
- int zflagz = FALSE; /* Zero parity bit (-z). */
-
-
- /*---( housekeeping )-------------------------------------------------------*/
-
-
/*
| Abs: Print character code ii on stderr in readable form.
| Des: Codes outside 0..255 print as "<BOGUS>"; 127 prints as "DEL" and
|      255 as "M-del".  Codes of 128 and above get an "M-" prefix and are
|      then treated as the code minus 128; codes below space print as '^'
|      followed by the code plus '@'.  Anything else prints as itself.
*/
static void fputctl(ii)
int ii;
{
    if (ii < 0 || ii > 255) {
        fputs("<BOGUS>",stderr);
        return;
    }

    switch (ii) {
        case 255: fputs("M-del",stderr); return;
        case 127: fputs("DEL",stderr);   return;
        default : break;
    } /* switch */

    if (ii >= 128) {                    /* meta prefix, then strip top bit */
        fputs("M-",stderr);
        ii -= 128;
    }
    if (ii < ' ') {                     /* control code: caret notation */
        fputc('^',stderr);
        ii += '@';
    }
    fputc(ii,stderr);
} /* fputctl */
-
-
- /*
- | Abs: Display copyright notice.
- | Sef: Sets the cright flag FALSE so that we only displays it once.
- */
- void showcright()
- {
- fprintf(stderr,"pep ver. %s; Copyright (c) 1989 Gisle Hannemyr\n",VERSION);
- cright = FALSE;
- } /* showcright */
-
-
- /*
- | Abs: Display message and abort.
- */
- void mess(err)
- int err;
- {
- if (cright) showcright();
- fputs("pep: ",stderr);
- switch(err) {
- case 1: fputs("incompatible options", stderr); break;
- case 2: fputs("missing '+' or '-'", stderr); break;
- case 3: fputs("bad guard digit", stderr); break;
- case 4: fputs("invalid line terminator", stderr); break;
- case 5: fputs("no more room", stderr); break;
- case 6: fputs("bad conversion table", stderr); break;
- case 7: fputs("no matching files", stderr); break;
- case 8: fputs("cannot pause reading stdin", stderr); break;
- case 9: fputs("sorry, not yet implemented", stderr); break;
- default: fputs("unknown error", stderr); break;
- } /* switch */
- putc('\n',stderr);
- exit(ERROR_EXIT);
- } /* mess */
-
-
- void showprogress()
- {
- LCount++;
- if (oflago && ((LCount % 64) == 0)) fprintf(stderr,"\r%ld ",LCount);
- } /* showprogress */
-
-
- /*
- | Abs: Parse desired line terminator.
- | Des: r = CR
- | n = NL
- | s = RS
- | # = CRLF
- | - = none
- | <number> = use this as the terminator
- | Sef: IFrst, ILast, ILimit.
- */
- void getterm(ss)
- char *ss;
- {
- if (*ss == 'n') EndOLn = '\n';
- else if (*ss == 'r') EndOLn = '\r';
- else if (*ss == 's') EndOLn = 30;
- else if (*ss == '#') EndOLn = -1;
- else if (*ss == '-') EndOLn = -2;
- else if (isdigit(*ss)) EndOLn = atoi(ss);
- else mess(4);
- } /* getterm */
-
-
- /*
- | Abs: Check toggle.
- | Ret: TRUE if toggle is on, else off.
- */
- BOOL swchk(dd)
- char dd;
- {
- if ((dd != '+') && (dd != '-')) mess(2);
- return(dd == '+');
- } /* swchk */
-
-
- /*
- | Abs: Check fold direction and set up direction flags.
- | Des: IFrst set to 1 if folding to 8 bit character set.
- | Sef: IFrst, ILast, ILimit.
- */
- void folddir(dd)
- char dd;
- {
- if (swchk(dd)) { IFrst = 1; ILast = 0; ILimit = 91; }
- else { IFrst = 0; ILast = 1; ILimit = 128; }
- } /* folddir */
-
-
- /*
- | Abs: Show transformations.
- | Imp: Moved from main() coz MS-C don't like big main functions.
- */
- void showoptions(tabledir,cname)
- char *tabledir, *cname;
- {
- int cc;
-
- fputs("Transformations:\n",stderr);
- if (!vflagv) fputs(" * stripping all trailing spaces;\n",stderr);
-
- if (gflagg) {
- fprintf(stderr," * translating using table in file \"%s\";\n",cname);
- readtable(tabledir,cname,TRUE);
- } /* if gflagg */
-
- #ifdef __TURBOC__
- fputs(" * preserving file dates;\n",stderr);
- #endif
- if (oflago) fputs(" * output file is input file (not stdout);\n",stderr);
- if (backup) fputs(" * creating .BAK copies of input file;\n",stderr);
- if (bflagb) fprintf(stderr," * %sing non ASCII-codes;\n", xflagx ? "expand" : "remov");
- if (dflagd || iflagi || mflagm) {
- char *machine;
- if (dflagd) machine = "DEC";
- else if (iflagi) machine = "IBM";
- else if (mflagm) machine = "MAC";
- if (IFrst) fprintf(stderr," * Norwegian 7-bit ==> 8-bit %s charset", machine);
- else fprintf(stderr," * 8-bit %s charset ==> norwegian 7-bit", machine);
- if (kflagk) fputs(",\n using \\\\ for Norwegian \"OE\";\n",stderr);
- else fputs(";\n",stderr);
- } /* if (dflagd || iflagi || mflagm) */
- if (sflags) fprintf(stderr," * extracting strings >= %d characters;\n",StrSiz);
- if (cflagc && tflagt) {
- fputs(" * optimizing out spaces from tabulation;\n",stderr);
- if (ITabSz != OTabSz) fprintf(stderr," * repacking tabs, size: %d --> %d;\n",ITabSz,OTabSz);
- } else {
- if (tflagt) fprintf(stderr," * expanding tabs, size: %d;\n",ITabSz);
- if (cflagc) fprintf(stderr," * inserting tabs, size: %d;\n",OTabSz);
- }
- if (eflage) fputs(" * interpreting ANSI escape sequences;\n",stderr);
-
- if (wflag0) fputs(" * WS doc. mode ==> 7-bit text files;\n",stderr);
- if (wflag1) fputs(" * 7-bit text files ==> WS doc. mode;\n",stderr);
- else {
- if (EndOLn == -2 ) fputs(" * removing line terminators",stderr);
- else {
- if (vflagv) fputs(" * terminating paragraphs only with ",stderr);
- else fputs(" * terminating lines with ",stderr);
- if (EndOLn == -1 ) fputs("CRLF",stderr);
- else if (EndOLn == '\n') fputs("LF", stderr);
- else if (EndOLn == '\r') fputs("CR", stderr);
- else if (EndOLn == 30 ) fputs("RS", stderr);
- else fputctl(EndOLn);
- } /* if EndOLn != -2 */
- } /* if else not WS */
- #ifdef __VMS__
- fputs(" (Stream_LF);\n",stderr);
- #else
- fputs(";\n",stderr);
- #endif
-
- if (zflagz) fputs(" * zeroing parity bit;\n",stderr);
- fprintf(stderr," * %sing control characters.\n", xflagx ? "expand" : "remov");
- if (pflagp) {
- fputs("\nHit CTRL-C to abort, RETURN continue. ",stderr);
- cc = getc(stdin);
- if (cc == 3) exit(ERROR_EXIT); /* Not necessary for cooked microes. */
- } /* if */
- } /* showoptions */
-
-
- /*
- | Abs: Do a complete file.
- | Sef: Zero line cont.
- */
- void dofile(eflage)
- int eflage;
- {
- #ifndef __VMS__
- /* Speed things up using a bigger I/O buffer. */
- if (setvbuf(Fdi,NULL,_IOFBF,16384) || setvbuf(Fdo,NULL,_IOFBF,16384))
- mess(5); /* No more room */
- #endif
- LCount = 0L;
- if (eflage) doansi(guardl); else doplain();
- } /* dofile */
-
-
- /*---( main )---------------------------------------------------------------*/
-
- main(argc, argv)
- int argc;
- char **argv;
- {
- struct stat statbuf;
- unsigned int statype;
- unsigned int statmod;
- char *tabledir;
- char *cname, *nname, *ss;
- int cc;
- char dd;
- #if __CPM86__ || __MSDOS__
- char *cp;
- struct DIRLIST *first, *last;
- #endif
- #ifdef __TURBOC__
- struct ftime *filtim;
- int hh;
- #endif
-
- #ifdef __MSDOS__
- _fmode = O_BINARY; /* Tell MS-C, Turbo not to do CRLF expansion. */
- tabledir = argv[0]; /* Look for tables in startup-directory. */
- #else
- tabledir = NULL; /* No such startup convention for other OS's. */
- #endif
-
- inittable();
-
- argc--; argv++; /* skip program name */
- while (argc && (**argv == '-')) {
- (*argv)++; /* skip initial '-' */
- cc = **argv; /* option letter */
- #ifdef __CPM86__
- cc = tolower(cc);
- #endif
- (*argv)++; /* skip option letter */
- dd = **argv; /* arg argument */
- switch (cc) {
- case 'a': showcright(); fputs(about1, stderr); fputs(about2, stderr);
- exit(NORML_EXIT);
- case 'b': bflagb++; break;
- case 'c': cflagc++; if (**argv != '\0') OTabSz = atoi(*argv); break;
- case 'd': dflagd++; folddir(dd); break;
- case 'e': eflage++; if (**argv != '\0') guardl = atoi(*argv);
- tflagt++;
- break;
- case 'g': gflagg++; cname = *argv; break;
- /* 'h': OK to fall thru' to default */
- case 'i': iflagi++; folddir(dd); break;
- case 'k': kflagk++; folddir(dd); break;
- case 'm': mflagm++; folddir(dd); break;
- case 'o': oflago++; if (dd == 'b') backup++; break;
- case 'p': pflagp++; break;
- case 's': sflags++; if (**argv != '\0') StrSiz = atoi(*argv); break;
- case 't': tflagt++; if (**argv != '\0') ITabSz = atoi(*argv); break;
- case 'u': uflagu++; if (**argv != '\0') getterm(*argv); else mess(4); break;
- case 'v': vflagv++; break;
- case 'w': if (swchk(dd)) tflagt = wflag1 = TRUE;
- else zflagz = wflag0 = TRUE;
- break;
- case 'x': xflagx++; break;
- case 'z': zflagz++; break;
- default : showcright();
- fputs(usage1,stderr); fputs(usage2,stderr);
- exit(NORML_EXIT);
- } /* switch */
- argc--; argv++;
- } /* while options */
-
- if (pflagp && !argc) mess(8);
- if ((dflagd + gflagg + iflagi + mflagm + zflagz) > 1) mess(1);
- if ((guardl < 0) || (guardl > 2)) mess(3);
- if (kflagk && !(dflagd + mflagm)) iflagi++;
- if (!uflagu && dflagd && IFrst) EndOLn = '\n'; /* To DEC (Dec uses LF as terminator) */
- if (!uflagu && iflagi && IFrst) EndOLn = -1; /* To IBM (Uses CRLF as terminator) */
- if (!uflagu && mflagm && IFrst) EndOLn = '\r'; /* To Mac (Mac uses CR as terminator) */
-
- Fdi = stdin; /* Default */
- Fdo = stdout;
- #ifdef __MSDOS__
- setmode(fileno(Fdi),_fmode); /* Make sure that even braindamaged MS-DOS */
- setmode(fileno(Fdo),_fmode); /* are transparent when redirecting i/o. */
- #endif
- if (!argc) { /* Doing standard input */
- if (pflagp) showoptions(tabledir,cname);
- else if (gflagg) readtable(tabledir,cname,FALSE);
- if (oflago) mess(7);
- dofile(eflage);
- } else {
- showcright();
- showoptions(tabledir,cname);
- #if __CPM86__ || __MSDOS__
- first = expwildcard(argv);
- if (!first) mess(7);
- /* Inv: first now points to a start of linked list of files. */
- last = first;
- while (last) {
- cname = last->fnam;
- if (oflago) {
- putc('\r',stderr);
- putc('\n',stderr);
- #else
- while (argc) {
- cname = *argv;
- if (oflago) {
- putc('\n',stderr);
- #endif
- fputs(cname,stderr);
- } /* if oflago */
-
- #ifdef __UNIX__
- if (lstat(cname,&statbuf)) {
- #else
- if (stat(cname,&statbuf)) {
- #endif
- if (!oflago) fputs(cname,stderr);
- fputs(": can't access\n", stderr);
- goto cont;
- } /* if not stat */
- statmod = statbuf.st_mode;
- statype = statbuf.st_mode & S_IFMT;
-
- if (statype != S_IFREG) {
- if (!oflago) fputs(cname,stderr);
- if (statype == S_IFDIR) fputs(": directory\n", stderr);
- #ifdef __UNIX__
- else if (statype == S_IFLNK) fputs(": symbolic link\n", stderr);
- #endif
- else fputs(": special file\n", stderr);
- goto cont;
- } /* if not a regular file */
-
- if ((Fdi = fopen(cname,"r")) == NULL) {
- if (!oflago) fputs(cname,stderr);
- fputs(": can't open\n", stderr);
- goto cont;
- }
- if (oflago) if ((Fdo = fopen(TEMPFIL,"w")) == NULL) {
- fputs(": can't create tmpfile\n",stderr); exit(ERROR_EXIT);
- }
- #ifdef __TURBOC__
- if (!(filtim = (struct ftime *)malloc(sizeof(struct ftime))))
- mess(5); /* No more room */
- getftime(fileno(Fdi),filtim);
- #endif
- if (oflago) putc('\n',stderr);
- dofile(eflage);
- if (oflago) fprintf(stderr,"\r Done. %ld lines written.",LCount);
-
- if (ferror(Fdi) || ferror(Fdo)) {
- fputs("\npep: I/O error, file unchanged\n", stderr);
- } else {
- fclose(Fdi);
- if (oflago) {
- fclose(Fdo);
- if (chmod(TEMPFIL,statmod)) fputs("\npep: could not set mode\n",stderr);
- #ifdef __TURBOC__
- if ((hh = _open(TEMPFIL,O_RDONLY)) != -1) setftime(hh,filtim);
- _close(hh);
- #endif
- } /* if (oflago) */
-
- cc = 0; /* error flag */
- if (backup) {
- if (!(nname = (char *)malloc(strlen(cname)+5))) mess(5);
- strcpy(nname,cname);
- if (ss = strrchr(nname,'.')) *ss = '\0';
- strcat(nname,".BAK");
- unlink(nname);
- #ifdef __UNIX__
- if (strcmp(nname,cname)) { /* UNIX names are case sensitive */
- #else
- if (stricmp(nname,cname)) { /* other filenames are not. */
- #endif
- if (rename(cname,nname)) cc++;
- } /* if same names */
- } else {
- if (oflago) if (unlink(cname)) cc++;
- } /* if .. else no backup */
- if (oflago) if (rename(TEMPFIL,cname)) cc++;
- if (cc) {
- fprintf(stderr, "\npep: error creating %s, file is %s\n",cname,TEMPFIL);
- exit(ERROR_EXIT);
- } /* if error */
- } /* if .. else close file */
- cont:
- #if __CPM86__ || __MSDOS__
- last = last->next;
- } /* while */
- dispwildcard(first);
- #else
- argc--; argv++;
- } /* while */
- if (oflago) putc('\n',stderr);
- #endif
- } /* if .. else read named files */
- exit(NORML_EXIT);
- } /* main */
-
- /* EOF */
-