home *** CD-ROM | disk | FTP | other *** search
- From mipos3!intelca!oliveb!ames!husc6!necntc!ncoast!allbery Sat Jan 23 19:37:30 PST 1988
- Article 258 of comp.sources.misc:
- Path: td2cad!mipos3!intelca!oliveb!ames!husc6!necntc!ncoast!allbery
- From: aeb@cwi.nl (Andries Brouwer)
- Newsgroups: comp.sources.misc
- Subject: v02i008: subst - substitute strings for strings
- Message-ID: <7102@ncoast.UUCP>
- Date: 20 Jan 88 01:03:39 GMT
- Sender: allbery@ncoast.UUCP
- Organization: CWI, Amsterdam
- Lines: 370
- Approved: allbery@ncoast.UUCP
- X-Archive: comp.sources.misc/8801/9
- Comp.Sources.Misc: Volume 2, Issue 9
- Submitted-By: Andries Brouwer <aeb@cwi.nl>
- Archive-Name: subst
-
- Comp.sources.misc: Volume 2, Issue 9
- Submitted-By: Andries Brouwer <aeb@cwi.nl>
- Archive-Name: subst
-
- [The next few postings are from rs's slush pile. At least we know he's still
- alive. ;-) Also -- you will have noticed that there are occasional irregular
- headers -- since I am adding the compatible headers manually while trying to finish
- the generalized posting program, I occasionally mess up. Sorry! ++bsa]
-
- This program does unlimited string substitution.
- I needed it because of the limits built into sed
- (this program has no limits on string sizes or
- number of strings), and the awkwardness of worrying
- about the special characters of sed.
- Maybe it is useful to others as well.
-
- The program header documents it.
-
- ---------------------------------------------------
- /* subst: substitute fixed strings for other fixed strings - aeb@cwi.nl */
- /* written 11 Nov. 1987 - placed in public domain - do not delete header */
- /*
- * Call: subst [-acs] sfile [ifile]
- * Here sfile is the file with descriptions of the substitutions
- * to be performed and ifile is the input file.
- * If ifile is not given, then stdin is read.
- * If the option -c is given, then the following argument is itself the
- * substitution description.
- * The description has the format:
- * <old><tab><new>
- * and different description lines are separated by newlines.
- * In cases where <old> might contain tabs or <new> might contain
- * newlines, one can use subst -s .
- * Now description lines have the format
- * <sep><old><sep><tab><sep2><new><sep2>
- * and again different description lines are separated by newlines.
- * (Here <sep> and <sep2> denote arbitrary single characters.)
- * By default, only the places where <old> occurs as "keyword",
- * i.e., not preceded or followed by a letter or digit, are substituted,
- * but the option -a causes substitution for all occurrences.
- */
-
- /*
- Why not use sed or /lib/cpp or m4 or ... ? Well, m4 and /lib/cpp
- react to special characters in the file, but I want to leave the
- file as it is, except for these substitutions. What about sed?
- This is not so bad, but requires some preprocessing of sfile
- in case the strings may contain . or & etc. When both sfile and
- ifile are computer generated, this is a hassle, and the present
- solution is much cleaner. Moreover, subst has no built-in limits.
- Note: this program works reasonably well when the number of
- substitution strings is not too large. It uses a linear list, and
- this becomes very slow when there are thousands of substitution strings.
- */
-
#include <stdio.h>
#include <stdlib.h>	/* malloc, realloc, free, exit */
#include <string.h>	/* strcpy, strlen */

/*
 * Prototypes for the functions of this file.  With these in scope the
 * compiler converts arguments such as pointer differences and sizeof
 * results to the parameter types instead of passing them through unchecked.
 */
int usage(void);
int getsfile(void);
int do_it(void);
int output(char *obuf, int n);
char *input(char *ibuf, int n);
char *alloc(unsigned n);
char *grow(char *a, int n);

/* Option counters, incremented by main() for each -a, -c and -s seen. */
int opta, optc, opts;

/* inf: stream the text is read from; sf: substitution file (unused with -c). */
FILE *inf, *sf;

/*
 * sin:   the substitution description itself when -c is given
 *        (NOTE: shares its name with the math library's sin()).
 * iname: name of the input, used in diagnostics.
 * sname: name of the substitution source, used in diagnostics.
 */
char *sin, *iname, *sname;

/* Set by input() once end of file has been seen on inf. */
int eoi;

/* One substitution: replace `in` by `out`.  Kept as a singly linked list. */
struct repl {
	char *in;
	char *out;
	struct repl *next;
} *replhead, *repltail;

/*
 * speedup[c] is nonzero when some `in` string starts with byte c.
 * Index it with an unsigned char value.
 */
char speedup[256]; /* let us hope that characters have 8 bits ... */
-
/*
 * usage: print the command synopsis on stderr and terminate the program
 * with exit status 2.  Does not return to the caller.
 */
int
usage(void)
{
	/* -a is accepted by main() as well, so list it in the synopsis */
	fprintf(stderr, "subst: Usage is subst [-acs] sfile [ifile]\n");
	exit(2);
}
-
- main(argc,argv) int argc; char **argv; {
- while(argc > 1 && argv[1][0] == '-') {
- do {
- switch(argv[1][1]) {
- case 's':
- opts++;
- break;
- case 'c':
- optc++;
- break;
- case 'a':
- opta++;
- break;
- default:
- usage();
- exit(2);
- }
- argv[1]++;
- } while(argv[1][1]);
- argv++;
- argc--;
- }
- if(argc > 3 || argc < 2)
- usage();
- if(argc == 3) {
- iname = argv[2];
- inf = fopen(iname,"r");
- if(inf == NULL) {
- perror(iname);
- exit(1);
- }
- } else {
- inf = stdin;
- iname = "<stdin>";
- }
- if(!optc) {
- sname = argv[1];
- sf = fopen(sname,"r");
- if(sf == NULL) {
- perror(sname);
- exit(1);
- }
- } else {
- sin = argv[1];
- sname = "<argin>";
- }
- getsfile();
- do_it();
- return(0);
- }
-
- unsigned maxilth, maxolth;
-
- getsfile(){
-
- #define LSIZ 4
-
- #define put_in_buf(c) {\
- if(bufp >= buf + bsz) {\
- buf = grow(buf, bsz + LSIZ);\
- bufp = buf + bsz;\
- bsz += LSIZ;\
- }\
- *bufp++ = c;\
- }
-
- #define put_in_repl(inout,max) {\
- register unsigned lth = strlen(buf) + 1;\
- if(lth > max) max = lth;\
- replp->inout = alloc(lth);\
- (void) strcpy(replp->inout,buf);\
- }
-
- #define put_in_chain {\
- replp->next = NULL;\
- if(replhead == NULL)\
- replhead = replp;\
- else\
- repltail->next = replp;\
- repltail = replp;\
- replp = (struct repl *) alloc(sizeof(struct repl));\
- }
-
- char line[LSIZ], *buf;
- register char *lp, *bufp;
- register int state = 0, bsz = 0, eos = 0;
- register struct repl *replp;
- char sep;
-
- if(optc)
- lp = sin;
- else {
- lp = line;
- line[0] = 0;
- }
-
- replp = (struct repl *) alloc(sizeof(struct repl));
-
- buf = alloc(LSIZ);
- bufp = buf;
- bsz = LSIZ;
-
- while(!eos) {
- if(!*lp) {
- if(optc) {
- lp = "\n";
- eos++;
- } else {
- if(fgets(line, sizeof(line), sf) == NULL) {
- if(ferror(sf)) {
- perror(sname);
- exit(1);
- }
- break;
- }
- lp = line;
- if(!*lp) return; /* strange ... */
- }
- }
- switch(state) {
- case 0: /* before in */
- state = 1;
- if(opts) {
- sep = *lp++;
- continue;
- }
- sep = '\t';
- /* fall through */
- case 1: /* reading in */
- if(*lp != sep) {
- put_in_buf(*lp++);
- continue;
- }
- lp++;
- put_in_buf(0);
- put_in_repl(in,maxilth);
- bufp = buf;
- state = (opts ? 2 : 3);
- continue;
- case 2: /* waiting for tab */
- if(*lp++ == '\t') state = 3;
- continue;
- case 3: /* before out */
- state = 4;
- if(opts) {
- sep = *lp++;
- continue;
- }
- sep = '\n';
- /* fall through */
- case 4: /* reading out */
- if(*lp != sep) {
- put_in_buf(*lp++);
- continue;
- }
- lp++;
- put_in_buf(0);
- put_in_repl(out,maxolth);
- bufp = buf;
- put_in_chain;
- state = (opts ? 5 : 0);
- continue;
- case 5: /* waiting for newline */
- if(*lp++ == '\n') state = 0;
- continue;
- }
- }
-
- free((char *) replp);
- free(buf);
- }
-
-
- do_it(){
-
- #define ISIZ 16384
-
- #define is_ok(c) (c < '0' || (c < '@' && c > '9') || (c < 'a' && c > 'Z') || c > 'z')
-
- #define assure_ip if(ip == ibuf1) {\
- register char *tp;\
- if(eoi)\
- goto nxt;\
- output(ibuf, ibufp-ibuf);\
- tp = ibufp;\
- ip = ibufp = ibuf = ibuf0;\
- while(tp < ibuf1)\
- *ip++ = *tp++;\
- ibuf1 = input(ip, ibuf1-ip);\
- if(ip == ibuf1)\
- goto nxt;\
- }
-
- register struct repl *replp;
- register char *ibuf, *ibuf0, *ibuf1, *ibufp, *cp, *ip;
- register unsigned ilth;
- int prevc_is_ok = 1;
-
- /* small speedup: remember first char of all in-strings */
- /* [this changes the semantics slightly: we do no longer
- replace the empty string by something, but that would
- otherwise lead to an infinite loop, so is useless anyway] */
- for(replp = replhead; replp; replp = replp->next)
- speedup[replp->in[0]] = 1;
-
- ilth = 2*maxilth;
- if(ISIZ > ilth)
- ilth = ISIZ;
- ibuf0 = alloc(ilth);
- ibufp = ibuf = ibuf1 = ibuf0 + ilth;
-
- while(1) {
-
- if(ibufp == ibuf1) {
- output(ibuf, ibufp-ibuf);
- if(eoi)
- return;
- ibufp = ibuf = ibuf0;
- ibuf1 = input(ibuf, ibuf1-ibuf);
- if(ibuf == ibuf1)
- return;
- }
-
- if(prevc_is_ok && speedup[*ibufp])
- for(replp = replhead; replp; replp = replp->next) {
- cp = replp->in;
- ip = ibufp;
- while (*cp) {
- assure_ip;
- if(*cp++ != *ip++)
- goto nxt;
- }
- /* found a match! */
- if(!opta) {
- assure_ip;
- if(!is_ok(*ip))
- goto nxt;
- }
- output(ibuf, ibufp-ibuf);
- fputs(replp->out, stdout);
- ibufp = ibuf = ip;
- goto nxt2;
- nxt: ;
- }
- if(!opta)
- prevc_is_ok = is_ok (*ibufp);
- ibufp++;
- nxt2: ;
- }
- }
-
- char *
- input(ibuf,n) char *ibuf; register int n; {
- register int nn = fread(ibuf, sizeof(char), n, inf);
- if(nn < n) {
- if(feof(inf))
- eoi++;
- else {
- perror("subst: input error: ");
- exit(1);
- }
- }
- return(ibuf + nn);
- }
-
/*
 * output: write the n bytes at obuf to stdout.
 *
 * Nothing is written when n is not positive.  A short write is reported
 * with perror() and terminates the program with status 1.
 * Returns 0.
 */
int
output(char *obuf, int n)
{
	if (n > 0) {
		size_t want = (size_t) n;

		if (fwrite(obuf, sizeof(char), want, stdout) != want) {
			/* perror() adds the ": " separator itself */
			perror("subst: write error");
			exit(1);	/* probably file system full? */
		}
	}
	return 0;
}
-
/*
 * alloc: allocate n bytes with malloc().
 *
 * Never returns NULL: when the memory cannot be obtained a message is
 * printed on stderr and the program exits with status 1.  A request for
 * 0 bytes is served as a request for 1 byte, because malloc(0) is allowed
 * to return NULL without being out of memory.
 * The caller releases the memory with free().
 */
char *
alloc(unsigned n)
{
	char *a = malloc((size_t) (n ? n : 1));

	if (a == NULL) {
		fprintf(stderr, "subst: out of memory\n");
		exit(1);
	}
	return a;
}
-
/*
 * grow: change the size of the allocation at a to n bytes with realloc(),
 * keeping its contents (up to the smaller of the old and new size).
 *
 * Returns the possibly moved block; the old pointer must not be used any
 * more.  Never returns NULL: on failure a message is printed on stderr
 * and the program exits with status 1.  A size below 1 is treated as 1,
 * so the block is never released or requested with a wrapped-around size.
 */
char *
grow(char *a, int n)
{
	size_t size = n > 0 ? (size_t) n : 1;
	char *bigger = realloc(a, size);

	if (bigger == NULL) {
		fprintf(stderr, "subst: realloc failed\n");
		exit(1);
	}
	return bigger;
}
- --
- Andries Brouwer -- CWI, Amsterdam -- uunet!mcvax!aeb -- aeb@cwi.nl
-
-
-