home *** CD-ROM | disk | FTP | other *** search
- Subject: v11i065: Hum concordance package update kit
- Newsgroups: comp.sources.unix
- Sender: sources
- Approved: rs@uunet.UU.NET
-
- Submitted-by: sun!tut (Bill Tuthill)
- Posting-number: Volume 11, Issue 65
- Archive-name: hum.pch
-
- Quite a few people asked me for the troff macros to produce concordances.
- They weren't included with Hum source because "troffmt" automatically
- produces macros. However, this made me think. Now that laser printers
- have virtually replaced line printers, troff is a lot cheaper than it
- used to be. So I modified "troffmt" for laser printers, taking out the
- cut marks. Here is the recommended command line to produce a concordance
- in 10 point type on a laser printer:
-
- % kwic -c120 filename(s) | sort | troffmt | troff
-
- While I was modifying "troffmt" I found a bug in "format". So the shell
- archive below contains the new "troffmt", the fixed "format", and troff
- macros for producing a concordance. The troff macros are helpful if you
- want to modify the look of your concordance. Here's how to produce a
- concordance with your own macros:
-
- % kwic -c120 filename(s) | sort | troffmt -m | troff trmacs -
-
- A few people asked for the "lemma" program, which was promised in the
- documentation. To tell you the truth, I never came up with a good design
- for "lemma". At UC Berkeley, we always used editor scripts to move words
- from one file to another. I think this method is superior to any special
- purpose program I could write.
-
- Bill Tuthill
-
- ------------------------------- cut here -------------------------------
- echo Extracting troffmt.c 1>&2
- sed 's/^X//' > troffmt.c <<\!!!EOF!!!
- X# include <stdio.h> /* troffmt.c (rev3.7) */
- X# include <ctype.h>
- X# include <signal.h>
- X
- Xchar *tempfile; /* name of the scratch file that holds contexts while counting */
- Xint nocnt = 0; /* set by -c: do not count keyword frequency */
- Xint nokwd = 0; /* set by -k: suppress printing of the keyword */
- Xint nomacs = 0; /* set by -m: do not emit the macro definitions */
- X
- X/*
- X * usage: write the option summary to stdout, one line per entry,
- X * then terminate with exit status 1.
- X */
- Xusage()
- X{
- X    static char *help[] = {
- X        "Usage: troffmt [-ckm] [filename(s)] [-]\t\t(rev3.7)",
- X        "-c: suppress counting of keyword frequency",
- X        "-k: entirely suppress printing of keyword",
- X        "-m: do not supply concordance macros automatically",
- X        "- : read standard input instead of files"
- X    };
- X    int n;
- X
- X    for (n = 0; n < sizeof(help) / sizeof(help[0]); n++)
- X        puts(help[n]);
- X    exit(1);
- X}
- X
- X/*
- X * dofile: run the formatter selected by the option flags over one stream.
- X * -k (drop keywords) takes precedence over -c (no counting); with neither
- X * flag the counting formatter is used.
- X */
- Xstatic dofile(fp)
- XFILE *fp;
- X{
- X    if (nokwd)
- X        rmkwds(fp);
- X    else if (nocnt)
- X        ffmt(fp);
- X    else
- X        format(fp);
- X}
- X
- X/*
- X * main: parse the -c, -k and -m flags, emit the macro definitions unless
- X * -m was given, then format each named file.  With no file names the
- X * standard input is formatted; a "-" among the file names also means
- X * standard input.  Files that cannot be opened are reported on stderr
- X * and skipped.  The scratch file is removed before exiting.
- X */
- Xmain(argc, argv)
- Xint argc;
- Xchar *argv[];
- X{
- X    /* mktemp() overwrites its argument, so the template has to be a
- X       writable array rather than a string literal, ending in six X's */
- X    static char tmpname[] = "/tmp/FmtXXXXXX";
- X    FILE *fopen(), *fp;
- X    int i, j, onintr();
- X    char *mktemp();
- X
- X    /* install the cleanup handler only if SIGINT was not already ignored */
- X    if (signal(SIGINT, SIG_IGN) != SIG_IGN)
- X        signal(SIGINT, onintr);
- X
- X    tempfile = tmpname;
- X    mktemp(tempfile);
- X
- X    /* leading arguments that begin with '-' are clusters of flags */
- X    for (i = 1; argv[i] && *argv[i] == '-'; i++)
- X    {
- X        for (j = 1; argv[i][j] != '\0'; j++)
- X        {
- X            if (argv[i][j] == 'c')
- X                nocnt = 1;
- X            else if (argv[i][j] == 'k')
- X                nokwd = 1;
- X            else if (argv[i][j] == 'm')
- X                nomacs = 1;
- X            else /* bad option */
- X            {
- X                fprintf(stderr,
- X                    "Illegal troffmt flag: -%c\n", argv[i][j]);
- X                usage();
- X            }
- X        }
- X    }
- X    if (!nomacs)
- X        pr_macs();
- X    if (i == argc) /* no file names left: filter the standard input */
- X        dofile(stdin);
- X    for (; i < argc; i++)
- X    {
- X        if (argv[i][0] == '-' && argv[i][1] == '\0')
- X        {
- X            dofile(stdin);
- X            continue; /* "-" is not a file name, so do not try to open it */
- X        }
- X        if ((fp = fopen(argv[i], "r")) == NULL)
- X        {
- X            fprintf(stderr,
- X                "Trfmt cannot access the file: %s\n", argv[i]);
- X            continue;
- X        }
- X        dofile(fp);
- X        fclose(fp);
- X    }
- X    unlink(tempfile);
- X    exit(0);
- X}
- X
- X/*
- X * pr_macs: write the troff preamble to stdout.  The first readable accent
- X * file (-ms, then -me) is pulled in with ".so"; if neither can be read a
- X * note goes to stderr.  The KW, CX, HD and FO macro definitions and the
- X * page settings are written in either case.
- X */
- Xpr_macs()
- X{
- X    static char *accents[] = {
- X        "/usr/lib/ms/ms.acc", /* -ms accent marks */
- X        "/usr/lib/me/chars.me" /* -me accent marks */
- X    };
- X    int k, found;
- X
- X    found = 0;
- X    for (k = 0; k < 2 && !found; k++)
- X    {
- X        if (access(accents[k], 4) == 0) /* mode 4: read permission */
- X        {
- X            printf(".so %s\n", accents[k]);
- X            found = 1;
- X        }
- X    }
- X    if (!found)
- X        fprintf(stderr, "Install -ms or -me macros!\n");
- X
- X    printf(".de KW\n.ne 2.1\n.sp 0.3\n.ta 20n\n.ft B\n..\n");
- X    printf(".de CX\n.sp 0.1\n.ta 3.6iR 3.75i\n.ft R\n..\n");
- X    printf(".de HD\n'sp 2\n.tl ''- %% -''\n'sp 2\n..\n");
- X    printf(".de FO\n'bp\n..\n.wh 0 HD\n.wh -5 FO\n");
- X    printf(".nf\n.po 0.5i\n.ll 7.0i\n.lt 7.0i\n");
- X}
- X
- X/*
- X * format: read "keyword ...|context" lines from fp and write troff input.
- X * Each run of equal keywords becomes one entry: ".KW", the keyword, a tab,
- X * the number of lines in the run in parentheses, ".CX", then the contexts.
- X * Contexts are parked in the scratch file named by tempfile until the run
- X * ends, because the count is printed ahead of them.
- X * A backspace in the input is rewritten as "\*" (troff string
- X * interpolation); a '|' inside the context becomes a tab.
- X * Over-long keywords or contexts are truncated instead of overrunning
- X * the local buffers.
- X */
- Xformat(fp)
- XFILE *fp;
- X{
- X    FILE *fopen(), *tf;
- X    char s[BUFSIZ], okw[BUFSIZ/2], nkw[BUFSIZ/2], cntxt[BUFSIZ];
- X    char *sp, *kwp, *cxp, *strcpy();
- X    char *kwlim, *cxlim;
- X    int kwfreq = 0;
- X
- X    /* a backspace expands to two characters, so copying stops while there
- X       is still room for one pair plus the terminating character(s) */
- X    kwlim = nkw + sizeof(nkw) - 2;
- X    cxlim = cntxt + sizeof(cntxt) - 3;
- X
- X    strcpy(okw, "~~~~~");
- X    tf = NULL; /* prtmpfile() exits cleanly when there was no input */
- X    while (fgets(s, BUFSIZ, fp))
- X    {
- X        /* keyword: everything up to the first blank or '|' */
- X        for (sp = s, kwp = nkw;
- X             *sp && *sp != ' ' && *sp != '|' && kwp < kwlim; sp++)
- X        {
- X            if (*sp == '\b') /* interpolate troff string */
- X            {
- X                *kwp++ = '\\';
- X                *kwp++ = '*';
- X            } else
- X                *kwp++ = *sp;
- X        }
- X        *kwp = '\0';
- X
- X        /* skip to the context; step over '|' only if it is really there,
- X           so a line without one cannot push sp past the terminator */
- X        while (*sp && *sp != '|')
- X            sp++;
- X        if (*sp == '|')
- X            sp++;
- X
- X        for (cxp = cntxt; *sp && *sp != '\n' && cxp < cxlim; sp++)
- X        {
- X            if (*sp == '|')
- X                *cxp++ = '\t';
- X            else if (*sp == '\b') /* interpolate troff string */
- X            {
- X                *cxp++ = '\\';
- X                *cxp++ = '*';
- X            } else
- X                *cxp++ = *sp;
- X        }
- X        *cxp++ = '\n';
- X        *cxp = '\0';
- X
- X        /* kwfreq == 0 forces a heading for the first line even if its
- X           keyword happens to equal the initial value of okw */
- X        if (kwfreq == 0 || strcmp(nkw, okw) != 0) /* kwds different */
- X        {
- X            if (kwfreq != 0)
- X                prtmpfile(tf, kwfreq);
- X            printf(".KW\n%s\t", nkw);
- X            if ((tf = fopen(tempfile, "w")) == NULL)
- X            {
- X                perror(tempfile);
- X                exit(1);
- X            }
- X            fputs(cntxt, tf);
- X            kwfreq = 1;
- X        }
- X        else /* if keywords are the same */
- X        {
- X            fputs(cntxt, tf);
- X            kwfreq++;
- X        }
- X        strcpy(okw, nkw);
- X    }
- X    prtmpfile(tf, kwfreq);
- X}
- X
- X/*
- X * prtmpfile: finish the current keyword entry.  Prints the frequency in
- X * parentheses and a ".CX" line, closes the scratch file that was open for
- X * writing, then copies its lines to stdout, each preceded by a blank.
- X *   tf     - stream open for writing on tempfile; NULL means no input
- X *            was seen, in which case the program exits with status 1
- X *   kwfreq - number of context lines stored for the keyword
- X */
- Xprtmpfile(tf, kwfreq)
- XFILE *tf;
- Xint kwfreq;
- X{
- X    char save[BUFSIZ];
- X
- X    if (tf == NULL) /* exit without core dump */
- X        exit(1);
- X
- X    printf("(%d)\n.CX\n", kwfreq);
- X    fclose(tf);
- X    if ((tf = fopen(tempfile, "r")) == NULL)
- X    {
- X        /* reading through a NULL stream would crash; report and stop */
- X        perror(tempfile);
- X        exit(1);
- X    }
- X    while (fgets(save, BUFSIZ, tf))
- X        printf(" %s", save);
- X    fclose(tf);
- X}
- X
- X/*
- X * onintr: SIGINT handler installed by main().  Announces the interrupt
- X * on stderr, removes the scratch file and exits with status 1.
- X */
- Xint onintr()
- X{
- X    fputs("\nInterrupt\n", stderr);
- X    unlink(tempfile);
- X    exit(1);
- X}
- X
- X/*
- X * ffmt: fast formatter used with -c.  Works like format() but prints no
- X * frequency, so nothing has to be buffered: a changed keyword produces
- X * ".KW", the keyword, ".CX" and the context; a repeated keyword produces
- X * only the context.
- X * The keyword ends at the first blank or '|', the same rule format()
- X * uses.  A backspace is rewritten as "\*" and a '|' inside the context
- X * becomes a tab.  Over-long fields are truncated instead of overrunning
- X * the local buffers.
- X */
- Xffmt(fp)
- XFILE *fp;
- X{
- X    char s[BUFSIZ], okw[BUFSIZ/2], nkw[BUFSIZ/2], cntxt[BUFSIZ];
- X    char *sp, *kwp, *cxp;
- X    char *kwlim, *cxlim;
- X    int first = 1; /* forces a heading for the first line */
- X
- X    /* a backspace expands to two characters, so copying stops while there
- X       is still room for one pair plus the terminating character(s) */
- X    kwlim = nkw + sizeof(nkw) - 2;
- X    cxlim = cntxt + sizeof(cntxt) - 3;
- X
- X    strcpy(okw, "~~~~~");
- X
- X    while (fgets(s, BUFSIZ, fp))
- X    {
- X        for (sp = s, kwp = nkw;
- X             *sp && *sp != ' ' && *sp != '|' && kwp < kwlim; sp++)
- X        {
- X            if (*sp == '\b') /* interpolate troff string */
- X            {
- X                *kwp++ = '\\';
- X                *kwp++ = '*';
- X            } else
- X                *kwp++ = *sp;
- X        }
- X        *kwp = '\0';
- X
- X        /* step over '|' only if present, so a line without one cannot
- X           push sp past the terminator */
- X        while (*sp && *sp != '|')
- X            sp++;
- X        if (*sp == '|')
- X            sp++;
- X
- X        for (cxp = cntxt; *sp && *sp != '\n' && cxp < cxlim; sp++)
- X        {
- X            if (*sp == '|')
- X                *cxp++ = '\t';
- X            else if (*sp == '\b') /* interpolate troff string */
- X            {
- X                *cxp++ = '\\';
- X                *cxp++ = '*';
- X            } else
- X                *cxp++ = *sp;
- X        }
- X        *cxp++ = '\n';
- X        *cxp = '\0';
- X
- X        if (first || strcmp(nkw, okw) != 0) /* kwds different */
- X            printf(".KW\n%s\n.CX\n %s", nkw, cntxt);
- X        else /* if keywords are the same */
- X            printf(" %s", cntxt);
- X        strcpy(okw, nkw);
- X        first = 0;
- X    }
- X}
- X
- X/*
- X * rmkwds: copy fp to stdout without the keyword field.  Everything ahead
- X * of the first '|' on a line is discarded; from there on each '|' is
- X * written as a blank and every other character is passed through.  A
- X * line containing no '|' produces no output at all.
- X */
- Xrmkwds(fp)
- XFILE *fp;
- X{
- X    char line[BUFSIZ];
- X    char *p;
- X
- X    while (fgets(line, BUFSIZ, fp) != NULL)
- X    {
- X        p = line;
- X        while (*p != '\0' && *p != '|') /* discard the keyword */
- X            p++;
- X        while (*p != '\0')
- X        {
- X            putchar(*p == '|' ? ' ' : *p);
- X            p++;
- X        }
- X    }
- X}
- !!!EOF!!!
- if [ "`wc -c troffmt.c`" != " 5328 troffmt.c" ]
- then
- echo \07WARNING troffmt.c: extraction error
- fi
- echo Extracting format.c 1>&2
- sed 's/^X//' > format.c <<\!!!EOF!!!
- X# include <stdio.h> /* format.c (rev3.7) */
- X# include <ctype.h>
- X# include <signal.h>
- X
- Xchar *tempfile; /* name of the scratch file that takes buffer overflow */
- Xint nomap = 0; /* set by -m: do not map keywords to upper case */
- Xint nocnt = 0; /* set by -c: do not count keyword frequency */
- Xint nokwd = 0; /* set by -k: suppress printing of the keyword */
- X
- X/*
- X * usage: write the option summary to stdout, one line per entry,
- X * then terminate with exit status 1.
- X */
- Xusage()
- X{
- X    static char *help[] = {
- X        "Usage: format [-mck] [filename(s)]\t\t(rev3.7)",
- X        "-m: keywords not mapped from lower to upper case",
- X        "-c: suppress counting of keyword frequency",
- X        "-k: entirely suppress printing of keyword",
- X        NULL
- X    };
- X    char **lp;
- X
- X    for (lp = help; *lp != NULL; lp++)
- X        puts(*lp);
- X    exit(1);
- X}
- X
- X/*
- X * dofile: run the formatter selected by the option flags over one stream.
- X * -k (drop keywords) takes precedence over -c (no counting); with neither
- X * flag the counting formatter is used.
- X */
- Xstatic dofile(fp)
- XFILE *fp;
- X{
- X    if (nokwd)
- X        rmkwds(fp);
- X    else if (nocnt)
- X        ffmt(fp);
- X    else
- X        format(fp);
- X}
- X
- X/*
- X * main: parse the -m, -c and -k flags, then format each named file, or
- X * the standard input when no file names are given.  Files that cannot be
- X * opened are reported on stderr and skipped.  The scratch file is removed
- X * before exiting.
- X */
- Xmain(argc, argv)
- Xint argc;
- Xchar *argv[];
- X{
- X    /* mktemp() overwrites its argument, so the template has to be a
- X       writable array rather than a string literal, ending in six X's */
- X    static char tmpname[] = "/tmp/FmtXXXXXX";
- X    FILE *fopen(), *fp;
- X    int i, j, onintr();
- X    char *mktemp();
- X
- X    /* install the cleanup handler only if SIGINT was not already ignored */
- X    if (signal(SIGINT, SIG_IGN) != SIG_IGN)
- X        signal(SIGINT, onintr);
- X
- X    tempfile = tmpname;
- X    mktemp(tempfile);
- X
- X    /* leading arguments that begin with '-' are clusters of flags */
- X    for (i = 1; argv[i] && *argv[i] == '-'; i++)
- X    {
- X        for (j = 1; argv[i][j] != '\0'; j++)
- X        {
- X            if (argv[i][j] == 'm')
- X                nomap = 1;
- X            else if (argv[i][j] == 'c')
- X                nocnt = 1;
- X            else if (argv[i][j] == 'k')
- X                nokwd = 1;
- X            else /* bad option */
- X            {
- X                fprintf(stderr,
- X                    "Illegal format flag: -%c\n", argv[i][j]);
- X                usage();
- X            }
- X        }
- X    }
- X    if (i == argc) /* no file names left: filter the standard input */
- X        dofile(stdin);
- X    for (; i < argc; i++)
- X    {
- X        if ((fp = fopen(argv[i], "r")) == NULL)
- X        {
- X            fprintf(stderr,
- X                "Format cannot access the file: %s\n", argv[i]);
- X            continue;
- X        }
- X        dofile(fp);
- X        fclose(fp);
- X    }
- X    unlink(tempfile);
- X    exit(0);
- X}
- X
- Xchar buff[BUFSIZ*8]; /* in-memory cache of contexts for the current keyword */
- Xint bufflen; /* total length of the contexts held in buff */
- Xint fulltf = 0; /* nonzero once the cache has been spilled to the tempfile */
- XFILE *tf = NULL; /* stream on the tempfile, used by putbuff() and getbuff() */
- X
- X/*
- X * format: read "keyword|context" lines from fp and print, for each run
- X * of equal keywords, the keyword followed by its frequency in parentheses
- X * and then the contexts of the run.  Contexts are collected with
- X * putbuff() and released with getbuff() once the run is complete.
- X * Keywords are mapped to upper case unless nomap is set; each '|' inside
- X * the context is replaced by three blanks.  Over-long fields are
- X * truncated instead of overrunning the local buffers.  Nothing is
- X * printed for an empty input.
- X */
- Xformat(fp)
- XFILE *fp;
- X{
- X    char s[BUFSIZ], okw[BUFSIZ/2], nkw[BUFSIZ/2], cntxt[BUFSIZ];
- X    char *sp, *kwp, *cxp, *strcpy();
- X    char *kwlim, *cxlim;
- X    int kwfreq = 0;
- X
- X    /* a '|' expands to three characters, so copying stops while there is
- X       still room for one expansion plus the newline and terminator */
- X    kwlim = nkw + sizeof(nkw) - 1;
- X    cxlim = cntxt + sizeof(cntxt) - 4;
- X
- X    strcpy(okw, "~~~~~");
- X
- X    while (fgets(s, BUFSIZ, fp))
- X    {
- X        for (sp = s, kwp = nkw; *sp && *sp != '|' && kwp < kwlim; sp++)
- X        {
- X            /* the ctype routines need a value in unsigned char range */
- X            if (!nomap && islower((unsigned char)*sp))
- X                *kwp++ = toupper((unsigned char)*sp);
- X            else
- X                *kwp++ = *sp;
- X        }
- X        *kwp = '\0';
- X
- X        /* step over '|' only if present, so a line without one cannot
- X           push sp past the terminator */
- X        while (*sp && *sp != '|')
- X            sp++;
- X        if (*sp == '|')
- X            sp++;
- X
- X        for (cxp = cntxt; *sp && *sp != '\n' && cxp < cxlim; sp++)
- X        {
- X            if (*sp == '|') {
- X                *cxp++ = ' '; *cxp++ = ' '; *cxp++ = ' ';
- X            } else
- X                *cxp++ = *sp;
- X        }
- X        *cxp++ = '\n';
- X        *cxp = '\0';
- X
- X        /* kwfreq == 0 forces a heading for the first line even if its
- X           keyword happens to equal the initial value of okw */
- X        if (kwfreq == 0 || strcmp(nkw, okw) != 0) /* kwds different */
- X        {
- X            if (kwfreq != 0)
- X            {
- X                getbuff(kwfreq);
- X                putchar('\n');
- X            }
- X            *buff = '\0';
- X            bufflen = 0;
- X            fputs(nkw, stdout);
- X            putbuff(cntxt);
- X            kwfreq = 1;
- X        }
- X        else /* if keywords are the same */
- X        {
- X            putbuff(cntxt);
- X            kwfreq++;
- X        }
- X        strcpy(okw, nkw);
- X    }
- X    /* with no input there is no entry to finish; calling getbuff() here
- X       would print "(0)" and whatever an earlier file left in buff */
- X    if (kwfreq != 0)
- X        getbuff(kwfreq);
- X}
- X
- X/*
- X * putbuff: store one context line for the current keyword.  Lines are
- X * appended to buff while they fit.  The first line that does not fit
- X * opens the tempfile, writes the buffered text followed by that line,
- X * and sets fulltf so later lines go straight to the file.
- X *   cntxt - NUL-terminated context line, newline included
- X * Exits with status 1 if the tempfile cannot be created.
- X */
- Xputbuff(cntxt)
- Xchar cntxt[];
- X{
- X    char *strcat();
- X    int len;
- X
- X    if (fulltf) /* already spilled: append to the file */
- X    {
- X        fputs(cntxt, tf);
- X        return;
- X    }
- X
- X    len = strlen(cntxt);
- X    if (bufflen + len < BUFSIZ*8)
- X    {
- X        strcat(buff, cntxt);
- X        bufflen += len;
- X        return;
- X    }
- X
- X    /* buffer full: move everything collected so far to the tempfile */
- X    if ((tf = fopen(tempfile, "w")) == NULL)
- X    {
- X        perror(tempfile);
- X        exit(1);
- X    }
- X    fulltf = 1;
- X    fputs(buff, tf);
- X    fputs(cntxt, tf); /* the line that caused the overflow must be kept */
- X    *buff = '\0';
- X    bufflen = 0;
- X}
- X
- X/*
- X * getbuff: print the frequency in parentheses followed by the contexts
- X * collected by putbuff().  If nothing was spilled the contexts come from
- X * buff; otherwise the tempfile is closed, reopened for reading and copied
- X * to stdout, and fulltf is cleared for the next keyword.
- X *   kwfreq - number of context lines collected for the keyword
- X * Exits with status 1 if the tempfile cannot be reopened.
- X */
- Xgetbuff(kwfreq)
- Xint kwfreq;
- X{
- X    char str[BUFSIZ];
- X
- X    printf("(%d)\n", kwfreq);
- X    if (!fulltf)
- X    {
- X        fputs(buff, stdout);
- X        return;
- X    }
- X
- X    fclose(tf);
- X    if ((tf = fopen(tempfile, "r")) == NULL)
- X    {
- X        /* reading through a NULL stream would crash; report and stop */
- X        perror(tempfile);
- X        exit(1);
- X    }
- X    while (fgets(str, BUFSIZ, tf))
- X        fputs(str, stdout);
- X    fclose(tf);
- X    fulltf = 0;
- X}
- X
- X/*
- X * onintr: SIGINT handler installed by main().  Reports the interrupt on
- X * stderr, deletes the scratch file and exits with status 1.
- X */
- Xint onintr()
- X{
- X    fputs("\nInterrupt\n", stderr);
- X    unlink(tempfile);
- X    exit(1);
- X}
- X
- X/*
- X * ffmt: formatter used with -c.  Works like format() but prints no
- X * frequency, so nothing has to be buffered: a changed keyword produces an
- X * empty line, the keyword on its own line and then the context; a
- X * repeated keyword produces only the context.
- X * Keywords are mapped to upper case unless nomap is set; each '|' inside
- X * the context is replaced by three blanks.  Over-long fields are
- X * truncated instead of overrunning the local buffers.
- X */
- Xffmt(fp)
- XFILE *fp;
- X{
- X    char s[BUFSIZ], okw[BUFSIZ/2], nkw[BUFSIZ/2], cntxt[BUFSIZ];
- X    char *sp, *kwp, *cxp, *strcpy();
- X    char *kwlim, *cxlim;
- X    int first = 1; /* forces a heading for the first line */
- X
- X    /* a '|' expands to three characters, so copying stops while there is
- X       still room for one expansion plus the newline and terminator */
- X    kwlim = nkw + sizeof(nkw) - 1;
- X    cxlim = cntxt + sizeof(cntxt) - 4;
- X
- X    strcpy(okw, "~~~~~");
- X    while (fgets(s, BUFSIZ, fp))
- X    {
- X        for (sp = s, kwp = nkw; *sp && *sp != '|' && kwp < kwlim; sp++)
- X        {
- X            /* the ctype routines need a value in unsigned char range */
- X            if (!nomap && islower((unsigned char)*sp))
- X                *kwp++ = toupper((unsigned char)*sp);
- X            else
- X                *kwp++ = *sp;
- X        }
- X        *kwp = '\0';
- X
- X        /* step over '|' only if present, so a line without one cannot
- X           push sp past the terminator */
- X        while (*sp && *sp != '|')
- X            sp++;
- X        if (*sp == '|')
- X            sp++;
- X
- X        for (cxp = cntxt; *sp && *sp != '\n' && cxp < cxlim; sp++)
- X        {
- X            if (*sp == '|') {
- X                *cxp++ = ' '; *cxp++ = ' '; *cxp++ = ' ';
- X            } else
- X                *cxp++ = *sp;
- X        }
- X        *cxp++ = '\n';
- X        *cxp = '\0';
- X
- X        if (first || strcmp(nkw, okw) != 0) /* kwds different */
- X            printf("\n%s\n %s", nkw, cntxt);
- X        else /* if keywords are the same */
- X            printf(" %s", cntxt);
- X        strcpy(okw, nkw);
- X        first = 0;
- X    }
- X}
- X
- X/*
- X * rmkwds: copy fp to stdout without the keyword field.  Everything ahead
- X * of the first '|' on a line is discarded; from there on each '|' is
- X * replaced by blank padding and every other character is passed through.
- X * A line containing no '|' produces no output at all.
- X */
- Xrmkwds(fp)
- XFILE *fp;
- X{
- X    char line[BUFSIZ];
- X    char *p;
- X
- X    while (fgets(line, BUFSIZ, fp) != NULL)
- X    {
- X        p = line;
- X        while (*p != '\0' && *p != '|') /* discard the keyword */
- X            p++;
- X        while (*p != '\0')
- X        {
- X            if (*p != '|')
- X                putchar(*p);
- X            else
- X                printf(" ");
- X            p++;
- X        }
- X    }
- X}
- !!!EOF!!!
- if [ "`wc -c format.c`" != " 4765 format.c" ]
- then
- echo \07WARNING format.c: extraction error
- fi
- echo Extracting trmacs 1>&2
- sed 's/^X//' > trmacs <<\!!!EOF!!!
- X.\" @(#)ms.acc 1.1 86/07/08 SMI; from UCB 4.2
- X. \" AM - accent mark definitions
- X.bd S B 3
- X. \" fudge factors for nroff and troff
- X.if n \{\
- X. ds #H 0
- X. ds #V .8m
- X. ds #F .3m
- X. ds #[ \f1
- X. ds #] \fP
- X.\}
- X.if t \{\
- X. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
- X. ds #V .6m
- X. ds #F 0
- X. ds #[ \&
- X. ds #] \&
- X.\}
- X. \" simple accents for nroff and troff
- X.if n \{\
- X. ds ' \h'-1'\'
- X. ds ` \h'-1'\`
- X. ds ^ \h'-1'^
- X. ds , \h'-1',
- X. ds ~ \h'-1'~
- X. ds ? ?
- X. ds ! !
- X. ds / \h'-1'\(sl
- X. ds q o\h'-1',
- X.\}
- X.if t \{\
- X. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
- X. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
- X. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
- X. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
- X. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
- X. ds ? \s-2c\h'-\w'c'u*7/10'\u\h'\*(#H'\zi\d\s+2\h'\w'c'u*8/10'
- X. ds ! \s-2\(or\s+2\h'-\w'\(or'u'\v'-.8m'.\v'.8m'
- X. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
- X. ds q o\h'-\w'o'u*8/10'\s-4\v'.4m'\z\(*i\v'-.4m'\s+4\h'\w'o'u*8/10'
- X.\}
- X. \" troff and (daisy-wheel) nroff accents
- X.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
- X.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
- X.ds v \\k:\h'-(\\n(.wu*9/10-\*(#H)'\v'-\*(#V'\*(#[\s-4v\s0\v'\*(#V'\h'|\\n:u'\*(#]
- X.ds _ \\k:\h'-(\\n(.wu*9/10-\*(#H+(\*(#F*2/3))'\v'-.4m'\z\(hy\v'.4m'\h'|\\n:u'
- X.ds . \\k:\h'-(\\n(.wu*8/10)'\v'\*(#V*4/10'\z.\v'-\*(#V*4/10'\h'|\\n:u'
- X.ds 3 \*(#[\v'.2m'\s-2\&3\s0\v'-.2m'\*(#]
- X.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
- X.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
- X.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
- X.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
- X.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
- X.ds ae a\h'-(\w'a'u*4/10)'e
- X.ds Ae A\h'-(\w'A'u*4/10)'E
- X.ds oe o\h'-(\w'o'u*4/10)'e
- X.ds Oe O\h'-(\w'O'u*4/10)'E
- X. \" corrections for vroff
- X.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
- X.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
- X. \" for low resolution devices (crt and lpr)
- X.if \n(.H>23 .if \n(.V>19 \
- X\{\
- X. ds : \h'-1'"
- X. ds 8 B
- X. ds v \h'-1'\o'\(aa\(ga'
- X. ds _ \h'-1'^
- X. ds . \h'-1'.
- X. ds 3 3
- X. ds o a
- X. ds d- d\h'-1'\(ga
- X. ds D- D\h'-1'\(hy
- X. ds th \o'bp'
- X. ds Th \o'LP'
- X. ds ae ae
- X. ds Ae AE
- X. ds oe oe
- X. ds Oe OE
- X.\}
- X.rm #[ #] #H #V #F C
- X.\" KW - start a keyword entry: require 2.1 lines on the page, space
- X.\"      down 0.3, set a tab stop at 20n and switch to font B
- X.de KW
- X.ne 2.1
- X.sp 0.3
- X.ta 20n
- X.ft B
- X..
- X.\" CX - start the context lines: space down 0.1, set a right-adjusted
- X.\"      tab at 3.6i and a left tab at 3.75i, switch to font R
- X.de CX
- X.sp 0.1
- X.ta 3.6iR 3.75i
- X.ft R
- X..
- X.\" HD - page header: centered page number with 2 lines of space around it
- X.de HD
- X'sp 2
- X.tl ''- % -''
- X'sp 2
- X..
- X.\" FO - page footer: begin a new page
- X.de FO
- X'bp
- X..
- X.\" traps: HD at the top of each page, FO five lines above the bottom
- X.wh 0 HD
- X.wh -5 FO
- X.\" page setup: no-fill mode, 0.5i page offset, 7.0i line and title length
- X.nf
- X.po 0.5i
- X.ll 7.0i
- X.lt 7.0i
- !!!EOF!!!
- if [ "`wc -c trmacs`" != " 2509 trmacs" ]
- then
- echo \07WARNING trmacs: extraction error
- fi
-
-