Source Code 1992 March

home *** CD-ROM | disk | FTP | other *** search

/ Source Code 1992 March / Source_Code_CD-ROM_Walnut_Creek_March_1992.iso / usenet / altsrcs / 2 / 2950 < prev next >

Wrap

Internet Message Format | 1991-03-04 | 44.9 KB

From: lee@sq.sq.com (Liam R. E. Quin) Newsgroups: alt.sources Subject: lq-text Full Text Retrieval Database Part 02/13 Message-ID: <1991Mar4.020107.16142@sq.sq.com> Date: 4 Mar 91 02:01:07 GMT : cut here --- cut here -- : To unbundle, sh this file #! /bin/sh : part 02 echo x - lq-text/src/filters/Makefile 1>&2 sed 's/^X//' >lq-text/src/filters/Makefile <<'@@@End of lq-text/src/filters/Makefile' X# filters/Makefile -- Copyright 1990 Liam R. Quin. All Rights Reserved. X# This code is NOT in the public domain. X# See the file ../COPYRIGHT for full details. X X# This Makefile belongs in the "src/filters" directory. X# X# Note that most of the actual configuration is done in ../Makefile and X# in ../h/global.h, and not here. X X# $Id: Makefile,v 1.4 90/10/06 00:57:26 lee Rel $ X X X# This is what gets made: XTARGETS = MailFilter NewsFilter XLIBFILES=$(TARGETS) XEXTRA=-DMAILFILTER='$(MAILFILTER)' -DNEWSFILTER='$(NEWSFILTER)' $(EXTRA) X XSRCS = FilterMain.c FilterType.c MailFilter.c NewsFilter.c XOBJS = FilterMain.o FilterType.o MailFilter.o NewsFilter.o X XPWD=filters X XDESTDIR=../lib XLQ=../lib/liblq.a XMODE=755 X X# for compiling: XEXTRA=-I../h XRANLIB=echo X Xall: $(TARGETS) X Xsaber_src: X echo $(PWD) X #cd $(PWD) X #load $(CFLAGS) $(SRCS) X #cd .. X Xsaber_obj: X #cd $(PWD) X #load $(CFLAGS) $(SRCS) X #cd .. 
X Xinstall: all X for i in $(LIBFILES); do cp "$$i" $(DESTDIR); \ X strip "$(DESTDIR)/$$i" ; \ X chmod $(MODE) "$(DESTDIR)/$$i" ; \ X done X Xtidy: X /bin/rm -f *.o core m.log tags X Xclean: tidy X /bin/rm -f $(TARGETS) $(TEST) X Xdepend: X mkdep $(CFLAGS) *.c X XCFilter: FilterMain.o CFilter.o X $(CC) $(CFLAGS) -o CFilter FilterMain.o CFilter.o $(MALLOC) $(LQ) X XNewsFilter: FilterMain.o NewsFilter.o X $(CC) $(CFLAGS) -o NewsFilter FilterMain.o NewsFilter.o $(MALLOC) $(LQ) X XMailFilter: FilterMain.o MailFilter.o X $(CC) $(CFLAGS) -o MailFilter FilterMain.o MailFilter.o $(MALLOC) $(LQ) X XCDMSFilter: FilterMain.o CDMSFilter.o X $(CC) $(CFLAGS) -o CDMSFilter FilterMain.o CDMSFilter.o $(MALLOC) $(LQ) X X# X# $Log: Makefile,v $ X# Revision 1.4 90/10/06 00:57:26 lee X# Prepared for first beta release. X# X# Revision 1.3 90/10/03 21:14:45 lee X# Added MAILFILTER stuff. X# X# Revision 1.2 90/09/28 21:54:43 lee X# No longer uses OWNER. X# X# Revision 1.1 90/08/09 19:17:58 lee X# Initial revision X X# DO NOT PUT ANYTHING AFTER THIS LINE X# DO NOT DELETE THIS LINE -- mkdep uses it. X# DO NOT PUT ANYTHING AFTER THIS LINE, IT WILL GO AWAY. X XFilterMain.o: FilterMain.c XMailFilter.o: MailFilter.c /usr/include/malloc.h XMailFilter.o: ../h/wordrules.h ../h/emalloc.h XNewsFilter.o: NewsFilter.c XNewsFilter.o: ../h/wordrules.h ../h/emalloc.h XTroffFilter.o: TroffFilter.c XTroffFilter.o: ../h/wordrules.h ../h/emalloc.h X X# IF YOU PUT ANYTHING HERE IT WILL GO AWAY @@@End of lq-text/src/filters/Makefile echo x - lq-text/src/filters/NewsFilter.c 1>&2 sed 's/^X//' >lq-text/src/filters/NewsFilter.c <<'@@@End of lq-text/src/filters/NewsFilter.c' X/* NewsFilter.c -- Copyright 1989 Liam R. Quin. All Rights Reserved. X * This code is NOT in the public domain. X * See the file COPYRIGHT for full details. X */ X X/* $Id: NewsFilter.c,v 1.5 90/10/06 00:57:27 lee Rel1-10 $ X */ X X/* Filter for usenet articles. 
X * Throw away all of the header except X * Subject X * From X * Organi[sz]ation X * X * Probably ought to keep Message-ID, but I can't store it anyway! X * X * See FilterMain and wordrules.h for more info. X * X */ X X#ifdef SYSV X extern int _filbuf(), _flsbuf(); /* for lint! */ X#endif X#include <stdio.h> X#include <malloc.h> X#include <ctype.h> X#include "wordrules.h" X X#include "emalloc.h" X X#define STREQ(boy, girl) ((*(boy) == *(girl)) && !strcmp(boy, girl)) X X/** C Library functions that need to be declared: **/ X#ifndef tolower X extern int tolower(); X#endif Xextern int strcmp(); X/** Functions in this file that need to be declared **/ Xint GetChar(); Xvoid Header(), Body(); X/** **/ X Xextern char *progname; Xvoid Filter(); X Xchar *KeepThese[] = { /* these must be sorted on the first character */ X "from", X "keywords", X "summary", X "subject", X "organisation", X "organization", X 0 X}; X Xint icstreq(s1, s2) /* case insensitive strcmp */ X char *s1, *s2; X{ X register char ch1, ch2; X X while (*s1 && *s2) { X if (*s1 != *s2) { X if (isupper(*s1)) { X ch1 = tolower(*s1); X ch2 = (*s2); X } else if (isupper(*s2)) { X /* Note that we only have to test one character for case! 
*/ X ch1 = (*s1); X ch2 = tolower(*s2); X } else { X return 0; /* they are different */ X } X if (ch1 != ch2) return 0; /* the strings differ */ X } X s1++; s2++; X } X if (!*s1 && !*s2) { X return 1; /* they are the same */ X } X return 0; /* they are different */ X} X Xint XIsWanted(String) X char *String; X{ X char **p; X int ch = String[0]; X X if (isupper(ch)) ch = tolower(ch); X X for (p = KeepThese; *p && **p; p++) { X if (ch < **p) return 0; /* gone too far */ X else if (icstreq(String, *p)) return 1; X } X return 0; X} X Xvoid XFilter(InputFile, Name) X FILE *InputFile; X char *Name; X{ X Header(InputFile, Name); X Body(InputFile, Name); X} X Xtypedef enum { X F_NotSeenAnythingYet, X F_InTheFirstWord, X F_AfterTheFirstWord X} t_FirstWord; X Xint InWord = 0; X Xvoid XHeader(InputFile, Name) X FILE *InputFile; X char *Name; X{ X int AtStartOfLine = 1; X int IgnoreLine = 1; /* initialised for lint and gcc -W really... */ X t_FirstWord FirstWord = F_NotSeenAnythingYet; X int ch; X static int BufLen; X static char *Buffer = 0; X int AtStartOfWord; X register char *q; X X if (Buffer == 0) { X BufLen = 24; X Buffer = emalloc(BufLen); X } X X q = Buffer; X InWord = 0; X X while ((ch = GetChar(InputFile)) != EOF) { X if (ch == '\n') { X if (AtStartOfLine) { /* a blank line */ X putchar('\n'); X return; X } X } X X InWord = InWord ? 
WithinWord(ch) : StartsWord(ch); X X switch (FirstWord) { X case F_NotSeenAnythingYet: X if (InWord) { X FirstWord = F_InTheFirstWord; X if (q - Buffer >= BufLen - 1) { X int where = q - Buffer; X X BufLen += 24; X Buffer = erealloc(Buffer, BufLen); X q = &Buffer[where]; X } X *q++ = ch; X } else { X putchar(' '); X } X break; X case F_InTheFirstWord: X if (InWord) { X if (q - Buffer >= BufLen - 1) { X int where = q - Buffer; X X BufLen += 24; X Buffer = erealloc(Buffer, BufLen += 24); X q = &Buffer[where]; X } X *q++ = ch; X break; X } else { /* reached the end of the first word on the line */ X *q = '\0'; X /* See if it's a keyword */ X if ((IgnoreLine = !IsWanted(Buffer)) != 0) { X /* Turn the word into one that won't get indexed, X * so that word counmts are unaffected: X * We use qxxxxxxx (any number of x's) for this. X */ X for (q = Buffer; *q; q++) { X putchar((q == Buffer) ? 'q' : 'x'); X } X putchar (ch == '\n' ? '\n' : ' '); X } else { X printf("%s%c", Buffer, ch == '\n' ? ch : ' '); X } X FirstWord = F_AfterTheFirstWord; X } X break; X default: X if ((AtStartOfLine = (ch == '\n'))) { X IgnoreLine = 0; X q = Buffer; X FirstWord = F_NotSeenAnythingYet; X AtStartOfWord = 1; X } X if (InWord && !IgnoreLine) { X putchar(ch); X } else { X if (AtStartOfWord && InWord) { X putchar('q'); X AtStartOfWord = 0; X } else if (InWord) { X putchar('x'); X } else if (isspace(ch)) { X putchar(ch); X } else { X putchar(' '); X } X } X if (!InWord) AtStartOfWord = 1; X } X if ((AtStartOfLine = (ch == '\n'))) { X IgnoreLine = 0; X q = Buffer; X FirstWord = F_NotSeenAnythingYet; X AtStartOfWord = 1; X } X } X if (ch == EOF) { X fprintf(stderr, "%s: warning: Mail folder %s has no message body\n", X progname, Name); X } X} X Xvoid XBody(InputFile, Name) X FILE *InputFile; X char *Name; X{ X int ch; X X while ((ch = GetChar(InputFile)) != EOF) { X if (InWord = InWord ? WithinWord(ch) : StartsWord(ch)) { X putchar(ch); X } else { X putchar((ch == '\n') ? 
'\n' : ' '); X } X } X} X X#ifdef __GNU__ Xinline X#endif Xint XGetChar(fd) X FILE *fd; X{ X static int LastChar = 0; X X if (LastChar) { X int ch = LastChar; X LastChar = 0; X return ch; X } X X /* Only return a single quote if it is surrounded by letters */ X if ((LastChar = getc(fd)) == '\'') { X LastChar = getc(fd); X if (InWord && isalpha(LastChar)) return '\''; X else return ' '; X } else { X int ch = LastChar; X LastChar = 0; X return ch; X } X} X X/* X * $Log: NewsFilter.c,v $ X * Revision 1.5 90/10/06 00:57:27 lee X * Prepared for first beta release. X * X * Revision 1.4 90/09/20 16:36:59 lee X * Fixed icstrcmp() and IsWanted() so that the unwanted parts of headers X * get deleted again.... (oops!) X * X * Revision 1.3 90/09/19 21:19:50 lee X * Now supports turning unindexed stuff into qxxxxx-words. X * X * Revision 1.2 90/08/29 21:56:58 lee X * Alpha release. X * X * Revision 1.1 90/08/09 19:17:57 lee X * Initial revision X * X * Revision 1.2 89/09/16 21:16:01 lee X * First demonstratable version. X * X * Revision 1.1 89/09/07 21:05:48 lee X * Initial revision X * X */ @@@End of lq-text/src/filters/NewsFilter.c echo x - lq-text/src/h/Liamdbm.h 1>&2 sed 's/^X//' >lq-text/src/h/Liamdbm.h <<'@@@End of lq-text/src/h/Liamdbm.h' X/* Liamdbm.h -- Copyright 1989 Liam R. Quin. All Rights Reserved. X * This code is NOT in the public domain. X * See the file COPYRIGHT for full details. X */ X X/* $Id: Liamdbm.h,v 1.2 90/10/06 02:18:14 lee Rel1-10 $ X * X * This is used with gdbm. I have not linked with gdbm, and, if you X * wish to do so, you must be careful not to voilate any copyright X * notices... (sigh) X * X * The version of gdbm for which I had a manual is rather old and had no X * ndbm compatibility. X */ X X#include "gdbm.h" Xextern datum gdbm_fetch(); Xextern datum gdbm_firstkey(); Xextern datum gdbm_nextkey(); X Xtypedef char DBM; X X#define dbm_store(db, key, data, mode) gdbm_store(db, key, data) X/* gdbm_open is stupder than ndbm_open.... 
*/ X#define dbm_open(FileName, Mode, m) gdbm_open(FileName, 512, Mode, 0) X#define dbm_fetch gdbm_fetch X#define dbm_close gdbm_close X#define dbm_firstkey gdbm_firstkey X#define dbm_nextkey gdbm_nextkey X X/* X * $Log: Liamdbm.h,v $ X * Revision 1.2 90/10/06 02:18:14 lee X * Prepared for first beta release. X * X * X */ @@@End of lq-text/src/h/Liamdbm.h echo x - lq-text/src/h/Revision.h 1>&2 sed 's/^X//' >lq-text/src/h/Revision.h <<'@@@End of lq-text/src/h/Revision.h' X/* This header file gets updated with every distributed change to any source X * file anywhere in the lq-text package. X * A short description of the change is added to the Log here, too. X * Lee. X */ X X#define LQTEXTREVISION "Release 1.10" X X/* $Revision: 1.10 $ X * X * Revision 1.6 90/10/04 17:12:45 lee X * lqtext now compiles and mostly works under BSD. X * Fixes bug in phrase matching -- PhraseMatchLevel now works on one-word X * phrases. X * X * Revision 1.5 90/09/28 22:19:36 lee X * Made GetChar() a macro in lqaddfile -- speed improvement... X * X * Revision 1.4 90/09/20 16:37:35 lee X * Fixed Mail and News filters so that they throw away the unwanted header X * parts correctly. X * X * Revision 1.3 90/09/20 12:51:24 lee X * Major sdbm initialisation bug fixed. X * X * Revision 1.2 90/09/20 11:52:35 lee X * Fixed the filters so that lqshow highlights the right word (the qxx fix) X * X * Revision 1.1 90/09/20 11:52:18 lee X * Initial revision X * X * X */ @@@End of lq-text/src/h/Revision.h echo x - lq-text/src/h/blkheader.h 1>&2 sed 's/^X//' >lq-text/src/h/blkheader.h <<'@@@End of lq-text/src/h/blkheader.h' X/* blkheader.h -- Copyright 1989 Liam R. Quin. All Rights Reserved. X * This code is NOT in the public domain. X * See the file COPYRIGHT for full details. X * X * (was called blockheader.h, but this was too long on SysV for RCS) X */ X X/* descibe the physical WOrdPlace database... 
X * X * $Header: /usr/src/cmd/lq-text/src/h/RCS/blkheader.h,v 1.2 90/03/20 20:57:46 lee Rel1-10 $ X * X * $Log: blkheader.h,v $ X * Revision 1.2 90/03/20 20:57:46 lee X * removed WID from the block. This reduces checking, but should also X * noticeably reduce the size of the database. X * X * Revision 1.1 90/03/20 20:54:44 lee X * Initial revision X * X */ X X/* The header of each block -- I can't use sReadNumber, because I don't know X * the size of NextOffset until I get to the end, and it's too late by then! X * X * I should really store the block offset, and not the byte offset. This X * would save a whole byte -- I could use 3 bytes for the NextBlock! X */ Xtypedef struct { X unsigned long NextOffset; /* a byte offset */ X char Data[1]; /* the address of this is where the number start... */ X} t_BlockHeader; @@@End of lq-text/src/h/blkheader.h echo x - lq-text/src/h/emalloc.h 1>&2 sed 's/^X//' >lq-text/src/h/emalloc.h <<'@@@End of lq-text/src/h/emalloc.h' X/* emalloc.h -- Copyright 1989 Liam R. Quin. All Rights Reserved. X * This code is NOT in the public domain. X * See the file COPYRIGHT for full details. X */ X X/* emalloc.h -- header file for emalloc.c, Liam Quin's malloc() wrapper X * X * $Id: emalloc.h,v 1.5 91/03/02 19:40:04 lee Rel1-10 $ X * X * $Log: emalloc.h,v $ X * Revision 1.5 91/03/02 19:40:04 lee X * Simpler version of malloc defines if MALLOCTRACE unused... X * X * Revision 1.4 91/03/02 18:31:21 lee X * Simpler call to malloc wrappers if MALLOCTRACE undefined. X * X * Revision 1.3 90/10/06 02:18:26 lee X * Prepared for first beta release. X * X * Revision 1.2 90/08/29 21:57:44 lee X * removed most of the testing code X * X * Revision 1.1 90/08/09 19:14:48 lee X * Initial revision X * X * Revision 2.2 89/10/08 20:45:20 lee X * Working version of nx-text engine. Addfile and wordinfo work OK. X * X * X */ X Xextern int _LiamIsInCurses; X X#define InitScr() (_LiamIsInCurses = initscr()) X#define EndWin() (_LiamIsInCurses ? 
(_LiamIsInCurses = 0), endwin() : 0) X Xextern char *_emalloc(), *_erealloc(), *_ecalloc(); Xextern void _efree(); X X#ifdef MALLOCTRACE X#define emalloc(u) _emalloc(u, __FILE__, __LINE__) X#define erealloc(s, u) _erealloc(s, u, __FILE__, __LINE__) X#define ecalloc(n, siz) _ecalloc(n, siz, __FILE__, __LINE__) X#define efree(s) _efree(s, __FILE__, __LINE__) X#else X#define emalloc _emalloc X#define erealloc _erealloc X#define ecalloc _ecalloc X#define efree _efree X#endif @@@End of lq-text/src/h/emalloc.h echo x - lq-text/src/h/fileinfo.h 1>&2 sed 's/^X//' >lq-text/src/h/fileinfo.h <<'@@@End of lq-text/src/h/fileinfo.h' X/* fileinfo.h -- Copyright 1989 Liam R. Quin. All Rights Reserved. X * This code is NOT in the public domain. X * See the file COPYRIGHT for full details. X */ X X/* Internal structure used by NX-Text to represent a word */ X X/* Needs: sys/types.h */ X X/* $Id: fileinfo.h,v 1.2 90/10/06 02:18:27 lee Rel1-10 $ X * X * $Log: fileinfo.h,v $ X * Revision 1.2 90/10/06 02:18:27 lee X * Prepared for first beta release. X * X * Revision 1.1 90/08/09 19:14:57 lee X * Initial revision X * X * Revision 2.2 89/10/08 20:45:57 lee X * Working version of nx-text engine. Addfile and wordinfo work OK. X * X * Revision 2.1 89/10/02 01:14:29 lee X * New index format, with Block/WordInBlock/Flags/BytesSkipped info. X * X * Revision 1.2 89/09/16 21:15:19 lee X * First demonstratable version. X * X * Revision 1.1 89/09/07 21:00:34 lee X * Initial revision X * X * X */ X Xtypedef unsigned long t_FID; X Xtypedef struct { X char *Name; X t_FID FID; /* File Identifier */ X int FilterType; /* command to ASCIIify, 0 unknown, 1 none */ X time_t Date; /* when the file was last indexed */ X FILE *Stream; X} t_FileInfo; X X#define FindFile(name) ((*(name) == '/') ? 
(name) : _FindFile(name)) Xextern char *_FindFile(); @@@End of lq-text/src/h/fileinfo.h echo x - lq-text/src/h/filter.h 1>&2 sed 's/^X//' >lq-text/src/h/filter.h <<'@@@End of lq-text/src/h/filter.h' X/* filter.h -- Copyright 1989 Liam R. Quin. All Rights Reserved. X * This code is NOT in the public domain. X * See the file COPYRIGHT for full details. X */ X X/* filter.h -- define filter table for NX-Text, Liam Quin's text retrieval X * program. X * This table is built from a file like a simplified /etc/magic, normally X * stored in /usr/local/lib/nx-text/lib/filtertable X * but you can set this either here or in the Makefile. X * X * NEEDS: stdio.h X * X * $Id: filter.h,v 1.6 91/03/02 18:45:04 lee Rel1-10 $ X * X * $Log: filter.h,v $ X * Revision 1.6 91/03/02 18:45:04 lee X * Spell MAILFILTER correctly in the ifdef... X * X * Revision 1.5 90/10/13 03:11:31 lee X * Now defines filters for easier stand-alone testing of stuff... X * X * Revision 1.4 90/10/06 02:18:28 lee X * Prepared for first beta release. X * X * Revision 1.3 90/09/28 23:03:16 lee X * Now use MAILFILTER and NEWSFILTER... X * X * Revision 1.2 90/08/29 21:57:57 lee X * removed most of the testing code X * X * Revision 1.1 90/08/09 19:15:01 lee X * Initial revision X * X * Revision 2.2 89/10/08 20:46:04 lee X * Working version of nx-text engine. Addfile and wordinfo work OK. X * X * Revision 2.1 89/10/02 01:14:33 lee X * New index format, with Block/WordInBlock/Flags/BytesSkipped info. X * X * X */ X X#define FTYPE_NEWS 1 X#define FTYPE_MAIL 2 X#define FTYPE_CDMS 3 X#define FTYPE_MOSTLYASCII 4 X#define FTYPE_C_SOURCE 5 X X/* The Type field in each array entry is so that I can do some very simple X * checking... 
X */ Xextern int fclose(), pclose(); Xstruct s_FilterTable { X int Type; X int (* close)(); /* how to close the darned stream */ X char *String; X}; X#ifndef FILTERDEF Xextern struct s_FilterTable FilterTable[]; X#else Xstruct s_FilterTable FilterTable[] = { X { 0, fclose, 0 }, /* use fopen() */ X#ifndef NEWSFILTER X# define NEWSFILTER "NewsFilter" X#endif X { FTYPE_NEWS, pclose, NEWSFILTER }, X#ifndef MAILFILTER X# define MAILFILTER "MailFilter" X#endif X { FTYPE_MAIL, pclose, MAILFILTER }, X#ifdef FTYPE_CDMS /* CrystalWriter from Syntactics... */ X { FTYPE_CDMS, pclose, "CDMSFilter" }, X#endif X#ifdef FTYPE_NTROFF X { FTYPE_NTROFF, pclose, "lqderoff" }, /* not yet released, sorry */ X#endif X { FTYPE_MOSTLYASCII, pclose, "AsciiFilter" }, X#ifdef FTYPE_C_SOURCE X { FTYPE_C_SOURCE, pclose, "CFilter" }, /* leave me last! */ X#endif X /* If you add more, you MUST update MaxFilterType */ X { 0, 0, 0 } X}; X#endif X#define MaxFilterType FTYPE_C_SOURCE @@@End of lq-text/src/h/filter.h echo x - lq-text/src/h/globals.h 1>&2 sed 's/^X//' >lq-text/src/h/globals.h <<'@@@End of lq-text/src/h/globals.h' X/* globals.h -- Copyright 1989 Liam R. Quin. All Rights Reserved. X * This code is NOT in the public domain. X * See the file COPYRIGHT for full details. X * X * $Id: globals.h,v 1.6 91/02/20 19:26:53 lee Rel1-10 $ X * X * (see Log at end of this file for change history. Keep this up to date X * using rcs if you have it...) X */ X X/* globals.h -- declarations of globally accessible variables, and also X * of configurable parameters. X * X * Some of the configuation options might be given in ../Makefile, so X * you must check in there too. X * X * Everything that includes this file must be linked with Defaults.c X */ X X/* X * DOCPATH gives the list of directories in which to search in order X * to find files to retrieve and to index. The default can be wired X * in here, or can be simply "." 
(in which case relative pathnames will X * be from wherever one invokes the commands, and absolute pathnames X * will be absolute. For example, X * #define DFLTDOCPATH "/usr/man:." X * In any case, it can be overridden by a DOCPATH line in the configuration X * file for a given database (README in the database directory), and also X * by an environment variable DOCPATH (the latter taking precedence over X * the former). X * X * Use ((char *) 0) to disable the default -- in this case, you always have X * to give one, either with the $DOCPATH variable or in the database file. X * X */ X#ifndef DFLTDOCPATH X# define DFLTDOCPATH ((char *) 0) X#endif X X/* LQTEXTDIR: if the programs can't find the directory to use -- i.e., X * there was no -d option and $(LQTEXTDIR) is unset, we either X * look in UNDERHOME (if that was defined here) or in wherever LQTEXTDIR X * is defined to point. X */ X#ifndef LQTEXTDIR X# define LQTEXTDIR "/usr/spool/lqtextdir" X#endif X X/* If UNDERHOME is set, look there for a directory -- e.g. X * #define UNDERHOME "sockdrawer" X * would make lqtext programs look for a directory something like X * /users/liam/sockdrawer X * (where /users/liam is my login directory) X */ X#ifndef UNDERHOME X# define UNDERHOME "LQTEXTDIR" X#endif X X/* The name of a configuration file found in the database directory: X */ X#define CONFIGFILE "README" X X/* If the config file doesn't give a filename for a list of common X * words, we look for one called DFLTCOMMONFILE (and don't mind if we X * don't find it). Use "/dev/null" or ((char *) 0) if you want to X * disable the default. X * It's case sensitive, of course. X */ X#define DFLTCOMMONFILE "CommonWords" X X#ifndef PAGER X/* The default pager to use if the user doesn't set $PAGER. This is only X * used in lqshow, the browser. Good things to try are X * more, "less -Ce", and (generally only on System V) "pg -ns". X * Specify an absolute path if possible. It's often a lot faster, and X * it's somewhat safer... 
X */ X# ifdef BSD X# define PAGER "/usr/ucb/more" X# else X# define PAGER "/usr/bin/pg -ns" X# endif X#endif X X#ifndef DBMCREAT X/* If you are using dbm or gdbm (?), you will need to create the dbm files X * by hand yourself. Defining DBMCREAT as 0 makes the software do this X * automatically, with a very slight performance penalty. X * X * ndbm and sdbm can use O_CREAT, so set it to 1 here for them. X * You will also have to look at ../Makefile, ../PORTING, smalldb.h and X * ../lqlib/smalldb.h, making whatever changes are needed. X */ X# define DBMCREAT 1 /* 1 for ndbm, 0 for dbm */ X#endif X X#ifdef sparc X# define NEEDALIGN X#endif X X#ifdef mips /* e.g. SGI machines */ X# define NEEDALIGN X#endif X X/* NEEDALIGN is for C compilers that require C structures to start at X * word boundaries. You need this on sparc and sgi machines... X */ X X/*** X *** If you want to change anything beyond here... X *** X *** well, you can. X *** After all, it's your copy. X *** X *** But don't come running back to me if it doesn't work! X *** At least not until you have tried X *** + understanding what the problem is; X *** + looking at the source to see why; X *** + fixing the problem; X *** + taking off your shoes and socks and grinning for a while. X *** X *** Liam. X *** X ***/ X X/* The following let you reconfigure the names of the files that form X * part of the database, but there is no point in doing so unless you X * are porting to some strange system that has absurd filename restrictions! X */ X#ifndef WORDINDEX X# define WORDINDEX "wordlist" X /* This is a dbm file, so you'll get two files, one with ".pag" X * stuck on the end and one with ".dir" on the end. X * It contains an entry for every word in the database, enabling X * the software to go from a word to an integer (well, a t_WID) X * very quickly. X * It tends to be a little over one tenth of the size of the DATABASE. 
X */ X#endif X#ifndef WIDINDEXFILE X# define WIDINDEXFILE "WIDIndex" X /* WIDINDEXFILE contains each word in the datbase, together with some X * information and the first few bytes of data. X * It contains WIDBLOCKSIZE bytes for every word, but this has to X * be at least MAXWORDLEN + 10 bytes long (see WordInfo.c). X */ X#endif X#ifndef DATABASE X# define DATABASE "data" X /* For those words whose data doesn't fit into the first WIDBLOCKSIZE X * bytes, space is allocated in this file in BLOCKSIZE chunks. Make X * BLOCKSIZE small, or you will waste a lot of space -- on the other X * hand, there's a 4-byte-per-block overhead at the moment. X * This file gets very b i g indeed. X */ X#endif X#ifndef FILEINDEX X# define FILEINDEX "FileList" X /* This is a list of every file in the database, again in dbm format, X * so there are actually two files (a .pag and a .dir) involved. X * If your files are short, it will quickly grow to a tenth of the size X * of the database. X * It stores the filename, and some other information. X */ X#endif X#ifndef FIDFILE X# define FIDFILE "FIDFile" X /* This contains the largest currently used file number... you can X * look at it to see how many files have been indexed. X * It is only a few bytes long. X */ X#endif X#ifndef WIDFILE X# define WIDFILE "WIDFile" X /* This contains the largest currently used word number... you can X * look at it to see how many unique words have been seen. X * It is only a few bytes long. X */ X#endif X X#ifndef WIDBLOCKSIZE X# define WIDBLOCKSIZE 32 X/* WIDBLOCKSIZE absolutely must be large enough to fit at least one byte X * of actual data, or all hell will break loose. X * (actually that could be fixed...). 
X * In any case, it has to contain (apart from the >= 1 byte of data): X * + the length count (1 byte) and the word itself (no null on the end) X * + the block number in the database (1..5 bytes) X * + the number of matches (1..5 bytes) X * X * It helps efficiency very, very slightly if these are a power of two X * bytes, as then they never cross Unix block boundaries. X * X */ X#endif X X#ifndef BLOCKSIZE X#define BLOCKSIZE 64 X/* BLOCKSIZE is the size of blocks in the data file. There are several X * tradeoffs: X * + there is a 4-bytes-per-block overhead for list pointers, so it's X * a good idea to make them large X * + there's a bit of work involved in fetching the blocks, so things go X * faster if they're larger... X * + many blocks are not full, so it's a good idea to make them small. X * On average, a little over (BLOCKSIZE - 4) / 2 bytes are wasted for X * every word chain. X * + since many of the blocks are not full, it's a good idea to make them X * small, minimising the amount of extra data that gets copied around by X * the Unix kernel. If the blocks are smaller it'll go faster... X * X * It helps efficiency very, very slightly if these are a power of two X * bytes, as then they never cross Unix block boundaries. X * X */ X#endif X X/**** Some useful macros */ X X/* STREQ(a,b) is much faster than strcmp() in the (common) case that the X * first character of the strings differ. X * It is due (as far as I know) to Henry Spencer, at the University of X * Toronto Zoology Dept., X * utzoo!henry X */ X#ifndef STREQ X# define STREQ(henry,utzoo) (*(henry) == *(utzoo) && !strcmp(henry, utzoo)) X#endif X X/* Inline functions are functions that get expanded inline during X * compilation -- sort of like macros with real local arguments. X * Not all compilers support them. 
X */ X#ifdef __GNUC__ X# define INLINE inline X#else X# define INLINE /* not supported */ X#endif X X#ifdef DefineThem X# define DECL(name, type, value) type name = value X# define EXTERN /* just define them please */ X#else X# define EXTERN extern /* declare but do not define */ X# define DECL(name, type, value) EXTERN type name X#endif X X/****/ X X/* Now declare (or define) things: */ X Xextern char *progname; /* from progname.c, for error messages */ XDECL(CommonWordFile, char *, DFLTCOMMONFILE); XDECL(DatabaseDir, char *, LQTEXTDIR); XDECL(FileIndex, char *, FILEINDEX); XDECL(WordIndex, char *, WORDINDEX); XDECL(DataBase, char *, DATABASE); XDECL(FidFile, char *, FIDFILE); XDECL(WidFile, char *, WIDFILE); XDECL(WidIndexFile, char *, WIDINDEXFILE); XDECL(DocPath, char *, DFLTDOCPATH); X X/* X * $Log: globals.h,v $ X * Revision 1.6 91/02/20 19:26:53 lee X * Added NEEDALIGN on mips systems X * (thanks to Mark Moraes, moraes@cs.toronto.edu) X * X * Revision 1.5 90/10/07 20:41:20 lee X * Added NEEDALIGN for fussy architectures. X * X * Revision 1.4 90/10/06 02:21:21 lee X * Prepared for first beta release. X * X * Revision 1.3 90/10/03 21:31:54 lee X * Added definition of PAGER, which has moved here from lqshow.c X * X * Revision 1.2 90/08/09 19:15:03 lee X * after BSD lint and saber-C X * X * Revision 1.1 90/03/23 17:32:11 lee X * Initial revision X * X * X */ @@@End of lq-text/src/h/globals.h echo x - lq-text/src/h/numbers.h 1>&2 sed 's/^X//' >lq-text/src/h/numbers.h <<'@@@End of lq-text/src/h/numbers.h' X/* numbers.h -- Copyright 1989 Liam R. Quin. All Rights Reserved. X * This code is NOT in the public domain. X * See the file COPYRIGHT for full details. X */ X X/* ReadNumber and WriteNumber take/return a long, using a compression X * algorithm to reduce the amount of data taken. X * X * They use (char *) pointers instead if prefixes with an s. 
X * X * $Id: numbers.h,v 1.3 90/10/06 02:18:30 lee Rel1-10 $ X * X */ X Xextern INLINE unsigned long fReadNumber(); Xextern INLINE unsigned long sReadNumber(); X Xextern INLINE void fWriteNumber(); Xextern INLINE void sWriteNumber(); X X/* X * $Log: numbers.h,v $ X * Revision 1.3 90/10/06 02:18:30 lee X * Prepared for first beta release. X * X * Revision 1.2 90/08/09 19:15:42 lee X * after BSD lint and saber-C X * X * Revision 1.1 90/04/19 19:27:04 lee X * Initial revision X * X * Revision 2.2 89/10/08 20:46:43 lee X * Working version of nx-text engine. Addfile and wordinfo work OK. X * X * Revision 1.2 89/09/16 21:15:40 lee X * First demonstratable version. X * X * Revision 1.1 89/09/07 21:06:02 lee X * Initial revision X * X */ @@@End of lq-text/src/h/numbers.h echo x - lq-text/src/h/pblock.h 1>&2 sed 's/^X//' >lq-text/src/h/pblock.h <<'@@@End of lq-text/src/h/pblock.h' X/* pblock.h -- Copyright 1989 Liam R. Quin. All Rights Reserved. X * This code is NOT in the public domain. X * See the file COPYRIGHT for full details. X */ X X#ifndef PBLOCK_H /* the matching endif is at the end of the file... */ X X# define PBLOCK_H X/* The physical Word Database... X * X * First, there is the WID (from 1 to 4 bytes) X * X * Then, there is a NEXT pointer (or 0). X * X * Then, there is a list of (FID, OFFSET) pairs. X * X * $Header: /usr/src/cmd/lq-text/src/h/RCS/pblock.h,v 1.2 90/08/09 19:15:45 lee Rel1-10 $ X * X * $Log: pblock.h,v $ X * Revision 1.2 90/08/09 19:15:45 lee X * after BSD lint and saber-C X * X * Revision 1.1 90/03/01 23:54:37 lee X * Initial revision X * X * Revision 2.2 89/10/08 20:47:04 lee X * Working version of nx-text engine. Addfile and wordinfo work OK. X * X * Revision 2.1 89/10/02 01:15:36 lee X * New index format, with Block/WordInBlock/Flags/BytesSkipped info. X * X * Revision 1.2 89/09/16 21:15:43 lee X * First demonstratable version. 
X * X * Revision 1.1 89/09/07 21:06:09 lee X * Initial revision X * X * X */ X Xtypedef struct { X t_FID FID; X unsigned long BlockInFile; X unsigned short WordInBlock; X unsigned short Flags; X unsigned char StuffBefore; /* preceding ignored garbage */ X} t_WordPlace; X X/* This structure is really only used by addfile; elsewhere arrays of X * WordlPlace are used. X */ X Xtypedef struct s_WordPlaceList { X char *Word; X t_WordPlace WordPlace; X struct s_WordPlaceList *Next; X} t_WordPlaceList; X X/* Warning: One cannot use structure copy for a pblock! */ X X/* This does *NOT* correspond to the physical disk layout -- see pblock.c */ Xtypedef struct { X t_WID WID; /* for checking; */ X unsigned long ChainStart; X unsigned long NumberOfWordPlaces; X t_WordPlace WordPlaces[1]; /* made by joining lots of disk blocks... */ X} t_pblock; X X#endif @@@End of lq-text/src/h/pblock.h echo x - lq-text/src/h/phrase.h 1>&2 sed 's/^X//' >lq-text/src/h/phrase.h <<'@@@End of lq-text/src/h/phrase.h' X/* phrase.h -- Copyright 1989 Liam R. Quin. All Rights Reserved. X * This code is NOT in the public domain. X * See the file COPYRIGHT for full details. X */ X X/* LQ-Text -- Liam's Text Retrieval Package X * Liam R. Quin, September 1989, and later... X * X * phrase.h -- data structures for handling entire phrases X * X */ X X/* $Id: phrase.h,v 1.2 90/10/06 02:18:33 lee Rel1-10 $ X * X */ X X/* Represent a Phrase as a linked list of WordInfo pointers, plus a list X * of matches. 
X */ X Xtypedef struct s_PhraseItem { X t_WordInfo *Word; X struct s_PhraseItem *Next; X unsigned long SearchIndex; /* For phrase-matching */ X char *WordStart; /* pointer into original phrase */ X} t_PhraseItem; X Xtypedef enum { X PCM_AnyCase, /* Ignore case entirely */ X PCM_HalfCase, /* Upper only matches upper; lower matches either */ X PCM_SameCase, /* Exact matching */ X} t_PhraseCaseMatch; X Xtypedef struct s_Match { X t_WID WID; X t_WordPlace *Where; X struct s_Match *Next; X} t_Match; X Xtypedef struct s_MatchList { X t_Match *Match; X struct s_MatchList *Next; X} t_MatchList; X X Xtypedef struct s_Phrase { X t_PhraseItem *Words; /* list of words and pblocks */ X char *OriginalString; /* as supplied by the user */ X char *ModifiedString; /* after deleting short/unindexed words */ X unsigned long NumberOfMatches; X t_MatchList *Matches; X struct s_Phrase *Next; /* for use when we're in a list of phrases... */ X unsigned short HasUnknownWords; X} t_Phrase; X X/* This is for FilleList() */ Xtypedef struct s_Answer { X char *Answer; X struct s_Answer *Next; X} t_Answer; X X/* X * $Log: phrase.h,v $ X * Revision 1.2 90/10/06 02:18:33 lee X * Prepared for first beta release. X * X * Revision 1.1 90/08/09 19:15:49 lee X * Initial revision X * X * Revision 1.1 89/09/17 23:03:37 lee X * Initial revision X * X */ @@@End of lq-text/src/h/phrase.h echo x - lq-text/src/h/smalldb.h 1>&2 sed 's/^X//' >lq-text/src/h/smalldb.h <<'@@@End of lq-text/src/h/smalldb.h' X/* smalldb.h -- Copyright 1989 Liam R. Quin. All Rights Reserved. X * This code is NOT in the public domain. X * See the file COPYRIGHT for full details. X */ X X/* $Id: smalldb.h,v 1.3 91/03/03 00:12:56 lee Exp $ X */ X X/* You must include fcntl.h before this file. 
*/ X X/* Choose the dbm implementation: each of ndbm, sdbm and ozmahash, when X * defined, includes its own header and defines FoundDbmOK and NDBM. X * If none of them is defined, Liamdbm.h is included instead. X */ X#ifdef ndbm X# include <ndbm.h> X# define FoundDbmOK X# define NDBM X#endif X X#ifdef sdbm X# include "sdbm.h" X# define FoundDbmOK X# define NDBM /* it's compatible */ X#endif X X#ifdef ozmahash X# include "ozmadbm.h" X# define FoundDbmOK X# define NDBM /* it's compatible as well... */ X#endif X X#ifndef FoundDbmOK X# include "Liamdbm.h" X#endif X X#ifndef O_RDWR X# include <fcntl.h> X#endif X X#define CACHE 2 /* size of DBM cache in startdb() -- I only use two! */ X/* If you rip out the dbm cache stuff for use elsewhere, increase the 2 X * to something like 5 or so!!! Each entry uses two file pointers. X * Lee X */ X X/* CACHE is defined unconditionally just above, so the two macros in this X * block only take effect if that definition is removed. X */ X#ifndef CACHE X# define startdb(FilePrefix) dbm_open(FilePrefix, O_RDWR|O_CREAT, 0640) X# define enddb(db) { if (db) dbm_close(db); } X#endif X X X#ifndef startdb XDBM *startdb(); X#endif X X#ifndef enddb X# ifdef CACHE X# define enddb(db) /* nothing to do, because of the cache */ X# else X void enddb(); X# endif /* CACHE */ X#endif /* !enddb */ X X/* X * $Log: smalldb.h,v $ X * Revision 1.3 91/03/03 00:12:56 lee X * Integrated ozmahash. X * X * Revision 1.2 90/10/06 02:18:36 lee X * Prepared for first beta release. X * X * Revision 1.1 90/08/09 19:16:00 lee X * Initial revision X * X * Revision 2.2 89/10/08 20:47:19 lee X * Working version of nx-text engine. Addfile and wordinfo work OK. X * X * Revision 2.1 89/10/02 01:16:01 lee X * New index format, with Block/WordInBlock/Flags/BytesSkipped info. X * X * Revision 1.2 89/09/16 21:15:45 lee X * First demonstratable version. X * X * Revision 1.1 89/09/07 21:06:12 lee X * Initial revision X * X */ @@@End of lq-text/src/h/smalldb.h echo x - lq-text/src/h/wordindex.h 1>&2 sed 's/^X//' >lq-text/src/h/wordindex.h <<'@@@End of lq-text/src/h/wordindex.h' X/* wordindex.h -- Copyright 1989 Liam R. Quin. All Rights Reserved. X * This code is NOT in the public domain. X * See the file COPYRIGHT for full details. X */ X X/* (this file is currently empty, but might return...)
*/ X X/* X * $Id: wordindex.h,v 1.2 90/10/06 02:18:38 lee Rel1-10 $ X * X * $Log: wordindex.h,v $ X * Revision 1.2 90/10/06 02:18:38 lee X * Prepared for first beta release. X * X * Revision 1.1 90/08/09 19:16:02 lee X * Initial revision X * X * Revision 2.1 89/10/02 01:16:06 lee X * New index format, with Block/WordInBlock/Flags/BytesSkipped info. X * X * Revision 1.2 89/09/16 21:15:47 lee X * First demonstratable version. X * X * Revision 1.1 89/09/07 21:06:13 lee X * Initial revision X * X * X */ @@@End of lq-text/src/h/wordindex.h echo x - lq-text/src/h/wordinfo.h 1>&2 sed 's/^X//' >lq-text/src/h/wordinfo.h <<'@@@End of lq-text/src/h/wordinfo.h' X/* wordinfo.h -- Copyright 1989 Liam R. Quin. All Rights Reserved. X * This code is NOT in the public domain. X * See the file COPYRIGHT for full details. X */ X X/* X * $Id: wordinfo.h,v 1.3 90/10/06 02:21:30 lee Rel1-10 $ X */ X Xtypedef unsigned long t_WID; /* Word IDentifier, see t_WordInfo.WID */ X X/* pblock.h declares structures with t_WID members, so it is included only X * after the typedef above. X */ X#ifndef PBLOCK_H X# include "pblock.h" X#endif X X#ifndef WIDBLOCKSIZE X#define WIDBLOCKSIZE 32 /* default; define WIDBLOCKSIZE earlier to override */ X#endif X Xextern char *WidIndexFile; /* Default.c */ X X/* this is a hack for speed: */ X/* (the preprocessor turns every later use of GetNextWID into SpoofGetNextWID) */ X#define GetNextWID SpoofGetNextWID X X/** A t_WordInfo describes a single word, in terms of X ** where it came from X ** how to find its database entries X ** how to find the in-core database entries (a copy of the above) X **/ X X/* There would be a performance benefit if this struct was smaller. X * It was foolish of me to use WordInfo for so many different things in X * addfile, and now I pay the price. X * Addfile may end up calling malloc for 10,000 of these things... X * X * There should be: X * t_WordPlace (exists, pblock.h) X * for recording a specific occurrence of a given word in a given file X * t_WordInfo (definition follows... look down...) X * for recording information about a WID's entry in the database X * t_WordPlaceList X * for addfile to make a list of word places...
X * t_pblock (exists, see pblock.h) X * for containing the list of WordPlaces found in the database for a X * given word, or for putting them there. Uses arrays rather than X * lists to squeeze a few extra milliseconds. Some hope :-( :-) X * X * t_WordPlaceList almost certainly happen in the next major edit phase... X * t_WordInfo will then be somewhat smaller. X * All of the entries marked with a leading comment (below) should X * be elsewhere (and some of them were, in the Grand Design!). X * X */ Xtypedef struct s_WordInfo { X char *Word; /* the text of the word; Length (below) holds its length */ X t_WID WID; /* My Word Identifier */ X unsigned long NumberOfWordPlaces; /* total */ X t_FID FID; /* where we got it from */ X unsigned long Offset; /* word entry position in the data base */ X struct s_WordInfo *Next; /* for making lists of WordInfo structs */ X char *DataBlock; /* for writing me out to the index */ X char *WordPlaceStart; X t_WordPlace *WordPlaces; /* first few pairs */ X t_WordPlace WordPlace; /* For addfile -- this is due to go!!!! */ X /* shorts are at the end to obviate alignment padding... */ X /* NOTE(review): WordPlacesInHere is an unsigned long, not a short; only X * Length below is actually a short. X */ X unsigned long WordPlacesInHere; X unsigned short Length; /* Word length; reduce the need for strlen */ X /* The Flags member below is compiled out by #if 0, so it is not part of X * the structure at present. X */ X#if 0 X unsigned char Flags; X /* Flags serve two purposes: X * the LSB says whether the entry is sorted. X * the remainder are a logical AND of all entries in a sorted X * block. NOTE: if the block is unsorted, the other bits should X * still be up to date. X */ X#endif X} t_WordInfo; X X/* X * $Log: wordinfo.h,v $ X * Revision 1.3 90/10/06 02:21:30 lee X * Prepared for first beta release. X * X * Revision 1.2 90/08/09 19:16:04 lee X * after BSD lint and saber-C X * X * Revision 2.2 89/10/08 20:47:27 lee X * Working version of nx-text engine. Addfile and wordinfo work OK. X * X * Revision 2.1 89/10/02 01:16:15 lee X * New index format, with Block/WordInBlock/Flags/BytesSkipped info. X * X * Revision 1.3 89/09/17 23:04:52 lee X * Various fixes; NumberInBlock now a short...
X * X * Revision 1.2 89/09/16 21:15:49 lee X * First demonstratable version. X * X * Revision 1.1 89/09/07 21:06:16 lee X * Initial revision X * X */ @@@End of lq-text/src/h/wordinfo.h echo x - lq-text/src/h/wordrules.h 1>&2 sed 's/^X//' >lq-text/src/h/wordrules.h <<'@@@End of lq-text/src/h/wordrules.h' X/* wordrules.h -- Copyright 1989 Liam R. Quin. All Rights Reserved. X * This code is NOT in the public domain. X * See the file COPYRIGHT for full details. X */ X X/* $Id: wordrules.h,v 1.2 90/10/06 02:18:39 lee Rel1-10 $ X * X */ X X/* Rules for determining what an indexable word looks like; X * These are implemented by the various filters, as well as by X * the indexing software itself. This means that the filters X * don't need to keep track of word lengths, as addfile will do this, X * but that they should not emit non-word stuff if they can help it, X * turning it into the equivalent amount (in bytes) of white-space X * instead. X * They should also turn words they don't want indexed into "qxxx", X * with the right number of x's (e.g. "bare" --> "qxxx"). X */ X X/* A "word" is a letter followed by any combination of X * letters, digits or '_'. An embedded (not trailing) ' is also allowed X * (_ is allowed so that one can index programming languages; strictly X * speaking, a lot of languages allow _ at the start too, but I don't X * want to get confused by nroff output etc., which contains lines of X * underscores) X * X * This scheme currently excludes numbers... X * 31, 31.4 and 31.9e4 will all be ignored. So will 1987. X */ X X/* NOTE(review): the three macros below expand to the <ctype.h> classifiers, X * and this header does not include <ctype.h> itself. WithinWord uses ch X * without parentheses and can evaluate it up to three times, so pass only X * a simple expression with no side effects. X */ X#define StartsWord(ch) isalpha(ch) X#define WithinWord(ch) (isalnum(ch) || (ch == '_') || (ch == '\'')) X#define EndsWord(ch) isalnum(ch) X X/* Don't index words unless they are at least MinWordLength characters X * long! X */ X#define MinWordLength 3 X#define MaxWordLength 18 /* truncate words to this */ X/* The Following is for *.WordPlace.BlockInFile.
If words are constrained X * to be 3 or more characters long, there can be at most X * (FileBlockSize / 4) of them in a block (since words must be separated X * by at least one character). X * Hence, 7 bits, which allows 0..127 giving 128 distinct values, X * gives us a block that is 128 * (MinWordLength + 1) bytes long. X */ X#define FileBlockSize (128 * (MinWordLength + 1)) /* 512 while MinWordLength is 3 */ X X/* WordPlace Flags: X * When a plural word is found, or a possessive word, it is reduced to X * being singular, and flags are set appropriately. X * Also, a flag is set to say if the word started with a Capital Letter. X * This puts Window, windows, and Window's all together, but enables them X * to be differentiated for searching if required. X * These flags are implemented by WordInfo and addfile, not by the various X * filters, but the filters must preserve capitalisation of the first letter X * in each word, and pass through apostrophes within words (like this's). X */ X X/* The WPF_ values are octal constants; each one is a single distinct bit, X * and the eight of them together occupy the low eight bits. X */ X#define WPF_WASPLURAL 0001 /* The word... ended in s */ X#define WPF_UPPERCASE 0002 /* ...Started with a capital letter */ X#define WPF_POSSESSIVE 0004 /* ...ended in 's */ X#define WPF_ENDEDINING 0010 /* ...ended in ing */ X#define WPF_LASTWASCOMMON 0020 /* the previous word was common */ X#define WPF_LASTHADLETTERS 0040 /* we skipped some letters to get here */ X#define WPF_HASSTUFFBEFORE 0100 /* Other than 1 byte of garbage before */ X#define WPF_LASTINBLOCK 0200 /* I'm the last word in this block */ X X/* new note (jan 90): X * You can't currently have both plural and posessive in the most common case X * of the boys' muddy feet (for example), as the trailing ' gets deleted. X * this doesn't matter, but perhaps that combination should be reserved for X * had-another-standard-ending??? e.g. -ed or -ing, that isn't often followed by X * -s or -'s... X * X * Also, ENDEDINING (ended in "ing") is currently unused entirely.
X * Perhaps if it is set, the plural and possessive bits should index which of X * four endings was found, although this would preclude special-casing of the X * s's combination. Probably better that way. X * X * I should very much like to have another flag or two, perhaps embedded in X * one of the other fields. This might be feasible if there is a pre-scan X * when the index is written to determine the most common (modal) flags and X * distance (currently I assume 1) and to omit these whenever they are the default. X * In this case, the fact that every occurrence of Jesus starts with a capital X * letter (and ends in -s, *blush*), can still lead to most of the flags being X * omitted. X * X * The next revision will separate the list of FIDs from the rest of the information, X * in which case the embedding of the flags becomes a little trickier. This X * belongs in the TODO file now, sorry. X * X * Liam Quin, January 22nd 1990, at home in Warrington, England (ugh) X * X */ X X/* X * $Log: wordrules.h,v $ X * Revision 1.2 90/10/06 02:18:39 lee X * Prepared for first beta release. X * X * Revision 1.1 90/08/09 19:16:05 lee X * Initial revision X * X * Revision 2.2 89/10/08 20:47:35 lee X * Working version of nx-text engine. Addfile and wordinfo work OK. X * X * Revision 2.1 89/10/02 01:16:19 lee X * New index format, with Block/WordInBlock/Flags/BytesSkipped info. X * X * Revision 1.2 89/09/16 21:15:52 lee X * First demonstratable version. X * X * Revision 1.1 89/09/07 21:06:17 lee X * Initial revision X * X */ @@@End of lq-text/src/h/wordrules.h echo end of part 02 -- Liam R. E. Quin, lee@sq.com, SoftQuad Inc., Toronto, +1 (416) 963-8337