home *** CD-ROM | disk | FTP | other *** search
- From: lee@sq.sq.com (Liam R. E. Quin)
- Newsgroups: alt.sources
- Subject: lq-text Full Text Retrieval Database Part 06/13
- Message-ID: <1991Mar4.020500.16494@sq.sq.com>
- Date: 4 Mar 91 02:05:00 GMT
-
- : cut here --- cut here --
- : To unbundle, sh this file
- #! /bin/sh
- : part 06
- echo x - lq-text/src/liblqtext/progname.c 1>&2
- sed 's/^X//' >lq-text/src/liblqtext/progname.c <<'@@@End of lq-text/src/liblqtext/progname.c'
- X/* progname.c -- Copyright 1989 Liam R. Quin. All Rights Reserved.
- X * This code is NOT in the public domain.
- X * See the file COPYRIGHT for full details.
- X * This file simply declares progname.
- X * This variable MUST be set by main().
- X */
- X
- Xchar *progname = (char *) 0;
- X
- X/* $Id: progname.c,v 1.2 90/10/06 00:12:19 lee Rel1-10 $
- X *
- X * $Log: progname.c,v $
- X * Revision 1.2 90/10/06 00:12:19 lee
- X * Prepared for first beta release.
- X *
- X * Revision 1.1 90/03/24 17:07:22 lee
- X * Initial revision
- X *
- X *
- X */
- @@@End of lq-text/src/liblqtext/progname.c
- echo x - lq-text/src/liblqtext/smalldb.c 1>&2
- sed 's/^X//' >lq-text/src/liblqtext/smalldb.c <<'@@@End of lq-text/src/liblqtext/smalldb.c'
- X/* smalldb.c -- Copyright 1989 Liam R. Quin. All Rights Reserved.
- X * This code is NOT in the public domain.
- X * See the file COPYRIGHT for full details.
- X */
- X
- X/* Simple interface to start and end dbm.
- X * You may also need to supply dbm_store() and dbm_fetch(), but these
- X * should certainly be macros.
- X *
- X * $Id: smalldb.c,v 1.5 91/03/03 00:15:22 lee Rel1-10 $
- X *
- X * $Log: smalldb.c,v $
- X * Revision 1.5 91/03/03 00:15:22 lee
- X * Improved an error message and fixed a permissions bug.
- X *
- X * Revision 1.4 91/03/02 18:52:48 lee
- X * Default access is now read only -- lqWriteAccess must be called otherwise.
- X *
- X * Revision 1.3 90/10/06 00:12:20 lee
- X * Prepared for first beta release.
- X *
- X * Revision 1.2 90/09/20 17:53:26 lee
- X * slight error reporting improvement.
- X *
- X * Revision 1.1 90/08/09 19:16:56 lee
- X * Initial revision
- X *
- X * Revision 2.2 89/10/08 20:47:14 lee
- X * Working version of nx-text engine. Addfile and wordinfo work OK.
- X *
- X * Revision 2.1 89/10/02 01:15:55 lee
- X * New index format, with Block/WordInBlock/Flags/BytesSkipped info.
- X *
- X * Revision 1.2 89/09/16 21:18:39 lee
- X * First demonstratable version.
- X *
- X * Revision 1.1 89/09/07 21:06:11 lee
- X * Initial revision
- X *
- X *
- X */
- X
- X#include "globals.h"
- X
- X#include <stdio.h>
- X
- X#include <fcntl.h>
- X#ifdef BSD
- X# include <sys/param.h>
- X# define PATH_MAX MAXPATHLEN /* untested, sorry */
- X#else /*!BSD*/
- X# include <limits.h> /* for PATH_MAX */
- X#endif
- X#include "smalldb.h"
- X#include "emalloc.h"
- X
- Xextern int strcmp();
- Xextern char *strcpy();
- X
- X/* The physical database for the list of words, and for the list
- X * of files, uses ndbm.
- X * The advantage of this is that it takes only two file system accesses
- X * to retrieve any data item (honest!).
- X * It's also reasonably fast at insertion.
- X * One disadvantage is that it doesn't cope if too many words have the
- X * same (32-bit) hash function, although publicly available replacements
- X * such as the GNU project's gdbm fix this.
- X *
- X * Since starting the database is expensive (two opens and a malloc),
- X * I have a cache of DBM pointers and keep them open. Versions of the
- X * dbm routines that don't support more than one database will have to
- X * have a cache-size of one!
- X * I am not sure what the impact of this would be on performance; for
- X * adding a new file it shouldn't be too bad, as the file list is examined
- X * only once for each file, during reading, and the word database is looked
- X * at (at least once for each distinct word) only on writing.
- X * For retrieval, however, the word database will be looked at for each
- X * word in the query, and the file database for (potentially) each match
- X * of each word, so the requests will be more interspersed.
- X * Under no circumstances is it acceptable to dispense with the cache, as
- X * otherwise you will be doing (literally) thousands of calls to
- X * open() and close() per second!
- X *
- X */
- X
- X#undef startdb
- X
- X#ifndef CACHE
- X/* It's unusual to deal with lots of databases at once, so let's not
- X * waste RAM...
- X */
- X# define CACHE 3
- X#endif
- X
- Xstatic char NameCache[CACHE][PATH_MAX + 1]; /* + 1 for \0, I think */
- Xstatic DBM *Cache[CACHE]; /* (set to zero by definition) */
- X
- Xstatic int MaxInCache = (-1);
- X
- X/* FileFlags and FileModes are passed to dbm_open(); read-only by default */
- Xstatic int FileFlags = O_RDONLY;
- Xstatic int FileModes = 0;
- X
- X/* lqWriteAccess -- ask for read/write access to the database.
- X *
- X * Changes the flags and mode that startdb() hands to dbm_open() from the
- X * read-only default to read/write, creating the files if necessary.
- X * Only databases opened after this call are affected; handles already
- X * sitting in the cache are returned by startdb() unchanged.
- X */
- Xvoid
- XlqWriteAccess()
- X{
- X    /* rw-rw-r--: owner and group may write, everyone else may only read */
- X    FileModes = 0664;
- X    FileFlags = O_CREAT | O_RDWR;
- X}
- X
- X/* startdb -- return an open DBM handle for the database named FilePrefix.
- X *
- X * Handles are kept in a small cache (CACHE slots) so that repeated calls
- X * for the same name do not reopen the files.  On a miss the first free
- X * slot is used; when every slot is busy, slot 0 is closed and reused.
- X * The database is opened with the current FileFlags and FileModes (see
- X * lqWriteAccess()).
- X *
- X * Never returns a null pointer: if the name is too long to be cached, or
- X * dbm_open() fails, a message is printed on stderr and the program exits
- X * with status 1.
- X */
- XDBM *
- Xstartdb(FilePrefix)
- X    char *FilePrefix;
- X{
- X    extern int errno;
- X    register int i;
- X
- X    /* Already open?  Hand back the cached handle. */
- X    for (i = 0; i <= MaxInCache; i++) {
- X        if (Cache[i] && STREQ(NameCache[i], FilePrefix)) {
- X            return Cache[i];
- X        }
- X    }
- X
- X    /* A NameCache slot holds at most PATH_MAX characters plus the \0;
- X     * refuse longer names now rather than let strcpy() below overrun it.
- X     */
- X    for (i = 0; FilePrefix[i] != '\0'; i++) {
- X        if (i >= PATH_MAX) {
- X            (void) fprintf(stderr,
- X                "%s: database name longer than %d characters: %s\n",
- X                progname, (int) PATH_MAX, FilePrefix);
- X            exit(1);
- X        }
- X    }
- X
- X    /* Find an empty slot */
- X    for (i = 0; i <= MaxInCache; i++) {
- X        if (Cache[i] == (DBM *) 0) break;
- X    }
- X
- X    /* No hole among the used slots: take the next unused slot if there
- X     * is one, otherwise recycle slot 0.
- X     */
- X    if (i > MaxInCache) {
- X        if (i >= CACHE) i = 0;
- X    }
- X
- X    if (Cache[i]) dbm_close(Cache[i]);
- X    NameCache[i][0] = '\0';
- X
- X    errno = 0;
- X
- X    if ((Cache[i] = dbm_open(FilePrefix, FileFlags, FileModes)) == (DBM *)0) {
- X        int e = errno;  /* fprintf() may change errno before perror() runs */
- X        (void) fprintf(stderr, "%s: dbm_open error %d: ", progname, errno);
- X        errno = e;
- X        perror(FilePrefix);
- X        exit(1);
- X    }
- X    (void) strcpy(NameCache[i], FilePrefix);
- X    if (i > MaxInCache) MaxInCache = i;
- X
- X    return Cache[i];
- X}
- X
- X#undef enddb
- X
- X/* enddb -- counterpart to startdb(); deliberately does nothing.
- X *
- X * startdb() keeps every handle it opens in its cache, so the database is
- X * not closed here.  The cached handles are closed by cleanupdb().
- X * The argument db is ignored.
- X */
- X/*ARGSUSED*/
- Xvoid
- Xenddb(db)
- X DBM *db;
- X{
- X /* no-op */
- X}
- X
- X/* cleanupdb -- close every database held in the startdb() cache.
- X *
- X * Each slot that has ever been used is emptied: its handle (if any) is
- X * closed with dbm_close() and its remembered name is reset to "".
- X */
- Xvoid
- Xcleanupdb()
- X{
- X    register int slot = 0;
- X
- X    while (slot <= MaxInCache) {
- X        DBM *db = Cache[slot];
- X
- X        /* forget the entry first, then release the handle it held */
- X        Cache[slot] = (DBM *) 0;
- X        NameCache[slot][0] = '\0';
- X        if (db != (DBM *) 0) {
- X            dbm_close(db);
- X        }
- X        slot++;
- X    }
- X}
- @@@End of lq-text/src/liblqtext/smalldb.c
- echo x - lq-text/src/liblqtext/system.c 1>&2
- sed 's/^X//' >lq-text/src/liblqtext/system.c <<'@@@End of lq-text/src/liblqtext/system.c'
- X/* system.c -- Copyright 1989 Liam R. Quin. All Rights Reserved.
- X * This code is NOT in the public domain.
- X * See the file COPYRIGHT for full details.
- X *
- X * This is not a very portable way of doing things... and certainly not
- X * a very fast one. MUST be re-written.
- X * Only for use from within curses.
- X *
- X * Lee
- X *
- X * $Id: system.c,v 1.3 90/10/06 00:21:37 lee Rel1-10 $
- X */
- X
- X#ifdef ultrix
- X# include <cursesX.h>
- X#else
- X# include <curses.h>
- X#endif
- X
- X#ifndef echo
- Xextern int echo();
- X#endif
- X#ifndef wmove
- Xextern int wmove();
- X#endif
- X#ifndef nl
- Xextern int nl();
- X#endif
- X#ifndef noecho
- Xextern int noecho();
- X#endif
- X#ifndef nonl
- Xextern int nonl();
- X#endif
- X#ifndef wrefresh
- Xextern int wrefresh();
- X#endif
- X#ifndef waddstr
- Xextern int waddstr();
- X#endif
- X#ifndef wclear
- Xextern int wclear();
- X#endif
- X
- X/* MySystem -- run a shell command from inside a curses program.
- X *
- X * The screen is cleared and the terminal is put back into an ordinary
- X * cooked, echoing mode (through curses and, for good measure, with stty)
- X * before the command is run with system(3).  Afterwards a prompt is
- X * written to stderr, raw/noecho/nonl mode is restored, and the function
- X * waits for a key to be pressed before marking the screen for redrawing.
- X *
- X * Returns whatever system(string) returned, i.e. the status of the
- X * command itself (not that of the stty helper).
- X */
- Xint
- XMySystem(string)
- X    char *string;
- X{
- X    int val;
- X
- X    clearok(stdscr, TRUE);
- X    clear();
- X    refresh();
- X    noraw();
- X    echo();
- X    nl();
- X    /* the exit status of stty is of no interest to the caller */
- X    (void) system("stty opost icanon onlcr icrnl echo");
- X    val = system(string);
- X    fprintf(stderr, "\n[press return to continue] ");
- X    raw();
- X    noecho();
- X    nonl();
- X    (void) getch();
- X    clearok(stdscr, TRUE);
- X    mvwaddstr(stdscr, 10, 10, " "); /* ???!?? */
- X
- X    return val;
- X}
- X
- @@@End of lq-text/src/liblqtext/system.c
- echo x - lq-text/src/lqtext/FindCommon.sh 1>&2
- sed 's/^X//' >lq-text/src/lqtext/FindCommon.sh <<'@@@End of lq-text/src/lqtext/FindCommon.sh'
- X:
- X# FindCommon -- Copyright 1990 Liam R. Quin. All Rights Reserved.
- X# This code is NOT in the public domain.
- X# See the file COPYRIGHT for full details.
- X#
- X# $Id: FindCommon.sh,v 1.2 90/10/06 00:50:31 lee Rel1-10 $
- X
- X# Find the most common words in the database.
- X# Usage: FindCommon [n], where n is the number of most common words to list (default: 500).
- X
- Xlqword -a | sed -e 's/^......................\(.........\)..\(..*\)$/\1 \2/' |
- Xsort -nr | sed ${1-500}q
- X
- Xexit $?
- X
- X# 1 | 0 | 2 | pcpaintbrush
- X# 2 | 0 | 2 | escape
- X# 3 | 0 | 1 | durham
- X# 4 | 60928 | 12 | making
- X# 5 | 0 | 1 | ethical
- X# 6 | 0 | 1 | committing
- X
- X# $Log: FindCommon.sh,v $
- X# Revision 1.2 90/10/06 00:50:31 lee
- X# Prepared for first beta release.
- X#
- X#
- @@@End of lq-text/src/lqtext/FindCommon.sh
- echo x - lq-text/src/lqtext/Makefile 1>&2
- sed 's/^X//' >lq-text/src/lqtext/Makefile <<'@@@End of lq-text/src/lqtext/Makefile'
- X# Makefile for LQ-Text, a full text retrieval package by Liam R. Quin
- X# This Makefile belongs in the "src/lqtext" directory.
- X#
- X# Note that most of the actual configuration is done in ../Makefile and
- X# in ../h/globals.h, and not here.
- X
- X# Makefile -- Copyright 1990 Liam R. Quin. All Rights Reserved.
- X# This code is NOT in the public domain.
- X# See the file ../COPYRIGHT for full details.
- X#
- X# $Id: Makefile,v 1.5 91/03/03 00:19:26 lee Rel1-10 $
- X
- X
- XPWD=lqtext
- X
- XTARGETS = lqaddfile lqfile lqword lqphrase lqshow lqkwik lq
- XBINFILES =lqaddfile lqfile lqword lqshow lqphrase lqkwik
- X
- XDESTDIR=../bin
- XMODE=755
- XRANLIB=echo
- X
- XEXTRA=-I../h
- X
- Xall: $(TARGETS)
- X
- X# for ndbm (simplest), leave empty or use -lndbm if you need it
- X# for sdbm (best so far), use ../lib/libsdbm.a
- X# for gdbm... well, I dunno.
- XDBMLIBS=../lib/libsdbm.a
- X# DBMLIBS=-lndbm
- X# DBMLIBS=ndbm.o bcopy.o
- X
- XTEXTLIB=../lib/liblqtext.a ../lib/liblq.a
- X
- X# The following are for "make depend" and for sabre to load...
- XDEPENDFILES = ReadAhead.c SixBit.c fileindex.c lqaddfile.c lqphrase.c \
- X lqshow.c lqword.c sizes.c wordtable.c
- X
- X# MALLFILES=/usr/lib/debug/malloc.o /usr/lib/debug/mallocmap.o
- XMALLFILES =
- X
- Xinstall: all
- X for i in $(BINFILES); do cp "$$i" $(DESTDIR); \
- X strip "$(DESTDIR)/$$i" ; \
- X done ; \
- X mv lq $(DESTDIR)/lq; chmod $(MODE) $(DESTDIR)/lq;
- X
- X.SUFFIXES: .c .o .src .obj
- X
- X.c.src:
- X #load $(CFLAGS) $<
- X
- X.o.obj:
- X #load $(CFLAGS) $<
- X
- X# If you are going to use saber on these, you should name the programs.
- Xsaber_src:
- X
- Xsaber_obj:
- X
- Xlq: lq.sh
- X cp lq.sh lq
- X chmod +x lq
- X
- Xlqshow: lqshow.o $(TEXTLIB)
- X $(CC) $(CFLAGS) -o lqshow lqshow.o $(TEXTLIB) $(TERMCAP) $(DBMLIBS)
- X
- Xlqaddfile: lqaddfile.o wordtable.o $(TEXTLIB)
- X $(CC) $(CFLAGS) -o lqaddfile lqaddfile.o wordtable.o \
- X $(TEXTLIB) $(MALLOC) $(DBMLIBS) $(MALLFILES)
- X
- Xlqfile: fileindex.o $(TEXTLIB)
- X $(CC) $(CFLAGS) -o lqfile fileindex.o $(TEXTLIB) $(MALLOC) $(DBMLIBS)
- X
- Xlqword: lqword.o $(TEXTLIB)
- X $(CC) $(CFLAGS) -o lqword lqword.o $(TEXTLIB) $(MALLOC) $(DBMLIBS)
- X
- Xlqkwik: lqkwik.o $(TEXTLIB)
- X $(CC) $(CFLAGS) -o lqkwik lqkwik.o $(TEXTLIB) $(MALLOC) $(DBMLIBS)
- X
- Xlqphrase: lqphrase.o $(TEXTLIB)
- X $(CC) $(CFLAGS) -o lqphrase lqphrase.o $(TEXTLIB) $(DBMLIBS)
- X
- Xlint: AddFile.Lint News.Lint FileInfo.Lint Phrase.Lint
- X
- Xtidy:
- X /bin/rm -f *.o core
- X
- Xclean: tidy
- X /bin/rm -f $(TARGETS) $(TEST)
- X
- Xdepend:
- X mkdep $(CFLAGS) *.c
- X
- X#
- X# $Log: Makefile,v $
- X# Revision 1.5 91/03/03 00:19:26 lee
- X# added lqkwik
- X#
- X# Revision 1.4 90/10/06 00:50:42 lee
- X# Prepared for first beta release.
- X#
- X# Revision 1.3 90/10/05 23:54:57 lee
- X# deleted mkdep output.
- X#
- X# Revision 1.2 90/09/28 21:54:01 lee
- X# No longer uses OWNER.
- X#
- X# Revision 1.1 90/08/09 19:17:39 lee
- X# Initial revision
- X#
- X
- X# DO NOT DELETE THIS LINE -- mkdep uses it.
- X# DO NOT PUT ANYTHING AFTER THIS LINE, IT WILL GO AWAY.
- X
- XSixBit.o: SixBit.c ../h/globals.h
- XSixBit.o: ../h/wordrules.h
- Xfileindex.o: fileindex.c ../h/globals.h
- Xfileindex.o: ../h/emalloc.h ../h/fileinfo.h
- Xlqaddfile.o: lqaddfile.c
- Xlqaddfile.o: ../h/globals.h ../h/fileinfo.h ../h/emalloc.h
- Xlqaddfile.o: ../h/wordinfo.h ../h/pblock.h ../h/wordrules.h ../h/filter.h
- Xlqkwik.o: lqkwik.c ../h/globals.h ../h/fileinfo.h
- Xlqkwik.o: ../h/wordinfo.h ../h/pblock.h ../h/wordrules.h ../h/pblock.h
- Xlqkwik.o: ../h/emalloc.h
- Xlqphrase.o: lqphrase.c ../h/globals.h ../h/emalloc.h
- Xlqphrase.o: ../h/fileinfo.h ../h/wordinfo.h ../h/pblock.h ../h/pblock.h
- Xlqphrase.o: ../h/phrase.h
- Xlqshow.o: lqshow.c ../h/globals.h
- Xlqword.o: lqword.c ../h/globals.h
- Xwordtable.o: wordtable.c ../h/globals.h
- X
- X# IF YOU PUT ANYTHING HERE IT WILL GO AWAY
- @@@End of lq-text/src/lqtext/Makefile
- echo x - lq-text/src/lqtext/ReadAhead.c 1>&2
- sed 's/^X//' >lq-text/src/lqtext/ReadAhead.c <<'@@@End of lq-text/src/lqtext/ReadAhead.c'
- @@@End of lq-text/src/lqtext/ReadAhead.c
- echo x - lq-text/src/lqtext/fileindex.c 1>&2
- sed 's/^X//' >lq-text/src/lqtext/fileindex.c <<'@@@End of lq-text/src/lqtext/fileindex.c'
- X/* fileindex.c -- Copyright 1989, 1990 Liam R. Quin. All Rights Reserved.
- X * This code is NOT in the public domain.
- X * See the file COPYRIGHT for full details.
- X */
- X
- X/* A simple program to give information about one or more files about
- X * which information is stored in the NX-Text database.
- X *
- X * $Id: fileindex.c,v 1.4 91/03/02 18:56:53 lee Rel1-10 $
- X */
- X
- X#include "globals.h" /* defines and declarations for database filenames */
- X
- X#include <stdio.h>
- X#include <sys/types.h>
- X#include <malloc.h>
- X#include "emalloc.h"
- X#include "fileinfo.h"
- X
- Xstatic char *Revision = "@(#) lqtext 2.3 89/11/34";
- X
- X/* The position of the \n in the 26-char string returned by ctime(3): */
- X#define DATENEWLINE 24
- X
- Xchar *progname;
- Xint AsciiTrace = 0;
- X
- X/** System calls and library functions used in this file: **/
- X
- X/** Unix System calls: **/
- Xextern void exit();
- X/** System Library Functions: **/
- X
- X/** external lqtext functions: **/
- Xextern void cleanupdb(), SetDefaults();
- Xint SaveFileInfo(), GetFilterType();
- X#ifndef efree
- X extern void efree();
- X#endif
- X/** Functions defined within this file: **/
- Xvoid AddInfo(), AllInfo(), Display(), PrintInfo();
- X
- Xint AllFiles = 0;
- Xint ListMode = 0;
- Xint AddFiles = 0;
- X
- X/* main -- entry point of the file-information program (built as lqfile).
- X *
- X * Parses the command line, then does one of:
- X *   -a        list information about every file known to the database;
- X *   -A files  add the named files to the list of known files;
- X *   files     list information about the named files.
- X * With -l (list mode) the table header and ruling lines are omitted.
- X * -x prints the usage message and exits with status 0; an unknown option
- X * prints it and exits with status 1.
- X *
- X * Exits with status 0 on normal completion.
- X */
- Xint
- Xmain(argc, argv)
- X    int argc;
- X    char *argv[];
- X{
- X    extern int optind, getopt();
- X    /** extern char *optarg; (unused at the moment) **/
- X    int ch;
- X    int ErrorFlag = 0;
- X
- X    progname = argv[0];
- X
- X    SetDefaults(argc, argv);
- X
- X    /* All programs take Zz:Vv.  'l' has to be listed here as well, or
- X     * getopt() rejects -l and the case below can never be reached.
- X     */
- X    while ((ch = getopt(argc, argv, "Zz:VvAalx")) != EOF) {
- X        switch (ch) {
- X        case 'z':
- X        case 'Z':
- X            break; /* done by SetDefaults(); */
- X        case 'V':
- X            fprintf(stderr, "%s version %s\n", progname, Revision);
- X            break;
- X        case 'v':
- X            AsciiTrace = 1;
- X            break;
- X        case 'A':
- X            AddFiles = 1;
- X            break;
- X        case 'a':
- X            AllFiles = 1;
- X            break;
- X        case 'l':
- X            ListMode = 1;
- X            break;
- X        case 'x':
- X            ErrorFlag = (-1);
- X            break;
- X        case '?':
- X            ErrorFlag = 1;
- X            break;
- X        }
- X    }
- X
- X    /* Normally put call to lrqError here to give a helpful message,
- X     * but not yet ready to ship the error handling package, sorry
- X     */
- X    if (ErrorFlag) {
- X        fprintf(stderr, "%s: usage: %s [options] [files]\n",progname,progname);
- X        fprintf(stderr, "%s: options are:\n", progname);
- X        fputs("\
- X -c file -- treat the named file as a list of common words\n\
- X -d dir -- use the lq-text database in the directory \"dir\"\n\
- X -l -- list mode: no header output or lines drawn\n\
- X -s -- show the list of saved files\n\
- X -t N -- set trace level to N [default: 0]\n\
- X -V -- print version information\n\
- X -v -- be verbose (same as -t 1)\n\
- X -x -- print this explanation\n\
- X\n\
- XIn addition, if no files are given, the following are understood:\n\
- X -A -- add the named files to the list of known files\n\
- X -a -- list information about all files\n", stderr);
- X        /* -x (ErrorFlag < 0) is a request, not an error */
- X        exit((ErrorFlag > 0) ? 1 : 0);
- X    }
- X
- X    if (AllFiles && AddFiles) {
- X        fprintf(stderr, "%s: do not use both -a and -A options\n", progname);
- X        fprintf(stderr, "\tuse %s -x for further explanation.\n", progname);
- X        exit(1);
- X    }
- X
- X    if (optind >= argc && !AllFiles && !AddFiles) {
- X        fprintf(stderr,
- X        "%s: You must either give the -a option or specify files to list.\n",
- X            progname);
- X        fprintf(stderr, "\tuse %s -x for further explanation.\n", progname);
- X        exit(1);
- X    }
- X
- X    /* List mode promises "no header output or lines drawn" */
- X    if (!ListMode) {
- X        printf("%-7.7s | T | %-20.20s | %s\n",
- X            "FID", "Date Last indexed", "Current Location");
- X        puts(
- X"========|===|======================|=========================================="
- X        );
- X    }
- X    if (AllFiles) {
- X        AllInfo();
- X    } else {
- X        if (AddFiles) {
- X            extern void lqWriteAccess();
- X
- X            /* adding entries needs the database opened for writing */
- X            lqWriteAccess();
- X        }
- X
- X        while (optind < argc) {
- X            if (AddFiles) {
- X                AddInfo(argv[optind++]);
- X            } else {
- X                PrintInfo(argv[optind++]); /* ugh */
- X            }
- X        }
- X    }
- X    cleanupdb(); /* close dbm files */
- X    exit(0);
- X    /*NOTREACHED*/
- X    return 1; /* for lint and gcc... */
- X}
- X
- X/* PrintInfo -- show the database entry for one file.
- X *
- X * Name is first looked up as a filename with Name2FID().  If that gives
- X * no FID a message is printed on stderr and Name is then tried as a
- X * decimal FID with atol(); a result of zero ends the attempt silently.
- X * If the FID has no stored information another message is printed on
- X * stderr; otherwise the entry is handed to Display().
- X */
- Xvoid
- XPrintInfo(Name)
- X    char *Name;
- X{
- X    extern t_FID Name2FID();
- X    extern t_FileInfo *GetFileInfo();
- X    extern long atol();
- X
- X    t_FileInfo *Entry;
- X    long FID = Name2FID(Name);
- X
- X    if (FID == (t_FID) 0) {
- X        fprintf(stderr, "No FID information for filename: %s\n", Name);
- X
- X        /* perhaps we were given a number rather than a name */
- X        FID = atol(Name);
- X        if (FID == (t_FID) 0) {
- X            return;
- X        }
- X    }
- X
- X    /* fetch the stored record for this FID */
- X    Entry = GetFileInfo(FID);
- X    if (Entry == (t_FileInfo *) 0) {
- X        fprintf(stderr, "No index information for: %s\n", Name);
- X        return;
- X    }
- X
- X    Display(Entry);
- X}
- X
- X/* Display -- print one line on stdout describing FileInfo.
- X *
- X * The line carries the FID, the filter type, the date stored in the
- X * entry (formatted by ctime() with its first four characters and the
- X * trailing newline removed) and the file name.  In list mode the fields
- X * are separated by single spaces; otherwise they are laid out in columns
- X * separated by " | ".
- X */
- Xvoid
- XDisplay(FileInfo)
- X    t_FileInfo *FileInfo;
- X{
- X    extern char *ctime();
- X    char *Stamp = ctime(&(FileInfo->Date));
- X    char *When;
- X
- X    Stamp[DATENEWLINE] = '\0';  /* chop the newline ctime() appends */
- X    When = &Stamp[4];           /* step over the leading day name */
- X
- X    if (!ListMode) {
- X        printf("%7lu | %d | %-20.20s | %s\n",
- X            FileInfo->FID, FileInfo->FilterType, When, FileInfo->Name);
- X        return;
- X    }
- X
- X    printf("%lu %d %s %s\n",
- X        FileInfo->FID, FileInfo->FilterType, When, FileInfo->Name);
- X}
- X
- X/**
- XMon Sep 25 23:58:53 BST 1989
- XFID | T | Date Last indexed | Current Location
- X========|===|======================|===========================================
- X 1 | 0 | Sep 25 20:31:26 1989 | /usr2/liam/Bible/NT/John/john01.kjv
- X 2 | 0 | Sep 25 20:31:28 1989 | /usr2/liam/Bible/NT/John/john02.kjv
- X 3 | 0 | Sep 25 20:31:30 1989 | /usr2/liam/Bible/NT/John/john03.kjv
- X**/
- X
- X/* AllInfo -- list every file recorded in the database.
- X *
- X * Every FID from 0 up to and including GetMaxFID() is tried; FIDs for
- X * which GetFileInfo() returns nothing are skipped without comment.  Each
- X * entry found is printed with Display() and then released.  A final line
- X * reports the largest FID.
- X */
- Xvoid
- XAllInfo()
- X{
- X    extern long GetMaxFID();
- X    extern t_FileInfo *GetFileInfo();
- X
- X    t_FileInfo *FileInfo;
- X    long FID;
- X    long MaxFid = GetMaxFID();
- X
- X    for (FID = 0L; FID <= MaxFid; FID++) {
- X        if ((FileInfo = GetFileInfo(FID)) != (t_FileInfo *) 0) {
- X            Display(FileInfo);
- X            efree(FileInfo); /* NOTDONE use destroyfileinfo() */
- X        }
- X    }
- X    /* MaxFid is a signed long; convert it to match the %lu conversion */
- X    printf("Max File Identifier is %lu\n", (unsigned long) MaxFid);
- X}
- X
- X/* AddInfo -- record FileName in the list of known files.
- X *
- X * A new entry is built with the next free FID, the current time as its
- X * date and the filter type reported by GetFilterType(), and is then
- X * stored with SaveFileInfo().  One line is printed on stdout giving the
- X * FID, the name, the filter type and whether the entry was saved
- X * (SaveFileInfo() returning 0 is taken to mean success).
- X */
- Xvoid
- XAddInfo(FileName)
- X    char *FileName;
- X{
- X    extern time_t time();
- X    extern unsigned long GetNextFID();
- X    t_FileInfo FileInfo;
- X    int Saved;
- X
- X    FileInfo.Name = FileName;
- X    (void) time(&(FileInfo.Date));
- X    FileInfo.FID = GetNextFID();
- X    FileInfo.Stream = 0; /* force GetFilterType to use open()? */
- X
- X    /* determine filter type */
- X    FileInfo.FilterType = GetFilterType(&FileInfo);
- X
- X    /* store the entry before reporting on it */
- X    Saved = (SaveFileInfo(&FileInfo) == 0);
- X
- X    /* The FID comes from GetNextFID() as an unsigned long, so it must be
- X     * printed with %lu rather than %d.
- X     */
- X    printf("%lu %s (type %d) %s\n",
- X        (unsigned long) FileInfo.FID,
- X        FileInfo.Name,
- X        FileInfo.FilterType,
- X        Saved ? "saved successfully." : "not saved."
- X    );
- X}
- X
- X/*
- X * $Log: fileindex.c,v $
- X * Revision 1.4 91/03/02 18:56:53 lee
- X * Now asks for write access iff [sic] necessary
- X *
- X * Revision 1.3 90/10/06 00:50:50 lee
- X * Prepared for first beta release.
- X *
- X * Revision 1.2 90/08/29 21:44:51 lee
- X * Alpha release
- X *
- X * Revision 1.1 90/08/09 19:17:11 lee
- X * Initial revision
- X *
- X * Revision 2.2 89/10/08 20:45:46 lee
- X * Working version of nx-text engine. Addfile and wordinfo work OK.
- X *
- X * Revision 2.1 89/10/02 01:14:18 lee
- X * New index format, with Block/WordInBlock/Flags/BytesSkipped info.
- X *
- X * Revision 1.2 89/09/16 21:16:17 lee
- X * First demonstratable version.
- X *
- X * Revision 1.1 89/09/07 21:05:55 lee
- X * Initial revision
- X *
- X */
- @@@End of lq-text/src/lqtext/fileindex.c
- echo x - lq-text/src/lqtext/intersect.sh 1>&2
- sed 's/^X//' >lq-text/src/lqtext/intersect.sh <<'@@@End of lq-text/src/lqtext/intersect.sh'
- X:
- X# intersect word-one word-two
- X#
- X# intersect -- Copyright 1990 Liam R. Quin. All Rights Reserved.
- X# This code is NOT in the public domain.
- X# See the file ../COPYRIGHT for full details.
- X#
- X# $Id: intersect.sh,v 1.3 91/03/03 00:18:59 lee Rel1-10 $
- X#
- X
- X
- XFileNumber=0
- XFileList=
- XProgram=lqphrase
- XProgOpts=
- XAll=/tmp/iAll$$
- Xexport All
- X
- Xtrap '/bin/rm -f $All $tmp $First $FileList; exit' 0 1 2 3 15
- X
- Xif [ x"$1" = x"" ]
- Xthen
- X echo "$0: Usage: `basename $0` {-w word} | {-p phrase} ..." 1>&2
- X exit 1
- Xfi
- X
- X
- X# Walk the arguments.  -p and -w choose the program used for the terms
- X# that follow; every other argument is a term to look up.  The complete
- X# output of each lookup is appended to $All, and the sorted, de-duplicated
- X# third field of that output is kept in a scratch file of its own.  The
- X# first scratch file is remembered in $First, the others in $FileList.
- Xfor i
- Xdo
- X    if [ x"$i" = x"-p" ]
- X    then
- X        Program=lqphrase
- X        ProgOpts=
- X    elif [ x"$i" = x"-w" ]
- X    then
- X        Program=lqword
- X        ProgOpts=-l
- X    else
- X        # include the process id so that concurrent runs do not collide
- X        tmp=/tmp/inter$$.$FileNumber
- X        # the variable is spelled "All"; $ALL was never set, so tee saved nothing
- X        $Program $ProgOpts "$i" | tee -a "$All" | awk '{ print $3 }' | sort -u > $tmp
- X        if [ x"$First" = x"" ]
- X        then
- X            First="$tmp"
- X        else
- X            FileList="$FileList $tmp"
- X        fi
- X        FileNumber=`expr $FileNumber + 1`
- X    fi
- Xdone
- X
- X# Find matches...
- Xtmp=/tmp/inter.tmp$$
- X
- Xfor i in $FileList
- Xdo
- X fgrep -x -f $First $i | sort -u > $tmp
- X mv $tmp $First
- Xdone
- X
- Xmv $First $tmp
- Xsed 's/^/ /' $tmp > $First
- X
- Xfgrep -f $First $All
- Xexit 0
- X
- X#
- X#
- X# $Log: intersect.sh,v $
- X# Revision 1.3 91/03/03 00:18:59 lee
- X# brought up to date a little...
- X#
- X# Revision 1.2 90/10/06 00:50:52 lee
- X# Prepared for first beta release.
- X#
- X# Revision 1.1 90/08/29 21:45:01 lee
- X# Initial revision
- X#
- X#
- X#
- @@@End of lq-text/src/lqtext/intersect.sh
- echo x - lq-text/src/lqtext/lq.sh 1>&2
- sed 's/^X//' >lq-text/src/lqtext/lq.sh <<'@@@End of lq-text/src/lqtext/lq.sh'
- X#! /bin/sh
- X: use /bin/sh
- X# put the : line first on System V
- X
- X# lq -- Copyright 1990 Liam R. Quin. All Rights Reserved.
- X# This code is NOT in the public domain.
- X# See the file ../COPYRIGHT for full details.
- X#
- X# $Id: lq.sh,v 1.3 90/10/06 00:50:53 lee Rel1-10 $
- X#
- X
- Xif [ x"`echo -n hello`" = x'hello' ]
- Xthen
- X N=-n
- X C=
- Xelse
- X N=
- X C='\c'
- Xfi
- X
- Xquit=no
- Xt=/tmp/lq$$
- XListFile=/tmp/lqshow$$
- Xexport ListFile
- X
- Xtrap '/bin/rm -f $t; exit' 0 1 2 3 15
- X
- X
- Xwhile [ x"$quit" != x"yes" ]
- Xdo
- X cat << boy
- X| Type a words or phrases to find, one per line,
- X| and then press return.
- Xboy
- X # Read phrases, one per line, until an empty line is entered.  Each one
- X # is appended to $Phrases wrapped in double quotes, ready for the eval
- X # that follows this loop.
- X x='fhdjfd'    # any non-empty value, so that the loop is entered
- X Phrases=
- X while [ x"$x" != x"" ]
- X do
- X     echo $N "| $C"
- X     read x
- X     if [ x"$x" != x"" ]
- X     then
- X         # double quotes typed by the user would end the quoting added
- X         # below, so turn them into colons and use the cleaned-up copy
- X         New=`echo "$x" | sed 's/"/:/g'`
- X         Phrases="${Phrases} \"$New\""
- X     fi
- X done
- X echo $Phrases
- X eval lqphrase -p $Phrases \> $t
- X if [ ! -s $t ]
- X then
- X echo "No match"
- X else
- X # determine the order in which matches will be presented to the user:
- X sort +2 -o "$t" "$t" # (this is our ranking function)
- X # (it only makes a difference if there was more than one phrase)
- X
- X # Now some arcanery, I'm afraid... The trick is that lqshow can be
- X # given the name of a file descriptor in which to save the names of
- X # any files the user selects (with "s").
- X old_t="$t"
- X t="$t ${ListFile}"
- X lqshow -o 3 -f $t 3>> ${ListFile}
- X t="$old_t"
- X if [ -s ${ListFile} ]
- X then ## the user typed s/k/whatever to save some files...
- X # make the list by interpreting the list file:
- X LIST=`awk '
- X /^#.*$/ { next }
- X ($1 == "s") { SAVE[$2]++ }
- X ($1 == "d") { SAVE[$2] = 0 }
- X END {
- X for (i in SAVE) {
- X if (SAVE[i] > 0) print i
- X }
- X }' $ListFile | sort -u`
- X # make a new list file...
- X echo "$LIST" | sed '/^[ ]*$/d' > $ListFile
- X LIST="" # save memory
- X fi
- X # now see if it's still non-empty...
- X if [ -s ${ListFile} ]
- X then
- X List="Type S filename to save the list of files (s also quits) "
- X else
- X /bin/rm -f ${ListFile}
- X fi
- X fi
- X echo $List
- X echo $N "Type q to quit, or return to continue: $C"
- X read quit rest
- X case "$quit" in
- X [qQ]*) quit="yes" ;;
- X [sS]) # save the list of matches
- X cat $ListFile
- X
- X if [ ! -s "$ListFile" ]
- X then
- X echo "No files in the list to save."
- X quit=no
- X else
- X if [ -z "$rest" ]
- X then rest="lq.list"
- X fi
- X
- X if [ -f "$rest" ]
- X then echo "Appending to existing file $rest"
- X fi
- X
- X cat $ListFile >> $rest
- X rm $ListFile
- X if [ x"$quit" = x"s" ]
- X then quit=yes
- X else quit=no
- X fi
- X fi
- X ;;
- X *) quit=no ;;
- X esac
- Xdone
- X
- X#
- X# $Log: lq.sh,v $
- X# Revision 1.3 90/10/06 00:50:53 lee
- X# Prepared for first beta release.
- X#
- X#
- X#
- @@@End of lq-text/src/lqtext/lq.sh
- echo x - lq-text/src/lqtext/lqaddfile.c 1>&2
- sed 's/^X//' >lq-text/src/lqtext/lqaddfile.c <<'@@@End of lq-text/src/lqtext/lqaddfile.c'
- X/* lqaddfile.c -- Copyright 1989, 1990 Liam R. Quin. All Rights Reserved.
- X * This code is NOT in the public domain.
- X * See the file COPYRIGHT for full details.
- X */
- X
- X/* addfile -- add a file to the LQ-Text text retrieval index
- X * Liam Quin, August 1989 and later...
- X *
- X * $Id: lqaddfile.c,v 1.14 91/03/02 21:22:39 lee Rel1-10 $
- X */
- X
- Xstatic char *Version = "@(#) $Id: lqaddfile.c,v 1.14 91/03/02 21:22:39 lee Rel1-10 $";
- X
- X#ifdef SYSV
- Xextern int _filbuf(); /* used but not defined in stdio.h */
- X#endif
- X#include <stdio.h>
- X#include <malloc.h>
- X#include <ctype.h>
- X#include <sys/types.h>
- X#include <sys/stat.h>
- X#ifdef BSD
- X# include <strings.h>
- X#else
- X# include <string.h>
- X#endif
- X
- X#include "globals.h" /* defines and declarations for database filenames */
- X#include "fileinfo.h"
- X#include "wordinfo.h"
- X#include "wordrules.h"
- X#include "filter.h"
- X
- X#include "emalloc.h"
- X
- X#define enew(var, type) (var = (type *) emalloc(sizeof(type)))
- X
- X#ifdef SYSV
- X#define TOLOWER(ch) ch = tolower(ch)
- X#else
- X#define TOLOWER(ch) if (isupper(ch)) ch = tolower(ch)
- X#endif
- X
- Xvoid DestroyFileInfo(), SaveFileInfo(), AddStream(), AddFrom();
- Xextern lqWriteAccess(); /* Allow write access to the database */
- X/* Symbol Table Interface */
- Xextern void AddWord(), WriteCurrentMaxWID();
- Xextern void DumpCache(), cleanupdb();
- Xextern char *WordRoot();
- Xextern int TooCommon(), GetFilterType();
- Xint RealGetChar(), AddFile();
- X
- X/** System calls and library routines used in this file: **/
- X/** System calls: **/
- Xextern void exit();
- Xextern int stat();
- X/** Library Functions: **/
- Xextern int atoi();
- X#ifndef tolower
- X extern int tolower();
- X#endif
- Xextern void perror();
- X/**/
- X
- Xchar *progname = "@(#) : addfile.c,v 1.1 89/08/28 20:16:05 lee Locked $";
- Xstatic int UseLineNumbers = 0;
- X
- X/* FROM pblock.c */
- Xextern int AsciiTrace; /* provide increasingly verbose info if not zero */
- X
- Xstatic int LastChar = 0;
- Xstatic int _chForLee = 0;
- X
- X/* GetChar(F) -- expression yielding the next input character.
- X *
- X * If a character has been stashed in LastChar it is returned and LastChar
- X * is cleared.  Otherwise a character is read with getc(); unless it is
- X * an apostrophe seen while InWord is true, it is returned directly.  An
- X * apostrophe inside a word is handed over to RealGetChar(F).
- X * BytesRead is incremented on both paths that do not call RealGetChar().
- X *
- X * Note that the argument F is only passed on to RealGetChar(): the getc()
- X * reads FileInfo->Stream, so FileInfo, InWord and BytesRead must all be
- X * visible wherever the macro is used.
- X */
- X#define GetChar(F) \
- X ( LastChar ? \
- X (++BytesRead, (_chForLee = LastChar), (LastChar = 0), _chForLee) : \
- X ( (_chForLee = getc(FileInfo->Stream)) != '\'' || !InWord) ? \
- X (++BytesRead, _chForLee) : RealGetChar(F) )
- X
- X/* main -- entry point of lqaddfile: add files to the retrieval index.
- X *
- X * After SetDefaults() has seen the arguments, the options handled here
- X * are parsed, write access to the database is requested, and the files
- X * are indexed: either those listed in the -f file (via AddFrom()) or the
- X * remaining command-line arguments (via AddFile()).  Finally the word
- X * cache is dumped, the dbm files are closed and the largest word
- X * identifier is written out.
- X *
- X * Exit status is 1 for a usage error, otherwise 0 (including for -x and
- X * -V, which print their text and do no indexing).
- X *
- X * NOTE(review): -v, -t, -c and -d appear in the help text but not in the
- X * getopt() string below; presumably SetDefaults() deals with them --
- X * confirm.
- X */
- Xint
- Xmain(argc, argv)
- X    int argc;
- X    char *argv[];
- X{
- X    extern char *strrchr();
- X    extern int getopt(), cknatstr();
- X    extern void SetDefaults();
- X    extern char *optarg;
- X    extern int optind;
- X    extern int MaxWordsInCache; /* see wordtable.c */
- X
- X    int c;
- X    int ErrorFlag = 0;
- X    int DoNothing = 0;
- X    char *InputFile = (char *) 0;
- X
- X#ifdef MALLOCTRACE
- X    malloc_debug(2);
- X#endif
- X
- X    progname = argv[0]; /* retain the full path at first */
- X
- X#ifdef M_MXFAST
- X    (void) mallopt(M_MXFAST, sizeof(t_WordPlace));
- X    /* may need to comment mallopt() out entirely for BSD -- use ifndef.
- X     * seems to work under SunOS, though.
- X     * When it works, it says "Allocate 100 or so chunks of this size at a
- X     * time, and whenever I ask for this much or less, give me one of the
- X     * chunks". Clearly it had better not be too large, but it is a big
- X     * win with a structure allocated for every occurrence of every word!
- X     */
- X#endif
- X
- X    SetDefaults(argc, argv);
- X
- X    while ((c = getopt(argc, argv, "w:f:xVZz:")) != -1) {
- X        switch (c) {
- X        case 'w':
- X            if (!cknatstr(optarg)) {
- X                fprintf(stderr,
- X                    "%s: -w must be given a number >= 0, not \"%s\"\n",
- X                    progname, optarg);
- X                /* the %s below needs its argument */
- X                fprintf(stderr, "\tuse %s -xv for further information\n",
- X                    progname);
- X                exit(1);
- X            }
- X            MaxWordsInCache = atoi(optarg);
- X            break;
- X        case 'Z':
- X        case 'z':
- X            break; /* work done in SetDefault() */
- X        case 'V':
- X            fprintf(stderr, "%s: version: %s\n", progname, Version);
- X            DoNothing = 1;
- X            break;
- X        case 'f':
- X            if (InputFile) {
- X                fprintf(stderr,
- X"%s: only one -f option allowed; use -xv for explanation\n", progname);
- X
- X                exit(1);
- X            }
- X            InputFile = optarg;
- X            break;
- X        case 'x':
- X            ErrorFlag = (-1);
- X            break;
- X        default:
- X        case '?':
- X            ErrorFlag = 1;
- X        }
- X    }
- X
- X    /* from here on, messages use only the last component of the path */
- X    if ((progname = strrchr(progname, '/')) != (char *) NULL) {
- X        ++progname; /* step over the last / */
- X    } else {
- X        progname = argv[0];
- X    }
- X
- X    if (ErrorFlag > 0) {
- X        fprintf(stderr, "use %s -x or %s -xv for an explanation.\n",
- X            progname, progname);
- X        exit(1);
- X    } else if (ErrorFlag < 0) { /* -x was used */
- X        fprintf(stderr, "%s -- add files to an lq-text retrieval database\n",
- X            progname);
- X
- X        fputs("Options are:\n\
- X -f file -- read the list of files to index from \"file\"\n\
- X -c file -- cfile contains a list of common words to be ignored\n\
- X -d dir -- use the lq-text database in the named directory\n\
- X -t N -- set the trace level to N [default: N = 0]\n\
- X -V -- print Version number and exit\n\
- X -v -- be verbose (equivalent to -t 1)\n\
- X -w n -- dump the word-cache every n words\n\
- X -x -- print this eXplanation and exit\n\
- X -- -- all following arguments are file names\n\
- X\n\
- X", stderr);
- X        if (AsciiTrace == 1) {
- X            /* used -v or -t1 */
- X            fprintf(stderr, "\n\
- X Any remaining arguments are taken to be file names. The current\n\
- XDOCPATH (%s) is searched for the files, and they are read and added\n\
- Xto the index. (If you use the -f option, you should not give filename\n\
- Xarguments on the command line, although you can use \"-f -\" to read the\n\
- Xlist of files from standard input, one per line.\n\
- XSetting (with -w) the size of the cache may dramatically\n\
- Ximprove performance. Systems with memory larger than the data can try -w0.\n\
- XSee lqtext(1) for more information.\n", DocPath);
- X        }
- X        exit(0);
- X
- X    }
- X
- X    if (DoNothing) {
- X        if (optind < argc) {
- X            /* plural suffix is "s", not a stray conversion specification */
- X            fprintf(stderr, "%s: warning: %d extra argument%s ignored...\n",
- X                progname, argc - optind,
- X                argc - optind == 1 ? "" : "s" );
- X            fprintf(stderr, "Use %s -x for an explanation\n", progname);
- X        }
- X        exit(0);
- X    }
- X
- X    lqWriteAccess();
- X
- X    if (InputFile) {
- X        if (optind < argc) {
- X            fprintf(stderr, "%s: -f: too many arguments; use -xv\n", progname);
- X            exit(1);
- X        }
- X        AddFrom(InputFile);
- X    } else for (; optind < argc; ++optind) {
- X        if (AddFile(argv[optind]) < 0 && AsciiTrace >= 1) {
- X            fprintf(stderr, "%s: warning: Problem adding file %s\n",
- X                progname, argv[optind]);
- X        }
- X    }
- X
- X#ifndef MALLOCTRACE
- X    DumpCache(0); /* the 0 means don't bother calling free() */
- X#else
- X    DumpCache(1); /* Free everything so whatever is left is a memory leak */
- X#endif
- X
- X    cleanupdb(); /* empty the dbm cache */
- X    WriteCurrentMaxWID();
- X
- X#ifdef MALLOCTRACE
- X    (void) fprintf(stderr, "%s: Malloctrace: checking...\n", progname);
- X    malloc_verify();
- X    (void) fprintf(stderr, "%s: Malloc Map\n", progname);
- X    mallocmap();
- X#endif
- X
- X    exit(0);
- X    /*NOTREACHED*/
- X    return 1; /* disaster if we get here -- it's just for lint! */
- X}
- X
- X/* AddFrom -- index every file named in the list `Name'.
- X *
- X * Name is the argument that was given to -f: either the path of a file
- X * holding file names, or "-" to read the list from standard input.
- X * If the list cannot be opened a diagnostic is printed and the program
- X * exits with status 1.  Each name returned by GetLine() is handed to
- X * AddFile(); a failure there is only reported when AsciiTrace >= 1.
- X */
- Xvoid
- XAddFrom(Name)
- X    char *Name;
- X{
- X    char *GetLine();
- X
- X    FILE *ListFile;
- X    char *FileName;
- X
- X    if (Name[0] == '-' && Name[1] == '\0') {
- X        ListFile = stdin;
- X    } else {
- X        ListFile = fopen(Name, "r");
- X    }
- X
- X    if (ListFile == (FILE *) 0) {
- X        extern int errno;
- X        int SavedErrno = errno;
- X
- X        fprintf(stderr, "%s: -f: can't open ", progname);
- X        errno = SavedErrno; /* so perror() reports the fopen() failure */
- X        perror(Name);
- X        exit(1);
- X    }
- X
- X    for (;;) {
- X        FileName = GetLine(ListFile, Name);
- X        if (FileName == (char *) 0) {
- X            break;
- X        }
- X
- X        if (AddFile(FileName) >= 0) {
- X            continue;
- X        }
- X
- X        if (AsciiTrace >= 1) {
- X            /* we already got one error message from AddFile() */
- X            fprintf(stderr, "%s: warning: Problem adding file %s\n",
- X                    progname, FileName);
- X        }
- X    }
- X
- X    /* never close standard input, we did not open it */
- X    if (ListFile != stdin) {
- X        (void) fclose(ListFile);
- X    }
- X}
- X
- X/* LineInFile is advanced by GetLine() once per call on the same stream,
- X * starting from zero; LastFile is the stream that count belongs to.
- X * ReadWord() reads LineInFile when UseLineNumbers is set.
- X */
- Xstatic int LineInFile = 0;
- Xstatic FILE *LastFile = 0;
- X
- X/* GetLine -- read the next file name from the list open on fp.
- X *
- X * Leading white space and blank lines are skipped, and the name ends at
- X * the next newline.  Any other white space is kept as part of the name,
- X * with a one-off warning (mentioning Name) when AsciiTrace is set.
- X * A final name that is not followed by a newline is still returned,
- X * after a warning.
- X *
- X * Returns a pointer to a static buffer that is reused (and may be moved
- X * by erealloc) on the next call, or a null pointer at end of file.
- X */
- Xchar *
- XGetLine(fp, Name)
- X    FILE *fp;
- X    char *Name;
- X{
- X    static char *Line = (char *) 0;
- X    static int Length = 0;
- X    static int HaveWarned = 0;
- X    int Used = 0; /* number of characters stored in Line so far */
- X    int ch;
- X
- X    if (Line == (char *) 0) {
- X        if (Length <= 10) {
- X            Length = 30;
- X        }
- X        Line = emalloc(Length);
- X    }
- X
- X    if (fp != LastFile) {
- X        LastFile = fp;
- X        LineInFile = 0; /* number lines from zero! */
- X    } else {
- X        LineInFile++;
- X    }
- X
- X    for (;;) {
- X        ch = getc(fp);
- X        if (ch == EOF) {
- X            break;
- X        }
- X
- X        if (isspace(ch)) {
- X            if (Used == 0) {
- X                /* ignore blank lines and leading blanks */
- X                continue;
- X            }
- X            if (ch == '\n') {
- X                Line[Used] = '\0';
- X                return Line;
- X            }
- X            if (AsciiTrace && !HaveWarned) {
- X                fprintf(stderr,
- X"%s: -f: Warning: spaces found in filenames read from \"%s\"\n",
- X                        progname, Name);
- X                HaveWarned = 1;
- X            }
- X        }
- X
- X        /* grow the buffer so that there is always room for a final \0 */
- X        if (Used + 1 >= Length) {
- X            Length += 30;
- X            Line = erealloc(Line, Length);
- X        }
- X        Line[Used++] = ch;
- X    }
- X
- X    if (Line != (char *) 0 && Used != 0) {
- X        fprintf(stderr, "%s: -f: warning: no newline at the end of \"%s\"\n",
- X                progname, Name);
- X        Line[Used] = '\0';
- X        return Line;
- X    }
- X
- X    return (char *) 0;
- X}
- X
- Xextern int fclose(), pclose();
- X
- X/* MakeFileInfo -- build the t_FileInfo needed to index the file `Name'.
- X *
- X * If Name cannot be stat()ed as given, FindFile() is asked for another
- X * path (presumably by searching DOCPATH -- confirm) and that is used
- X * instead.
- X *
- X * A null pointer is returned, usually after a message on stderr, when
- X *   - Name is null or empty,
- X *   - the file cannot be found,
- X *   - the file is empty,
- X *   - the index already holds an entry for it whose Date is not older
- X *     than the file's modification time,
- X *   - GetFilterType() returns a negative value, or
- X *   - MakeInput() cannot open an input stream.
- X *
- X * Otherwise the result has FID, Date (the current time) and Stream set,
- X * and should be released with DestroyFileInfo().
- X */
- Xt_FileInfo *
- XMakeFileInfo(Name)
- X    char *Name;
- X{
- X#ifdef BSD
- X    extern time_t time();
- X#else
- X    extern long time();
- X#endif
- X    extern t_FID Name2FID();
- X    extern t_FileInfo *GetFileInfo();
- X    extern t_FID GetNextFID();
- X    FILE *MakeInput();
- X    struct stat Status;
- X
- X    t_FileInfo *Info = (t_FileInfo *) 0;
- X    t_FID FID;
- X
- X    if (Name == (char *) 0 || *Name == '\0') {
- X        return (t_FileInfo *) 0; /* sanity */
- X    }
- X
- X    if (stat(Name, &Status) < 0) {
- X#ifndef FindFile /* it is a macro these days... */
- X        extern char *FindFile();
- X#endif
- X        extern int errno;
- X
- X        int SavedErrno = errno;
- X        char *Found;
- X
- X        Found = FindFile(Name);
- X        if (Found != (char *) 0) {
- X            if (stat(Found, &Status) < 0) {
- X                /* report the error from this second attempt */
- X                SavedErrno = errno;
- X                Found = (char *) 0;
- X            } else {
- X                Name = Found;
- X            }
- X        }
- X
- X        if (Found == (char *) 0) {
- X            fprintf(stderr, "Can't index ");
- X            errno = SavedErrno; /* fprintf might well clobber errno! */
- X            perror(Name);
- X            return (t_FileInfo *) 0;
- X        }
- X    }
- X
- X    if (Status.st_size == 0L) {
- X        if (AsciiTrace) {
- X            fprintf(stderr, "%s empty -- not indexed\n", Name);
- X        }
- X        return (t_FileInfo *) 0;
- X    }
- X
- X    /* See if it's in the index already: */
- X    FID = Name2FID(Name);
- X    if (FID == (t_FID) 0) {
- X        FID = GetNextFID((long) Status.st_size);
- X    } else {
- X        Info = GetFileInfo(FID);
- X
- X        /* Check to see if the file has changed since it was last
- X         * indexed.  If it has, we should delete the old one from
- X         * the database and give this one a new FID, but that has
- X         * not been done yet.
- X         */
- X        if (Info != (t_FileInfo *) 0 && Info->Date >= Status.st_mtime) {
- X            if (AsciiTrace) {
- X                fprintf(stderr, "%s unchanged -- not indexed\n", Name);
- X            }
- X            DestroyFileInfo(Info);
- X            return (t_FileInfo *) 0;
- X        }
- X    }
- X
- X    if (Info == (t_FileInfo *) 0) {
- X        /* Nothing usable in the index: allocate a fresh structure */
- X        enew(Info, t_FileInfo);
- X
- X        /* Although not always necessary, call emalloc here so that a
- X         * FileInfo can always be deleted with DestroyFileInfo()
- X         */
- X        Info->Name = emalloc((unsigned)(strlen(Name) + 1));
- X        (void) strcpy(Info->Name, Name);
- X
- X        /* date */
- X        Info->Date = Status.st_mtime;
- X
- X        /* file type */
- X        Info->FilterType = GetFilterType(Info, &Status);
- X        if (Info->FilterType < 0) {
- X            if (AsciiTrace) {
- X                fprintf(stderr, "%s unknown file type -- not indexed\n", Name);
- X            }
- X            (void) efree(Info->Name);
- X            (void) efree((char *) Info);
- X            return (t_FileInfo *) 0;
- X        }
- X    }
- X
- X    Info->FID = FID;
- X    Info->Date = (long) time((long *) 0); /* it's a time_t on BSD */
- X
- X    Info->Stream = MakeInput(Info);
- X    if (Info->Stream == (FILE *) 0) {
- X        fprintf(stderr, "%s: couldn't open filter for %s -- not indexed\n",
- X                progname, Info->Name);
- X        (void) efree(Info->Name);
- X        (void) efree((char *) Info);
- X        return (t_FileInfo *) 0;
- X    }
- X
- X    return Info;
- X}
- X
- X/* DestroyFileInfo -- release a t_FileInfo and everything it owns.
- X *
- X * If the structure still has an open Stream, it is closed with the
- X * close function that FilterTable lists for the file's FilterType.
- X * The Name string and then the structure itself are freed with efree().
- X * A null FileInfo is ignored.
- X */
- Xvoid
- XDestroyFileInfo(FileInfo)
- X    t_FileInfo *FileInfo;
- X{
- X    if (FileInfo == (t_FileInfo *) 0) {
- X        return;
- X    }
- X
- X    if (FileInfo->Stream) {
- X        /* MaxFilterType is itself a valid table index: MakeInput() only
- X         * rejects types greater than it.  The test therefore has to be
- X         * inclusive, or streams of the highest filter type would never
- X         * be closed.
- X         */
- X        if (FileInfo->FilterType >= 0 &&
- X                        FileInfo->FilterType <= MaxFilterType) {
- X            (* FilterTable[FileInfo->FilterType].close)(FileInfo->Stream);
- X        }
- X        FileInfo->Stream = (FILE *) 0;
- X    }
- X
- X    if (FileInfo->Name) {
- X        (void) efree(FileInfo->Name);
- X    }
- X    (void) efree((char *) FileInfo);
- X}
- X
- X/* AddFile -- add the single file `Name' to the index.
- X *
- X * Returns 0 when the file was handed to AddStream() and its FileInfo
- X * saved, or -1 when Name is null or empty or MakeFileInfo() declined
- X * to produce a FileInfo for it.
- X */
- Xint
- XAddFile(Name)
- X    char *Name;
- X{
- X    t_FileInfo *Info;
- X
- X    if (Name != (char *) 0 && *Name != '\0') {
- X        Info = MakeFileInfo(Name);
- X
- X        if (Info != (t_FileInfo *) 0) {
- X            AddStream(Info);      /* read and index the words */
- X            SaveFileInfo(Info);   /* record the file itself */
- X            DestroyFileInfo(Info);
- X            return 0;
- X        }
- X    }
- X
- X    return -1;
- X}
- X
- X/* MakeInput -- open a stream from which the text of a file can be read.
- X *
- X * FileInfo->FilterType selects an entry in FilterTable.  When that
- X * entry has no command string the file is opened directly with fopen();
- X * otherwise the string is used as a printf-style template (with the
- X * file name as its argument), " < name" is appended, and the resulting
- X * command is run with popen().
- X *
- X * Returns the stream, or a null pointer if the filter type is out of
- X * range or the open fails.  Exits with status 3 if FilterTable is not
- X * in step with the filter type numbers.
- X */
- XFILE *
- XMakeInput(FileInfo)
- X    t_FileInfo *FileInfo;
- X{
- X    FILE *fp;
- X    char *Buffer;
- X    unsigned BufLen;
- X    extern FILE *fopen(), *popen();
- X
- X#define FSTRING FilterTable[FileInfo->FilterType].String
- X
- X    /* A FileInfo fetched from the database is not checked by
- X     * MakeFileInfo(), so refuse a negative type here rather than
- X     * index FilterTable out of bounds.
- X     */
- X    if (FileInfo->FilterType < 0) {
- X        fprintf(stderr, "%s: Warning: filter type %d for %s is negative\n",
- X                progname, FileInfo->FilterType, FileInfo->Name);
- X        return (FILE *) 0;
- X    }
- X
- X    if (FileInfo->FilterType > MaxFilterType) {
- X        fprintf(stderr, "%s: Warning: filter type %d for %s too high (max %d)\n",
- X                progname, FileInfo->FilterType, FileInfo->Name, MaxFilterType);
- X        return (FILE *) 0;
- X    }
- X
- X    if (FilterTable[FileInfo->FilterType].Type != FileInfo->FilterType) {
- X        fprintf(stderr, "Fatal Filter table error, %d\n", FileInfo->FilterType);
- X        exit(3);
- X    }
- X
- X    if (FSTRING == (char *) 0) {
- X        return fopen(FileInfo->Name, "r");
- X    }
- X
- X    BufLen = strlen(FileInfo->Name) * 2 + 4 + strlen(FSTRING);
- X    /* The +4 is to allow for an embedded " < " plus a \0;
- X     * we append "< Name", but also expand %s to be the Name, hence
- X     * the strlen * 2
- X     */
- X    Buffer = emalloc(BufLen);
- X
- X    /* NOTE(review): the file name is pasted into the command unquoted,
- X     * and popen() hands the command to the shell, so names containing
- X     * spaces or shell metacharacters will not behave as intended.
- X     */
- X    (void) sprintf(Buffer, FSTRING, FileInfo->Name);
- X    (void) strcat(Buffer, " < ");
- X    (void) strcat(Buffer, FileInfo->Name);
- X
- X    fp = popen(Buffer, "r");
- X    (void) efree(Buffer);
- X    return fp;
- X}
- X
- X/* BytesRead is reset to zero at the start of each file (see ReadWord()
- X * and AddStream()) and incremented below for each character fetched.
- X * InWord is set by ReadWord() while it is collecting a word.
- X */
- Xstatic long BytesRead = 0L;
- Xstatic int InWord = 0;
- X
- X/* Character input */
- X
- X/* RealGetChar -- decide what a single quote inside a word stands for.
- X *
- X * The next character is fetched into LastChar.  If it can continue a
- X * word (and is not itself a quote) the quote was in the middle of a
- X * word and is returned unchanged; otherwise the quote was at the end
- X * of a word and a space is returned in its place.
- X * Presumably called from the GetChar() macro -- confirm.
- X */
- X#ifdef __GNU__
- Xinline
- X#endif
- Xint
- XRealGetChar(FileInfo)
- X    t_FileInfo *FileInfo;
- X{
- X    int Result;
- X
- X    /* ASSERT: InWord && _chForLee == '\'' */
- X    LastChar = getc(FileInfo->Stream);
- X
- X    if (!WithinWord(LastChar) || LastChar == '\'') {
- X        /* delete the single quote, as it was at the end of
- X         * a word, not in the middle
- X         */
- X        Result = ' ';
- X    } else {
- X        Result = '\'';
- X    }
- X
- X    BytesRead++;
- X    return Result;
- X}
- X
- Xt_WordInfo *
- XReadWord(FileInfo)
- X t_FileInfo *FileInfo;
- X{
- X /* ReadWord -- return the next indexable word read from FileInfo,
- X * or a null pointer when the input is exhausted.
- X *
- X * Words shorter than MinWordLength, and words for which TooCommon()
- X * is true, are not returned: ReadWord calls itself to fetch the next
- X * word instead, and notes what was skipped in the Flags of the word
- X * that is eventually returned.
- X *
- X * Use two static storage areas so we can be called twice in a row.
- X * This is necessary to implement the WPF_LASTINBLOCK flag.
- X * The result points into that static storage, so a returned word is
- X * overwritten by the second word returned after it.
- X */
- X static t_WordInfo This, That;
- X static int ThisOrThat = 0; /* which of This/That (and Word1/Word2) to fill next */
- X t_WordInfo *WordInfo;
- X static char Buffer[MaxWordLength + 1];
- X int ch;
- X register char *q = Buffer;
- X static int WordInBlock; /* number of the last word seen in this block; -1 = none yet */
- X static t_FID LastFid = 0L; /* FID of the file read by the previous call */
- X static long LastPos = 0L; /* BytesRead - 1 as it stood when the last word was returned */
- X static int SawCommon = 0; /* common words skipped since the last word returned */
- X static int SawLetters = 0; /* set when a too-short word containing a letter was skipped */
- X static int BlockInFile = 0L; /* block number of the word being read */
- X static unsigned long LastBlock; /* BlockInFile of the previous word examined */
- X unsigned long Start; /* BytesRead - 1 just after the word's first character was read */
- X
- X WordInfo = (ThisOrThat ? &This : &That);
- X
- X if (FileInfo->FID != LastFid) { /* a new file: forget all per-file state */
- X LastFid = FileInfo->FID;
- X WordInBlock = (-1); /* none, yet! */
- X LastPos = BlockInFile = LastBlock = 0L;
- X BytesRead = 0L;
- X SawCommon = SawLetters = 0;
- X if (AsciiTrace) {
- X fprintf(stderr, "Reading file \"%s\"", FileInfo->Name);
- X }
- X }
- X
- X /* Skip non-word characters */
- X while ((ch = GetChar(FileInfo)) != EOF) {
- X if (StartsWord(ch)) break;
- X }
- X
- X /* ASSERT: we have read at least one character */
- X
- X if (ch == EOF) {
- X if (AsciiTrace) {
- X fprintf(stderr, "\n");
- X }
- X return (t_WordInfo *) 0;
- X }
- X
- X Start = BytesRead - 1;
- X
- X if (UseLineNumbers) { /* NOTE(review): LineInFile is advanced by GetLine(),
- X * which reads the -f list of file names, not the document being
- X * indexed -- confirm that this is the block number wanted here.
- X */
- X BlockInFile = LineInFile;
- X } else {
- X BlockInFile = Start / FileBlockSize;
- X }
- X
- X if (BlockInFile != LastBlock) { /* first word of a new block */
- X LastBlock = BlockInFile;
- X if (AsciiTrace > 1) {
- X fprintf(stderr, ".");
- X#ifdef sun
- X /* SunOS seems to line-buffer stderr! */
- X fflush(stderr);
- X#endif
- X }
- X WordInBlock = (-1);
- X }
- X
- X if (isupper(ch)) { /* remember that the word began with a capital */
- X WordInfo->WordPlace.Flags = WPF_UPPERCASE;
- X ch = tolower(ch);
- X } else {
- X WordInfo->WordPlace.Flags = 0;
- X }
- X
- X InWord = 1; /* For GetChar() */
- X
- X do {
- X if (q - Buffer < MaxWordLength) { /* characters beyond MaxWordLength are read but dropped */
- X *q++ = ch;
- X }
- X ch = GetChar(FileInfo);
- X TOLOWER(ch); /* macro; presumably folds ch to lower case in place -- confirm */
- X } while (WithinWord(ch) || EndsWord(ch));
- X
- X *q = '\0';
- X InWord = 0;
- X
- X#ifdef __GNUC__
- X /* this is to get round a gcc bug...
- X * Both branches of this #ifdef do the same thing: reject words that
- X * are shorter than MinWordLength, remembering whether the rejected
- X * word contained a letter.  This version just avoids using the
- X * value of the assignment to Length directly in the comparison.
- X */
- X {
- X int i = q - Buffer;
- X WordInfo->Length = i;
- X
- X if (i < MinWordLength) {
- X register char *p;
- X
- X for (p = Buffer; p < q; p++) {
- X if (isalpha(*p)) {
- X SawLetters = 1;
- X break;
- X }
- X }
- X return ReadWord(FileInfo);
- X }
- X }
- X#else
- X if ((WordInfo->Length = q - Buffer) < MinWordLength) {
- X register char *p;
- X
- X for (p = Buffer; p < q; p++) {
- X if (isalpha(*p)) {
- X SawLetters = 1;
- X break;
- X }
- X }
- X return ReadWord(FileInfo);
- X }
- X#endif
- X
- X WordInfo->Word = Buffer;
- X
- X (void) WordRoot(WordInfo);
- X
- X WordInfo->Length = strlen(WordInfo->Word); /* recomputed after WordRoot(); presumably it may alter the word -- confirm */
- X
- X if (TooCommon(WordInfo)) {
- X SawCommon++;
- X WordInBlock++; /* common words still take up a position in the block */
- X#ifdef ASCIITRACE
- X if (AsciiTrace > 10) {
- X fprintf(stderr, "%s too common to index\n", WordInfo->Word);
- X }
- X#endif
- X return ReadWord(FileInfo);
- X } else if (SawCommon) {
- X SawCommon = 0;
- X WordInfo->WordPlace.Flags |= (WPF_LASTWASCOMMON|WPF_LASTHADLETTERS);
- X }
- X if (SawLetters) {
- X SawLetters = 0;
- X WordInfo->WordPlace.Flags |= WPF_LASTHADLETTERS;
- X }
- X
- X /* StuffBefore is the # of chars between the end of the last word and
- X * the start of this one.  The value stored is kept within 1..255.
- X * NOTE(review): Start is unsigned long, so Start - (LastPos + 1) is
- X * computed as unsigned and the `<= 0' test below can only be true
- X * when the difference is exactly zero.
- X */
- X if (Start > 1L) {
- X if (Start - (LastPos + 1) <= 0) {
- X WordInfo->WordPlace.StuffBefore = 1; /* save a byte in the index */
- X } else if (Start - (LastPos + 1) >= 255 ) {
- X WordInfo->WordPlace.StuffBefore = 255;
- X } else {
- X WordInfo->WordPlace.StuffBefore = Start - (LastPos + 1);
- X }
- X } else {
- X WordInfo->WordPlace.StuffBefore = 1; /* i.e., the default */
- X }
- X
- X WordInfo->WordPlace.FID = WordInfo->FID = FileInfo->FID;
- X WordInfo->WID = (t_WID) 0;
- X WordInfo->Next = (t_WordInfo *) 0;
- X WordInfo->WordPlaces = (t_WordPlace *) 0;
- X WordInfo->WordPlacesInHere = 0;
- X WordInfo->WordPlace.WordInBlock = (++WordInBlock);
- X WordInfo->WordPlace.BlockInFile = BlockInFile;
- X WordInfo->DataBlock = (char *) 0;
- X
- X WordInfo->Word[WordInfo->Length] = '\0';
- X
- X {
- X /* Buffer is shared by every call, so the word is copied into one
- X * of two static arrays, chosen by ThisOrThat like This and That.
- X *
- X * I want to avoid using malloc() here...
- X * Another kludge would be to malloc sizeof(t_WordInfo) +
- X * strlen(WordInfo->Word + 1) and to put the string at the end
- X * of (i.e. just after) the struct.
- X */
- X static char Word2[MaxWordLength + 1];
- X static char Word1[MaxWordLength + 1];
- X char *p = (ThisOrThat) ? Word1 : Word2;
- X
- X (void) strncpy(p, WordInfo->Word, (int) WordInfo->Length);
- X WordInfo->Word = p;
- X WordInfo->Word[WordInfo->Length] = '\0'; /* strncpy() does not add the terminator here */
- X }
- X
- X LastPos = BytesRead - 1;
- X
- X ThisOrThat = !ThisOrThat;
- X /* toggle between 0 and 1. Boring life, really */
- X
- X if (!WordInfo->Word[0]) {
- X fprintf(stderr, "Null word in ReadWord()\n");
- X }
- X return WordInfo;
- X}
- X
- X/* AddStream -- read every word from FileInfo and pass it to AddWord().
- X *
- X * The last word in each block has to be marked with WPF_LASTINBLOCK,
- X * but whether a word is the last in its block is only known once the
- X * word after it has been read.  Each word is therefore held back until
- X * its successor (or the end of the input) has been seen.
- X *
- X * When AsciiTrace is set, the number of bytes read is reported on stderr.
- X */
- Xvoid
- XAddStream(FileInfo)
- X    t_FileInfo *FileInfo;
- X{
- X    t_WordInfo *WordInfo;
- X    t_WordInfo *LastWord = 0;
- X
- X    BytesRead = 0;
- X
- X    while ((WordInfo = ReadWord(FileInfo)) != (t_WordInfo *) 0) {
- X        if (LastWord) {
- X            /* the previous word ended its block if this one is in a
- X             * different block
- X             */
- X            if (LastWord->WordPlace.BlockInFile !=
- X                                    WordInfo->WordPlace.BlockInFile) {
- X                LastWord->WordPlace.Flags |= WPF_LASTINBLOCK;
- X            }
- X            AddWord(LastWord);
- X        }
- X        LastWord = WordInfo;
- X    }
- X
- X    if (LastWord) {
- X        /* it's the last in the file, so it is also the last in the block */
- X        LastWord->WordPlace.Flags |= WPF_LASTINBLOCK;
- X        AddWord(LastWord);
- X    }
- X
- X    if (AsciiTrace) {
- X        /* BytesRead is a (signed) long, so it must be printed with %ld */
- X        fprintf(stderr, "Read %ld bytes from \"%s\"\n", BytesRead, FileInfo->Name);
- X    }
- X}
- X
- X/* lqaddfile has been carried through several incarnations of lq-text,
- X * and hence has more than one Initial Revision in the following history.
- X *
- X * $Log: lqaddfile.c,v $
- X * Revision 1.14 91/03/02 21:22:39 lee
- X * Added write access call.
- X *
- X * Revision 1.13 91/03/02 18:53:25 lee
- X * Common words are now counted, so you can now edit the common word list
- X * without invalidating the index.
- X *
- X * Revision 1.12 90/10/06 00:50:54 lee
- X * Prepared for first beta release.
- X *
- X * Revision 1.11 90/10/05 23:46:11 lee
- X * Allow compilation with -UASCIITRACE
- X *
- X * Revision 1.10 90/10/04 17:54:46 lee
- X * fixed a typo in the usage message.
- X *
- X * Revision 1.9 90/09/28 23:20:22 lee
- X * Put more of GetChar into a macro and parameterised TOLOWER.
- X *
- X * Revision 1.8 90/09/28 22:19:04 lee
- X * Did the previous fix _properly_!
- X *
- X * Revision 1.7 90/09/28 22:12:35 lee
- X * Made getchar a macro, and deleted the call to CallFree...
- X *
- X * Revision 1.6 90/09/20 18:46:03 lee
- X * Closed up a (very small) memory leak.
- X *
- X * Revision 1.5 90/09/19 20:16:41 lee
- X * Fixed problems associated with indexing an empty file.
- X *
- X * Revision 1.4 90/08/29 21:45:18 lee
- X * Alpha release
- X *
- X * Revision 1.3 90/08/09 19:17:12 lee
- X * *** empty log message ***
- X *
- X * Revision 1.1 90/02/27 11:05:02 lee
- X * Initial revision
- X *
- X * Revision 2.2 89/10/08 20:45:13 lee
- X * Working version of nx-text engine. Addfile and wordinfo work OK.
- X *
- X * Revision 2.1 89/10/02 01:14:12 lee
- X * New index format, with Block/WordInBlock/Flags/BytesSkipped info.
- X *
- X * Revision 1.3 89/09/17 23:02:42 lee
- X * Various fixes; NumberInBlock now a short...
- X *
- X * Revision 1.2 89/09/16 21:16:11 lee
- X * First demonstratable version.
- X *
- X * Revision 1.1 89/09/07 21:05:52 lee
- X * Initial revision
- X *
- X */
- @@@End of lq-text/src/lqtext/lqaddfile.c
- echo end of part 06
- --
- Liam R. E. Quin, lee@sq.com, SoftQuad Inc., Toronto, +1 (416) 963-8337
-