home *** CD-ROM | disk | FTP | other *** search
- From: lee@sq.sq.com (Liam R. E. Quin)
- Newsgroups: alt.sources
- Subject: lq-text Full Text Retrieval Database Part 03/13
- Message-ID: <1991Mar4.020149.16222@sq.sq.com>
- Date: 4 Mar 91 02:01:49 GMT
-
- : cut here --- cut here --
- : To unbundle, sh this file
- #! /bin/sh
- : part 03
- # Extract the UseHash helper.  Progress goes to stderr like every other
- # member of this archive, and the chmod names the path that was written.
- echo x - lq-text/src/UseHash 1>&2
- cat > lq-text/src/UseHash << 'barefoot_choirboy'
- # Run this if you want to use the BSD hash package (ozmahash)
- cd src
- cp ozmahash/*.h h
- cp ozmahash/ndbm.h h/ozmahash.h
- barefoot_choirboy
- chmod +x lq-text/src/UseHash
- echo x - lq-text/src/liblqtext/Defaults.c 1>&2
- sed 's/^X//' >lq-text/src/liblqtext/Defaults.c <<'@@@End of lq-text/src/liblqtext/Defaults.c'
- X/* Defaults.c -- Copyright 1989 Liam R. Quin. All Rights Reserved.
- X * This code is NOT in the public domain.
- X * See the file COPYRIGHT for full details.
- X *
- X * $Id: Defaults.c,v 1.7 90/10/06 00:11:37 lee Rel1-10 $
- X *
- X * $Log: Defaults.c,v $
- X * Revision 1.7 90/10/06 00:11:37 lee
- X * Prepared for first beta release.
- X *
- X * Revision 1.6 90/08/29 21:46:25 lee
- X * Alpha release.
- X *
- X * Revision 1.5 90/08/09 19:16:08 lee
- X * *** empty log message ***
- X *
- X * Revision 1.4 90/04/21 17:26:26 lee
- X * now passes gcc -W (before Canada...)
- X *
- X * Revision 1.3 90/03/23 17:58:57 lee
- X * Integrated with globals.h and added a few more comments.
- X * Also fixed a bug whereby the configuration file over-rode both
- X * command-line options and environment variables!
- X *
- X * Revision 1.2 90/03/20 20:52:38 lee
- X * removed some globals...
- X *
- X *
- X */
- X
- X#define DefineThem /* turn externs off so we do initialisations here */
- X# include "globals.h" /* defines and declarations for database filenames */
- X#undef DefineThem
- X#undef EXTERN
- X#include <fcntl.h>
- X#include <errno.h>
- X#ifdef SYSV
- Xextern int _filbuf(); /* this must appear before stdio.h is included... */
- X#endif
- X#include <stdio.h>
- X#include <malloc.h>
- X#include <ctype.h>
- X#include "emalloc.h"
- X#include <sys/types.h>
- X#include "fileinfo.h"
- X#include "wordinfo.h"
- X#include "phrase.h"
- X
- X/* $Id: Defaults.c,v 1.7 90/10/06 00:11:37 lee Rel1-10 $
- X *
- X * This file is part of nx-text, Liam Quin's text retrieval package.
- X *
- X * Defaults.c -- set up filenames etc. from defaults + cmd line + env.
- X *
- X * -DUNDERHOME is used here, as is DEFAULTCOMMONWORDS, etc. from Makefile.
- X * See comments in Makefile.
- X *
- X * $Log: Defaults.c,v $
- X * Revision 1.7 90/10/06 00:11:37 lee
- X * Prepared for first beta release.
- X *
- X * Revision 1.6 90/08/29 21:46:25 lee
- X * Alpha release.
- X *
- X * Revision 1.5 90/08/09 19:16:08 lee
- X * *** empty log message ***
- X *
- X * Revision 1.4 90/04/21 17:26:26 lee
- X * now passes gcc -W (before Canada...)
- X *
- X * Revision 1.3 90/03/23 17:58:57 lee
- X * Integrated with globals.h and added a few more comments.
- X * Also fixed a bug whereby the configuration file over-rode both
- X * command-line options and environment variables!
- X *
- X * Revision 1.2 90/03/20 20:52:38 lee
- X * removed some globals...
- X *
- X *
- X */
- X
- X/* System and Library calls used in this function:
- X *
- X */
- Xextern int open(), close();
- Xextern void exit();
- X
- Xextern int atoi(), strcmp(), strlen();
- X#ifndef tolower
- X extern int tolower();
- X#endif
- Xextern char *strcpy();
- Xextern void perror();
- Xextern int ReadCommonWords(), IsDir();
- Xint cknatstr();
- Xstatic int NextChar();
- Xstatic void ReadDefaultFile();
- X
- X
- X/* Records where a setting's current value came from.  SetDefaults() and
- X * ReadDefaultFile() test these to decide whether a value may be replaced
- X * and which source to name in an error message.
- X */
- Xtypedef enum {
- X FW_Cmdline, /* given as a command-line option */
- X FW_Envvar, /* taken from an environment variable */
- X FW_Default, /* use the default */
- X FW_File, /* from the config file */
- X FW_None /* don't use any at all */
- X} t_FromWhere;
- X
- X/* Provenance of the database directory, the common-word file and the
- X * document search path respectively; all start out as "default".
- X */
- Xstatic t_FromWhere DirFromWhere = FW_Default;
- Xstatic t_FromWhere CommonFromWhere = FW_Default;
- Xstatic t_FromWhere DocFromWhere = FW_Default;
- X
- Xextern int MakeDocPath(); /* hand it DOCPATH... */
- Xextern int AsciiTrace;
- X
- X/* both defined later in this file */
- Xchar *mkdbm();
- Xchar *joinstr3();
- X
- X/* should PCM_HalfCase be in globals.h??? */
- X/* phrase case-matching level; SetDefaults() changes it for the -m option */
- Xt_PhraseCaseMatch PhraseMatchLevel = PCM_HalfCase;
- X
- X/* SetDefaults -- establish the database directory, the index file names,
- X * the common-word file and the document search path.
- X *
- X * argc/argv are main()'s arguments.  Leading options are scanned by hand
- X * (getopt is left for main) and the ones handled here are rewritten in
- X * place to -z / -Z so that a later getopt pass can ignore them:
- X *	-m x	phrase matching: p, h or a
- X *	-v	increase the trace level
- X *	-t N	set the trace level
- X *	-c file	common-word file (+c turns the list off)
- X *	-d dir	database directory
- X * Scanning stops at the first argument that is not an option.
- X *
- X * Sources, as coded below:
- X *	directory:   -d, else $LQTEXTDIR, else $HOME/UNDERHOME if compiled in
- X *	common file: -c, else the config file, else $LQCOMMON
- X *	doc path:    $DOCPATH, else the config file, else "."
- X *
- X * Every error is reported on stderr and ends the program with exit(1).
- X */
- Xvoid
- XSetDefaults(argc, argv)
- X    int argc;
- X    char **argv;
- X{
- X    extern char *getenv();
- X    extern char *progname;
- X    char *p;
- X
- X    /* main() should have set progname.  If it didn't, we don't strip
- X     * the leading / as this is (I hope!) a testing and not a production
- X     * version... and an early test at that!
- X     */
- X    if (!progname || !*progname) progname = argv[0];
- X
- X    /* don't use getopts, as we'll be using that later in main(),
- X     * and it doesn't like being called twice.
- X     * As a result, main() should ignore the z: option.
- X     */
- X    while (--argc > 0) {
- X        char TurnOn;
- X
- X        ++argv;
- X        if (**argv != '-' && **argv != '+') {
- X            break; /* not an option, so stop looking */
- X        }
- X        TurnOn = (**argv == '-');
- X
- X        switch ((*argv)[1]) {
- X        case 'm': /* precise matching */
- X            argv[0][1] = 'z'; /* so it gets ignored by getopt */
- X
- X            if (!*(p = &argv[0][2])) {
- X                if (argc > 1) {
- X                    argc--; argv++;
- X                    p = (*argv);
- X                } else {
- X                    fprintf(stderr,
- X                        "%s: -m must be followed by a, h or p; see -x\n",
- X                        progname);
- X                    exit(1);
- X                }
- X            }
- X            /* test *p first: the next argument may be an empty string,
- X             * in which case p[1] lies beyond its terminator
- X             */
- X            if (*p && p[1]) {
- X                fprintf(stderr,
- X                    "%s: -m must be followed by a, h or p, not \"%s\"\n",
- X                    progname, p);
- X            }
- X
- X            switch (*p) {
- X            case 'p': /* precise */
- X                PhraseMatchLevel = PCM_SameCase;
- X                break;
- X            case 'h': /* heuristic */
- X                PhraseMatchLevel = PCM_HalfCase;
- X                break;
- X            case 'a': /* any, approximate */
- X                PhraseMatchLevel = PCM_AnyCase;
- X                break;
- X            default:
- X                fprintf(stderr,
- X                    "%s: -m must be followed by \"p\", \"h\" or \"a\";\n",
- X                    progname);
- X                fprintf(stderr,
- X                    "use %s -xv for more explanation.\n", progname);
- X                exit(1);
- X            }
- X            break;
- X
- X        case 'v': /* -v is the same as -t1 */
- X            argv[0][1] = 'Z'; /* so it gets ignored by getopt */
- X            ++AsciiTrace;
- X            break;
- X
- X        case 't': /* trace level */
- X            argv[0][1] = 'z'; /* so it gets ignored by getopt */
- X            if (argv[0][2] != '\0') {
- X                p = &argv[0][2];
- X            } else if (argc > 1) {
- X                argc--;
- X                p = (*++argv);
- X            } else {
- X                p = "1";
- X            }
- X            if (cknatstr(p)) {
- X                AsciiTrace = atoi(p);
- X            } else {
- X                fprintf(stderr, "%s: -t: \"%s\" is not a number\n",
- X                    progname, p);
- X                exit(1);
- X            }
- X            if (AsciiTrace <= 0) AsciiTrace = 1;
- X            fprintf(stderr, "%s: trace level set to %d\n",
- X                progname, AsciiTrace);
- X            break;
- X
- X        case 'c': /* common file */
- X            if (!TurnOn) {
- X                /* Turn off, +c, may be undocumented right now */
- X                CommonFromWhere = FW_None;
- X                break;
- X            }
- X            CommonFromWhere = FW_Cmdline;
- X            argv[0][1] = 'z'; /* so it gets ignored by getopt */
- X            if ((*argv)[2] != '\0') {
- X                /* the name is glued to the option: -cFILE.  Note the
- X                 * parentheses -- *argv[2] would mean *(argv[2]).
- X                 */
- X                CommonWordFile = &(*argv)[2];
- X            } else if (argc > 1) {
- X                CommonWordFile = argv[1];
- X                argc--; argv++;
- X            } else {
- X                fprintf(stderr,
- X                    "%s: -c option must be followed by a filename\n",
- X                    progname);
- X                exit(1);
- X            }
- X            break;
- X
- X        case 'd':
- X            argv[0][1] = 'z'; /* so it gets ignored by getopt */
- X            DirFromWhere = FW_Cmdline;
- X            if (argv[0][2] != '\0') {
- X                DatabaseDir = &argv[0][2];
- X            } else if (argc > 1) {
- X                DatabaseDir = argv[1];
- X                argc--; argv++;
- X            } else {
- X                fprintf(stderr,
- X                    "%s: %cd must be followed by a directory name\n",
- X                    progname, TurnOn ? '-' : '+');
- X                exit(1);
- X            }
- X            break;
- X        } /* other options are left untouched for main() */
- X    }
- X
- X    /* now we have parsed the command line arguments, so look for the
- X     * default directory
- X     */
- X    if (DirFromWhere == FW_Default) {
- X        char *t;
- X
- X        if ((t = getenv("LQTEXTDIR")) != (char *) 0) {
- X            DatabaseDir = emalloc(strlen(t) + 1);
- X            (void) strcpy(DatabaseDir, t);
- X            DirFromWhere = FW_Envvar;
- X        } else {
- X#ifdef UNDERHOME
- X            char *home = getenv("HOME");
- X
- X            if (home) {
- X                DatabaseDir = joinstr3(home, "/", UNDERHOME);
- X                if (!IsDir(DatabaseDir)) {
- X                    fprintf(stderr,
- X                        "%s: database directory \"%s\" inaccessible.\n",
- X                        progname, DatabaseDir);
- X                    exit(1);
- X                }
- X            } else {
- X                fprintf(stderr, "%s: can't find your login directory ($HOME)\n",
- X                    progname);
- X                exit(1);
- X            }
- X#endif /* UNDERHOME*/
- X            /* in either case it's the default... */
- X            DirFromWhere = FW_Default;
- X        }
- X    }
- X
- X    if (!DatabaseDir || !*DatabaseDir) {
- X        /* This can happen if there is no default, or if the user types
- X         * lqword -d ""
- X         * just to be malicious :-)
- X         */
- X        fprintf(stderr,
- X            "%s: You must give a database directory with -d or $LQTEXTDIR\n",
- X            progname);
- X        fprintf(stderr, " use %s -xv for more details.\n", progname);
- X        exit(1);
- X    }
- X
- X    /* IsDir is in DocPath.c -- perhaps this should be, too. */
- X    if (!IsDir(DatabaseDir)) {
- X        char *msg = (char *) 0;
- X
- X        switch (DirFromWhere) {
- X        case FW_Cmdline:
- X            msg = " (specified with the -d option)";
- X            break;
- X        case FW_Envvar:
- X            msg = " (from $LQTEXTDIR)";
- X            break;
- X        }
- X        fprintf(stderr, "%s: \"%s\"%s is not a directory.\n",
- X            progname, DatabaseDir, msg ? msg : " ");
- X        exit(1);
- X    }
- X
- X    /* set default filenames: keep a name that is already set, otherwise
- X     * use the compiled-in one, and put it under the database directory
- X     */
- X#define IfNot(x, y) ((x) ? (x) : (y))
- X
- X    FileIndex = mkdbm(DatabaseDir, IfNot(FileIndex, FILEINDEX));
- X    WordIndex = mkdbm(DatabaseDir, IfNot(WordIndex, WORDINDEX));
- X
- X    DataBase = joinstr3(DatabaseDir, "/", IfNot(DataBase, DATABASE));
- X    FidFile = joinstr3(DatabaseDir, "/", IfNot(FidFile, FIDFILE));
- X    WidFile = joinstr3(DatabaseDir, "/", IfNot(WidFile, WIDFILE));
- X    WidIndexFile =
- X        joinstr3(DatabaseDir, "/", IfNot(WidIndexFile, WIDINDEXFILE));
- X
- X    /* the config file may supply a common-word file and a document path */
- X    ReadDefaultFile();
- X
- X    if (AsciiTrace) {
- X        fprintf(stderr, "%s: lqtext directory \"%s\"\n",progname,DatabaseDir);
- X    }
- X
- X    if (CommonFromWhere == FW_Default) {
- X        char *c = getenv("LQCOMMON");
- X
- X        if (c) {
- X            CommonWordFile = emalloc(strlen(c) + 1);
- X            (void) strcpy(CommonWordFile, c);
- X            CommonFromWhere = FW_Envvar;
- X        }
- X    }
- X
- X    if (CommonFromWhere != FW_None && CommonWordFile && *CommonWordFile) {
- X        extern int errno;
- X        int c;
- X
- X        /* a relative name is taken to be inside the database directory */
- X        if (*CommonWordFile != '/') {
- X            CommonWordFile = joinstr3(DatabaseDir, "/", CommonWordFile);
- X        }
- X
- X        /* open it now purely to check that it can be read; a missing
- X         * built-in default is quietly dropped, anything else is fatal
- X         */
- X        if ((c = open(CommonWordFile, O_RDONLY, 0)) < 0) {
- X            if (CommonFromWhere != FW_Default) {
- X                int e = errno;
- X                char *msg = " ";
- X
- X                switch (CommonFromWhere) {
- X                case FW_Cmdline:
- X                    msg = " (from the -c option)";
- X                    break;
- X                case FW_Envvar:
- X                    /* name the variable that was actually read above */
- X                    msg = " (from $LQCOMMON)";
- X                    break;
- X                }
- X
- X                fprintf(stderr,"%s: can't read common-word file%s ",progname,msg);
- X                errno = e; /* fprintf may have changed it */
- X                if (errno) {
- X                    perror(CommonWordFile);
- X                } else {
- X                    fprintf(stderr, "\"%s\"\n", CommonWordFile);
- X                }
- X                exit(1);
- X            }
- X            CommonWordFile = (char *) 0;
- X        } else {
- X            (void) close(c); /* it's OK */
- X        }
- X    }
- X
- X    if ((p = getenv("DOCPATH")) != (char *) 0) {
- X        switch (DocFromWhere) {
- X        case FW_File:
- X            if (AsciiTrace > 1) {
- X                fprintf(stderr, "%s: DOCPATH (%s) overrides %s (%s)\n",
- X#ifdef CONFIGFILE
- X                    progname, p, CONFIGFILE, DocPath
- X#else
- X                    progname, p, "README", DocPath
- X#endif
- X                );
- X            }
- X            efree(DocPath);
- X            /* FALL THROUGH */
- X        case FW_Default:
- X        default: /* ? */
- X            DocPath = emalloc((unsigned) (strlen(p) + 1));
- X            (void) strcpy(DocPath, p);
- X            DocFromWhere = FW_Envvar;
- X            break;
- X        }
- X    }
- X
- X    if (!DocPath || !*DocPath) {
- X        /* use a writable copy rather than a string literal, because
- X         * MakeDocPath() may write into the string it is handed
- X         */
- X        DocPath = emalloc(2);
- X        (void) strcpy(DocPath, ".");
- X    }
- X
- X#define SetOrNot(s) ( (s && *s) ? s : (s ? "[empty]" : "[null]" ) )
- X
- X    /* this is always here -- it's only checked once, and is actually
- X     * rather useful.
- X     */
- X    if (AsciiTrace > 2) {
- X        fprintf(stderr, "%s: CommonWordFile = \"%s\"\n", progname,
- X            SetOrNot(CommonWordFile));
- X        fprintf(stderr, "%s: DatabaseDir = \"%s\"\n", progname,
- X            SetOrNot(DatabaseDir));
- X        fprintf(stderr, "%s: DocPath = \"%s\"\n", progname,
- X            SetOrNot(DocPath));
- X        fprintf(stderr, "%s: FileIndex = \"%s\"\n", progname,
- X            SetOrNot(FileIndex));
- X        fprintf(stderr, "%s: WordIndex = \"%s\"\n", progname,
- X            SetOrNot(WordIndex));
- X        fprintf(stderr, "%s: DataBase = \"%s\"\n", progname,
- X            SetOrNot(DataBase));
- X        fprintf(stderr, "%s: FidFile = \"%s\"\n", progname,
- X            SetOrNot(FidFile));
- X        fprintf(stderr, "%s: WidFile = \"%s\"\n", progname,
- X            SetOrNot(WidFile));
- X        fprintf(stderr, "%s: WidIndexFile = \"%s\"\n", progname,
- X            SetOrNot(WidIndexFile));
- X    }
- X
- X    (void) MakeDocPath(DocPath);
- X    /* DocPath is no longer needed, so getenv() can be called again now */
- X
- X    if (CommonWordFile && *CommonWordFile) {
- X        (void) ReadCommonWords(CommonWordFile);
- X    }
- X}
- X
- X/* DefaultUsage -- print on stderr a summary of the options that
- X * SetDefaults() handles.  When AsciiTrace is non-zero the longer form is
- X * printed, which also shows the current database directory and document
- X * path.  Takes no arguments and returns nothing.
- X */
- Xvoid
- XDefaultUsage()
- X{
- X    extern char *progname;
- X
- X    fprintf(stderr, "\
- X -c file -- ignore words that are listed in the named file\n\
- X -d dir -- use the lq-text database in the named directory\n\
- X -m c -- set matching criteria -- c is \"p\", \"h\" or \"a\"\n");
- X    if (AsciiTrace) {
- X        fprintf(stderr, "\
- X -m p uses precise matching, where CaSe is significant;\n\
- X -m h uses heuristic matching, which is the default, and\n\
- X -m a uses approximate matching.\n");
- X    }
- X
- X    /* -x and -xv are described relative to the form being printed now */
- X    fprintf(stderr, "\n\
- X -t N -- set trace level to N (default is zero)\n\
- X -x -- print %s explanation\n\
- X -xv -- print %s explanation\n\
- X -V -- print version information\n\
- X -v -- be verbose (same as -t 1)\n",
- X        AsciiTrace ? "a shorter" : "this",
- X        AsciiTrace ? "this" : "a longer");
- X    if (AsciiTrace) {
- X        fprintf(stderr, "\
- XThe current database directory is \"%s\";\n\
- X%s will search the path \"%s\" for documents.\n", DatabaseDir, progname, DocPath);
- X    }
- X}
- X
- X/* This should be in smalldb.c I think */
- X/* mkdbm -- build the dbm base name root/prefix and, when DBMCREAT is 0,
- X * make sure that the matching ".dir" and ".pag" files exist.
- X *
- X * root   -- database directory, e.g. /tmp/lqtext
- X * prefix -- base name of the dbm database, e.g. wordlist
- X *
- X * Returns a newly allocated string (from joinstr3) holding the prefix to
- X * give to dbm, not the name of either file.  Exits with status 1 if a
- X * missing file cannot be created.
- X */
- Xchar *
- Xmkdbm(root, prefix)
- X    char *root; /* /tmp/lqtext */
- X    char *prefix; /* wordlist, --> /tmp/lqtext.{dir,pag} for dbm */
- X{
- X#if DBMCREAT == 0
- X    extern int errno;
- X    extern char *progname;
- X    static char *Suffix[2] = { "dir", "pag" };
- X    int n;
- X#endif
- X    /* Although ndbm will create files automatically, gdbm and dbm will
- X     * not, so we do that here.
- X     * Also, it might take a while to get to here, so it will be a lot
- X     * better if we get an error message now.
- X     */
- X    char *p = joinstr3(root, "/", prefix);
- X
- X#if DBMCREAT == 0
- X    for (n = 0; n < 2; n++) {
- X        char *q = joinstr3(p, ".", Suffix[n]);
- X        int fd;
- X
- X        errno = 0; /* paranoia */
- X
- X        /* only a file that is really absent is created; any other
- X         * failure is left for the dbm open to report
- X         */
- X        if ((fd = open(q, O_RDONLY, 0)) < 0 && errno == ENOENT) {
- X            fd = open(q, O_CREAT|O_RDWR, 0666); /* rw-rw-rw & umask */
- X
- X            if (fd < 0) {
- X                fprintf(stderr, "%s: can't create \"%s\"\n", progname, q);
- X                (void) exit(1);
- X            }
- X        }
- X
- X        /* close the probe descriptor as well as a newly created one */
- X        if (fd >= 0) {
- X            (void) close(fd);
- X        }
- X        (void) efree(q);
- X    }
- X#endif /*DBMCREAT*/
- X
- X    return p; /* the prefix for dbm, not the whole path */
- X}
- X
- X/* this belongs in string.c or something */
- X/* joinstr3 -- return a freshly emalloc'd string holding a, b and c
- X * concatenated in that order.  The caller owns the result.
- X */
- Xchar *
- Xjoinstr3(a, b, c)
- X    char *a, *b, *c;
- X{
- X    int LenA = strlen(a);
- X    int LenB;
- X    char *Result;
- X
- X    /* the middle string is nearly always one character ("/" or "."),
- X     * so that case is recognised without calling strlen
- X     */
- X    if (b[0] != '\0' && b[1] == '\0') {
- X        LenB = 1;
- X    } else {
- X        LenB = strlen(b);
- X    }
- X
- X    Result = emalloc(LenA + LenB + strlen(c) + 1);
- X    /* ASSERT: Result != 0 */
- X    (void) strcpy(Result, a);
- X    (void) strcpy(Result + LenA, b);
- X    (void) strcpy(Result + LenA + LenB, c);
- X
- X    return Result;
- X}
- X
- X/* values for the Map argument of NextToken() and NextChar() */
- X#define LCNOMAP 0 /* Token -- leave case alone */
- X#define LCMAP 1 /* map to lower case */
- X
- X/* count of newlines consumed so far by NextChar(); printed in errors */
- Xstatic int RMLine = 0;
- X
- X/* declared at file scope: a block-scope function declaration may not be
- X * given the static storage class
- X */
- Xstatic char *NextToken(); /* see below */
- X
- X/* ReadDefaultFile -- read the per-database configuration file, which is
- X * DatabaseDir/CONFIGFILE, or DatabaseDir/README if CONFIGFILE is not
- X * defined.
- X *
- X * Tokens understood:
- X *	common FILE	  common-word file; used only if nothing else has
- X *			  set one yet (CommonFromWhere == FW_Default)
- X *	path P, docpath P document search path
- X *	end		  stop reading
- X * "#" starts a comment (see NextChar).
- X *
- X * A file that cannot be opened is not an error; a warning is printed for
- X * a permission failure, or for any failure when AsciiTrace is set.  An
- X * unknown token or a keyword with no value exits with status 1.
- X */
- Xstatic void
- XReadDefaultFile()
- X{
- X    extern int errno;
- X    extern char *progname;
- X
- X#ifdef CONFIGFILE
- X    char *ReadMe = joinstr3(DatabaseDir, "/", CONFIGFILE);
- X#else
- X    char *ReadMe = joinstr3(DatabaseDir, "/", "README");
- X#endif
- X    FILE *fp;
- X    char *Token;
- X
- X    /* This is paranoid... */
- X    if (!ReadMe || !*ReadMe) {
- X        fprintf(stderr, "%s: Internal: %s: %d: ReadMe %s\n",
- X            progname, __FILE__, __LINE__, SetOrNot(ReadMe));
- X        exit(1);
- X    }
- X
- X    errno = 0;
- X    if ((fp = fopen(ReadMe, "r")) == (FILE *) 0) {
- X        /* lack of permission on a file is reported as EACCES */
- X        if (errno == EPERM || errno == EACCES) {
- X            fprintf(stderr,
- X                "%s: Warning: you don't have permission to read \"%s\"\n",
- X                progname, ReadMe);
- X        } else if (AsciiTrace) {
- X            int e = errno;
- X
- X            fprintf(stderr, "%s: warning: can't open config file ", progname);
- X            errno = e;
- X            perror(ReadMe);
- X        }
- X        (void) efree(ReadMe);
- X        return;
- X    }
- X
- X    /* Read README up to an "end" line, ignoring lines starting with # */
- X
- X    while ((Token = NextToken(fp, ReadMe, LCMAP)) != (char *) 0) {
- X        if (STREQ(Token, "end")) break;
- X
- X        if (STREQ(Token, "common")) {
- X            if (!(Token = NextToken(fp, ReadMe, LCNOMAP))) {
- X                fprintf(stderr, "%s: %s %d: unexpected eof at common file\n",
- X                    progname, ReadMe, RMLine);
- X                exit(1);
- X            } else if (CommonFromWhere == FW_Default) {
- X                /* Token is NextToken's static buffer, so copy it */
- X                CommonWordFile = emalloc((unsigned) (strlen(Token) + 1));
- X                (void) strcpy(CommonWordFile, Token);
- X                CommonFromWhere = FW_File;
- X            }
- X        } else if (STREQ(Token, "path") || STREQ(Token, "docpath")) {
- X            if (!(Token = NextToken(fp, ReadMe, LCNOMAP))) {
- X                fprintf(stderr, "%s: %s: %d: unexpected eof at document path\n",
- X                    progname, ReadMe, RMLine);
- X                exit(1);
- X            } else {
- X                /* a second path line replaces the copy made for the first */
- X                if (DocFromWhere == FW_File && DocPath) {
- X                    (void) efree(DocPath);
- X                }
- X                DocPath = emalloc((unsigned) (strlen(Token) + 1));
- X                (void) strcpy(DocPath, Token);
- X                DocFromWhere = FW_File;
- X            }
- X        } else {
- X            fprintf(stderr, "%s: \"%s\": %d: token(\"%s\") unexpected\n",
- X                progname, ReadMe, RMLine, Token);
- X            exit(1);
- X        }
- X    } /* while */
- X
- X    (void) fclose(fp);
- X    (void) efree(ReadMe);
- X}
- X
- X/* NextToken -- return the next token from the configuration file.
- X *
- X * fd   -- stream to read (characters come through NextChar)
- X * Name -- name of the file, used only in error messages
- X * Map  -- LCMAP to fold the token to lower case, LCNOMAP to leave it
- X *
- X * A token ends at a space or newline, or at the closing quote when it
- X * began with ' or ".  A backslash takes the following character
- X * literally.  Returns a pointer to a static buffer that the next call
- X * overwrites, or a null pointer at end of file.  A token that does not
- X * fit in the buffer, a space or newline inside quotes, and end of file
- X * in the middle of a token are all fatal (exit 1).
- X */
- Xstatic char *
- XNextToken(fd, Name, Map)
- X    FILE *fd;
- X    char *Name;
- X    int Map;
- X{
- X    extern char *progname;
- X    int ch;
- X    static char buf[50];
- X    register char *q = buf;
- X    char *Limit = &buf[sizeof(buf) - 1]; /* keep room for the '\0' */
- X    int InQuote = 0;
- X    int OriginalMap = Map;
- X
- X    while ((ch = NextChar(fd, Name, Map)) != EOF) {
- X        switch (ch) {
- X        case '"': case '\'':
- X            if (q == buf && !InQuote) InQuote = ch;
- X            else if (ch == InQuote) {
- X                *q = '\0';
- X                if (AsciiTrace > 10) {
- X                    fprintf(stderr, "RM[%s] ", buf);
- X                }
- X                return buf;
- X            }
- X            Map = 0; /* no case conversion inside strings */
- X            continue; /* the quote itself is not stored */
- X        case '\\':
- X            if ((ch = NextChar(fd, Name, Map)) == EOF) {
- X                fprintf(stderr, "%s: %s; %d: EOF after \\ unexpected!\n",
- X                    progname, Name, RMLine);
- X                exit(1);
- X            }
- X            break;
- X        case ' ':
- X        case '\n':
- X            if (InQuote) {
- X                /* InQuote holds the quote character, hence %c */
- X                fprintf(stderr, "%s: %s: %d: missing quote -->%c<--\n",
- X                    progname, Name, RMLine, InQuote);
- X                exit(1);
- X            }
- X            *q = '\0';
- X            if (q > buf) return buf;
- X            /* only leading white space so far: start again */
- X            return NextToken(fd, Name, OriginalMap);
- X        default:
- X            break;
- X        }
- X
- X        /* store the character, refusing to run off the end of buf */
- X        if (q >= Limit) {
- X            fprintf(stderr, "%s: %s: %d: token too long (limit %d)\n",
- X                progname, Name, RMLine, (int) sizeof(buf) - 1);
- X            exit(1);
- X        }
- X        *q++ = ch;
- X    }
- X    if (q > buf) {
- X        fprintf(stderr, "%s: %s: %d: unexpected end of file\n",
- X            progname, Name, RMLine);
- X        exit(1);
- X    }
- X    return (char *) 0;
- X}
- X
- X/* NextChar -- return the next significant character from fd, or EOF.
- X *
- X * A '#' and everything up to and including the newline that ends the
- X * line is discarded; end of file inside such a comment is fatal.
- X * RMLine is incremented for every newline consumed.  A newline is
- X * returned as itself.  Space, tab, form feed and carriage return come
- X * back as ' ' when Map is zero and unchanged otherwise.  Any other
- X * character is folded to lower case when Map is non-zero.
- X * Name is used only in the error message.
- X */
- Xstatic int
- XNextChar(fd, Name, Map)
- X    FILE *fd;
- X    char *Name;
- X    int Map;
- X{
- X    int c;
- X
- X    for (;;) {
- X        c = getc(fd);
- X        if (c == EOF) {
- X            return EOF;
- X        }
- X
- X        if (c == '#') {
- X            /* throw the rest of the line away, newline included */
- X            do {
- X                c = getc(fd);
- X                if (c == EOF) {
- X                    fprintf(stderr, "%s: %s: %d: unexpected end of file\n",
- X                        progname, Name, RMLine);
- X                    exit(1);
- X                }
- X            } while (c != '\n');
- X            ++RMLine;
- X            continue;
- X        }
- X
- X        if (c == '\n') {
- X            ++RMLine;
- X            return c;
- X        }
- X
- X        if (c == ' ' || c == '\t' || c == '\f' || c == '\r') {
- X            return Map ? c : ' ';
- X        }
- X
- X        if (Map && isupper(c)) {
- X            return tolower(c);
- X        }
- X        return c;
- X    }
- X}
- X
- X/* cknatstr -- check that str looks like a decimal integer.
- X *
- X * Accepted: optional leading white space, one optional '+' or '-', then
- X * one or more digits running to the end of the string.
- X * Returns 1 if str has that form and 0 otherwise.  A leading '-' is
- X * accepted, so the caller must still range-check the value.
- X */
- Xint
- Xcknatstr(str)
- X    char *str;
- X{
- X    register char *p = str;
- X
- X    /* The <ctype.h> tests are only defined for EOF and values that fit
- X     * in an unsigned char, so a plain char must be converted first.
- X     */
- X
- X    /* skip leading white space */
- X    while (isspace((unsigned char) *p)) p++;
- X    if (!*p) return 0;
- X
- X    /* allow a leading sign */
- X    if (*p == '-' || *p == '+') p++;
- X    if (!*p) return 0;
- X
- X    /* now skip digits... */
- X    while (isdigit((unsigned char) *p)) p++;
- X
- X    /* *p was not '\0' before the digit loop, so reaching the end of the
- X     * string here means at least one digit was seen
- X     */
- X    return (*p == '\0');
- X}
- X
- X/* you can tell I am tired by the extra end-while etc. comments.
- X * wonder if it will work?
- X * perhaps if I took my socks off too.
- X *
- X * Hmm, yeah, that worked.
- X */
- @@@End of lq-text/src/liblqtext/Defaults.c
- echo x - lq-text/src/liblqtext/DocPath.c 1>&2
- sed 's/^X//' >lq-text/src/liblqtext/DocPath.c <<'@@@End of lq-text/src/liblqtext/DocPath.c'
- X/* DocPath.c -- Copyright 1989 Liam R. Quin. All Rights Reserved.
- X * This code is NOT in the public domain.
- X * See the file COPYRIGHT for full details.
- X *
- X * $Id: DocPath.c,v 1.5 90/10/06 00:11:53 lee Rel1-10 $
- X *
- X *
- X * $Log: DocPath.c,v $
- X * Revision 1.5 90/10/06 00:11:53 lee
- X * Prepared for first beta release.
- X *
- X * Revision 1.4 90/10/05 23:43:19 lee
- X * Put the debugging in isfile() within ASCIITRACE ifdefs.
- X *
- X * Revision 1.3 90/08/29 21:46:32 lee
- X * Alpha release.
- X *
- X * Revision 1.2 90/08/09 19:16:12 lee
- X * *** empty log message ***
- X *
- X *
- X */
- X
- X#include "globals.h" /* defines and declarations for database filenames */
- X
- X#include <sys/types.h>
- X#include <sys/stat.h>
- X#ifdef BSD
- X# include <sys/param.h>
- X# define PATH_MAX MAXPATHLEN
- X#else /*not BSD*/
- X# include <limits.h> /* for PATH_MAX */
- X#endif
- X#include <stdio.h>
- X#include "emalloc.h"
- X#include "fileinfo.h"
- X
- X/** Unix system calls: **/
- Xextern int stat();
- X/** C Library functions: **/
- Xextern int strlen();
- Xextern char *strcpy();
- X/** Within this file: **/
- Xextern int IsDir();
- X/** **/
- X
- X#ifdef ASCIITRACE
- Xextern int AsciiTrace;
- X#endif
- X
- X/* One directory of the document search path.  MakeDocPath() builds a
- X * singly linked list of these and _FindFile() walks it in order.
- X */
- Xtypedef struct s_DocPath {
- X char *DirName; /* separately allocated copy of the directory name */
- X struct s_DocPath *Next; /* next directory to try, or 0 at the end */
- X} t_DocPath;
- X
- X/* head of the list; 0 until MakeDocPath() has added a directory */
- Xstatic t_DocPath *XDocPath = 0;
- X
- X#ifndef PATH_MAX
- X# define PATH_MAX 2048
- X#endif
- X
- X/* _FindFile -- locate the document called Name.
- X *
- X * With no document path set up, Name itself is returned if it names a
- X * regular file.  Otherwise each directory of the path is tried in order
- X * and the first "dir/Name" that is a regular file is returned.
- X *
- X * Returns a null pointer if nothing is found.  A successful result is
- X * either Name itself or a static buffer that the next call overwrites.
- X * Candidates too long for that buffer are skipped.
- X */
- Xchar *
- X_FindFile(Name)
- X    char *Name;
- X{
- X    int IsFile();
- X
- X    t_DocPath *p;
- X    static char Buffer[PATH_MAX + 3]; /* +1 for "\0" */
- X    unsigned NameLength;
- X
- X    if (!Name) return (char *) 0;
- X
- X    if (!XDocPath) {
- X#ifdef ASCIITRACE
- X        if (AsciiTrace > 4) {
- X            fprintf(stderr, "FindFile(%s) --> %s\n", Name,
- X                IsFile(Name) ? Name : (char *) 0);
- X        }
- X#endif
- X        return IsFile(Name) ? Name : (char *) 0;
- X    }
- X
- X    NameLength = (unsigned) strlen(Name);
- X
- X    for (p = XDocPath; p; p = p->Next) {
- X        /* directory + "/" + name + '\0' must fit in Buffer */
- X        if ((unsigned) strlen(p->DirName) + 1 + NameLength >= sizeof(Buffer)) {
- X            continue;
- X        }
- X        (void) sprintf(Buffer, "%s/%s", p->DirName, Name);
- X        if (IsFile(Buffer)) {
- X#ifdef ASCIITRACE
- X            if (AsciiTrace > 4) {
- X                fprintf(stderr, "FindFile(%s) --> %s\n", Name, Buffer);
- X            }
- X#endif
- X            return Buffer;
- X        }
- X    }
- X
- X    return (char*) 0;
- X}
- X
- X/* MakeDocPath -- turn a colon-separated list of directories into the
- X * linked list that _FindFile() searches.
- X *
- X * Path -- e.g. "/usr/doc:/home/me/text".  It is only read, never
- X *         written, so a string literal or a getenv() result is safe.
- X *
- X * Elements that are not directories are left out and the scan carries on
- X * with the next element.  An empty element ends the scan.  The list is
- X * built only once; later calls do nothing.  Always returns 0.
- X */
- Xint
- XMakeDocPath(Path)
- X    char *Path;
- X{
- X    char *Start, *End;
- X    t_DocPath **dpp;
- X
- X    if (XDocPath != (t_DocPath *) 0) {
- X        return 0; /* already built */
- X    }
- X
- X    dpp = &XDocPath;
- X
- X    /* For each element in DocPath, */
- X    for (Start = Path; Start && *Start; Start = End) {
- X        char *Dir;
- X        int Length, i;
- X
- X        /* find the end of this bit of the path */
- X        for (End = Start; *End && *End != ':'; End++)
- X            ;
- X
- X        Length = End - Start;
- X        if (Length == 0) break;
- X
- X        /* take a private, terminated copy of the element */
- X        Dir = emalloc((unsigned) (Length + 1));
- X        for (i = 0; i < Length; i++) {
- X            Dir[i] = Start[i];
- X        }
- X        Dir[Length] = '\0';
- X
- X        /* step over the separator whether or not the element is kept */
- X        if (*End == ':') {
- X            End++;
- X        }
- X
- X        /* if not a directory, delete from path */
- X        if (!IsDir(Dir)) {
- X            (void) efree(Dir);
- X            continue;
- X        }
- X
- X        /* add to the end of the linked list */
- X        *dpp = (t_DocPath *) emalloc(sizeof(t_DocPath));
- X        (*dpp)->DirName = Dir;
- X        (*dpp)->Next = (t_DocPath *) 0;
- X        dpp = &(*dpp)->Next;
- X    }
- X    return 0;
- X}
- X
- X/* IsDir -- return 1 if Dir names an existing directory, otherwise 0.
- X * A null pointer, an empty string and a name that stat() rejects all
- X * give 0.
- X */
- Xint
- XIsDir(Dir)
- X    char *Dir;
- X{
- X    struct stat Info;
- X
- X    if (Dir == (char *) 0 || *Dir == '\0') {
- X        return 0;
- X    }
- X    if (stat(Dir, &Info) < 0) {
- X        return 0;
- X    }
- X    return ((Info.st_mode & S_IFMT) == S_IFDIR) ? 1 : 0;
- X}
- X
- X/* IsFile -- return 1 if Path names an existing regular file, otherwise
- X * 0.  When compiled with ASCIITRACE and AsciiTrace is above 20, each
- X * call and each positive answer is logged on stderr.
- X */
- Xint
- XIsFile(Path)
- X    char *Path;
- X{
- X    struct stat Info;
- X    int Regular;
- X
- X#ifdef ASCIITRACE
- X    if (AsciiTrace > 20) {
- X        fprintf(stderr, "IsFile(%s)\n", Path);
- X    }
- X#endif
- X
- X    Regular = (stat(Path, &Info) >= 0 &&
- X        (Info.st_mode & S_IFMT) == S_IFREG);
- X
- X#ifdef ASCIITRACE
- X    if (Regular && AsciiTrace > 20) {
- X        fprintf(stderr, "\t\tIsFile(%s) returns true.\n", Path);
- X    }
- X#endif
- X    return Regular;
- X}
- @@@End of lq-text/src/liblqtext/DocPath.c
- echo x - lq-text/src/liblqtext/FileList.c 1>&2
- sed 's/^X//' >lq-text/src/liblqtext/FileList.c <<'@@@End of lq-text/src/liblqtext/FileList.c'
- X/* FileList.c -- Copyright 1989 Liam R. Quin. All Rights Reserved.
- X * This code is NOT in the public domain.
- X * See the file COPYRIGHT for full details.
- X */
- X
- X/*
- X *
- X * FileList -- operations on the list of files. This is the Document
- X * Directory part of NX-Text.
- X *
- X * $Id: FileList.c,v 1.8 90/10/13 02:39:05 lee Rel1-10 $
- X *
- X * $Log: FileList.c,v $
- X * Revision 1.8 90/10/13 02:39:05 lee
- X * deleted some incorrect code.
- X *
- X * Revision 1.7 90/10/13 02:21:03 lee
- X * NEEDALIGN stuff
- X *
- X * Revision 1.6 90/10/07 20:37:18 lee
- X * changed ifdef sparc to ifdef NEEDALIGN
- X *
- X * Revision 1.5 90/10/06 00:11:55 lee
- X * Prepared for first beta release.
- X *
- X * Revision 1.4 90/09/29 23:46:14 lee
- X * very minor speedup, and changed a free() to efree().
- X *
- X * Revision 1.3 90/09/20 19:11:05 lee
- X * deleted unused locking code.
- X * removed a sun4-specific memory leak. Other minor changes.
- X *
- X * Revision 1.2 90/08/29 21:46:33 lee
- X * Alpha release.
- X *
- X * Revision 1.1 90/08/09 19:16:15 lee
- X * Initial revision
- X *
- X * Revision 2.2 89/10/08 20:29:10 lee
- X * Working version of nx-text engine. Addfile and wordinfo work OK.
- X *
- X * Revision 2.1 89/10/02 01:12:08 lee
- X * New index format, with Block/WordInBlock/Flags/BytesSkipped info.
- X *
- X * Revision 1.2 89/09/16 21:15:54 lee
- X * First demonstratable version.
- X *
- X * Revision 1.1 89/09/07 21:01:36 lee
- X * Initial revision
- X *
- X *
- X */
- X
- X#include "globals.h" /* defines and declarations for database filenames */
- X
- X#include <stdio.h>
- X#include <malloc.h>
- X#include <unistd.h>
- X#include <sys/types.h>
- X#include <sys/stat.h>
- X#include <signal.h>
- X#include <errno.h>
- X#include <fcntl.h>
- X#include <string.h>
- X
- X#include "smalldb.h"
- X#include "fileinfo.h"
- X#include "emalloc.h"
- X
- X/** Unix system calls that need to be declared: **/
- Xextern int stat();
- Xextern int open(), close(), creat();
- Xextern void exit();
- Xextern int read(), write();
- Xextern unsigned alarm();
- X/** library functions that need to be declared: */
- Xextern int lockf();
- Xextern unsigned sleep();
- Xextern void perror();
- Xextern long atol();
- X
- X/** other (lqtext) functions **/
- Xt_FID GetNextFID();
- Xt_FileInfo *GetFileInfo();
- X/** **/
- X
- X/* GetMaxFID -- return the number currently stored in the FID file
- X * (FidFile), as converted by atol.
- X *
- X * Returns 0 if the file does not exist or cannot be opened (the latter
- X * with a warning).  A read error is fatal (exit 1).
- X */
- Xt_FID
- XGetMaxFID()
- X{
- X    int fd;
- X    int BytesRead;
- X    struct stat StatBuf;
- X    char Buffer[20];
- X
- X    /* ensure that the file is there */
- X    if (stat(FidFile, &StatBuf) == -1) {
- X        return 0;
- X    }
- X
- X    if ((fd = open(FidFile, O_RDWR, 0)) < 0) {
- X        fprintf(stderr, "Warning: Can't open FID file\n");
- X        return 0;
- X    }
- X
- X    /* Read no more than Buffer can hold, whatever size the file claims
- X     * to be, and terminate at the number of bytes actually read.
- X     */
- X    BytesRead = read(fd, Buffer, (unsigned int) (sizeof(Buffer) - 1));
- X    if (BytesRead < 0) {
- X        fprintf(stderr, "Can't read from \"%s\"\n", FidFile);
- X        exit(1);
- X    }
- X
- X    (void) close(fd);
- X
- X    Buffer[BytesRead] = '\0';
- X
- X    return atol(Buffer);
- X}
- X
- X/* GetNextFID -- hand out the next unused file identifier.
- X *
- X * The FID file (FidFile) holds the next number as decimal text.  It is
- X * created if missing.  The stored number is returned and the file is
- X * rewritten with that number plus one.  A missing, zero or negative
- X * stored value is treated as 1.
- X *
- X * Size is accepted but not used.
- X * Any failure to create, open, read or update the file is fatal (exit 1).
- X */
- X/*ARGSUSED*/
- Xt_FID
- XGetNextFID(Size)
- X    long Size; /* to let it keep short FIDs for huge files, except I don't */
- X{
- X    extern long atol();
- X    extern long lseek();
- X
- X    int fd;
- X    int BytesRead;
- X    int Length;
- X    char Buffer[24]; /* room for a 64-bit decimal, newline and '\0' */
- X    struct stat StatBuf;
- X    t_FID Result;
- X
- X    /* ensure that the file is there */
- X    if (stat(FidFile, &StatBuf) == -1) {
- X        fprintf(stderr, "Creating FID file \"%s\"\n", FidFile);
- X        if ((fd = creat(FidFile, 02666)) < 0) {
- X            fprintf(stderr, "Can't create FID file \"%s\"\n", FidFile);
- X            exit(1);
- X        }
- X        (void) close(fd);
- X        return GetNextFID(Size);
- X
- X        /*NOTREACHED*/
- X    }
- X
- X    if ((fd = open(FidFile, O_RDWR, 0)) < 0) {
- X        fprintf(stderr, "Can't open FID file ");
- X        perror(FidFile);
- X        exit(1);
- X    }
- X
- X    /* Read no more than Buffer can hold, whatever size the file claims
- X     * to be, and terminate at the number of bytes actually read.
- X     */
- X    BytesRead = read(fd, Buffer, (unsigned int) (sizeof(Buffer) - 1));
- X    if (BytesRead < 0) {
- X        fprintf(stderr, "Can't read from \"%s\"\n", FidFile);
- X        exit(1);
- X    }
- X
- X    Buffer[BytesRead] = '\0';
- X
- X    Result = atol(Buffer);
- X
- X    if (Result == 0L || *Buffer == '-') {
- X        Result = 1L;
- X    }
- X
- X    (void) sprintf(Buffer, "%lu\n", Result + 1);
- X    Length = strlen(Buffer);
- X
- X    /* Move to the start of the file and write the new value.
- X     * No need to truncate the file, because it didn't shrink!
- X     * lseek takes (fd, long offset, int whence); with no prototype in
- X     * scope the constants must already have those types.
- X     */
- X    (void) lseek(fd, 0L, 0);
- X    if (write(fd, Buffer, (unsigned int) Length) != Length) {
- X        /* an FID that is not recorded would be handed out again */
- X        fprintf(stderr, "Can't update FID file ");
- X        perror(FidFile);
- X        exit(1);
- X    }
- X    (void) close(fd);
- X
- X    return Result;
- X}
- X
- X/* The form in which a t_FileInfo is stored in the file index database.
- X * FileInfo2Phys() allocates the structure with extra room so that the
- X * whole file name follows the fixed fields; SaveFileInfo() stores it and
- X * Phys2FileInfo() converts it back.
- X */
- Xtypedef struct {
- X t_FID FID; /* copied from t_FileInfo.FID */
- X time_t DateLastIndexed; /* copied from t_FileInfo.Date */
- X int FilterType; /* copied from t_FileInfo.FilterType */
- X unsigned NameLength; /* strlen of the name, excluding the '\0' */
- X char CurrentLocation[1]; /* the name; really NameLength + 1 bytes */
- X} t_PhysicalIndexEntry;
- X
- X/* FileInfo2Phys -- build the on-disk form of FileInfo.
- X *
- X * Returns a newly allocated t_PhysicalIndexEntry with the file name
- X * copied in after the fixed fields, or a null pointer if FileInfo is
- X * null, has no name, or the allocation returned null.  The caller owns
- X * the result.
- X */
- Xt_PhysicalIndexEntry *
- XFileInfo2Phys(FileInfo)
- X    t_FileInfo *FileInfo;
- X{
- X    t_PhysicalIndexEntry *Entry;
- X    register int Length;
- X
- X    if (FileInfo == (t_FileInfo *) 0 || FileInfo->Name == (char *) 0) {
- X        return (t_PhysicalIndexEntry *) 0;
- X    }
- X
- X    Length = strlen(FileInfo->Name);
- X
- X    /* the struct already has one byte of name; add room for the rest */
- X    Entry = (t_PhysicalIndexEntry *) emalloc(
- X        sizeof(t_PhysicalIndexEntry) + Length + 1);
- X    if (Entry == (t_PhysicalIndexEntry *) 0) {
- X        return (t_PhysicalIndexEntry *) 0;
- X    }
- X
- X    (void) strcpy(Entry->CurrentLocation, FileInfo->Name);
- X    Entry->NameLength = Length;
- X    Entry->FilterType = FileInfo->FilterType;
- X    Entry->DateLastIndexed = FileInfo->Date;
- X    Entry->FID = FileInfo->FID;
- X
- X    return Entry;
- X}
- X
- X/* Phys2FileInfo -- convert a stored index entry back into a t_FileInfo.
- X *
- X * Returns a newly allocated t_FileInfo whose Name is a separately
- X * allocated, terminated copy of the stored name and whose Stream is
- X * null.  Returns a null pointer if PIE is null or its name is empty.
- X *
- X * The name is returned exactly as stored.  Looking it up with FindFile
- X * here was tried and dropped: with that in place wordinfo spent over
- X * 40% of its time in stat.
- X */
- Xt_FileInfo *
- XPhys2FileInfo(PIE)
- X    t_PhysicalIndexEntry *PIE;
- X{
- X    t_FileInfo *Info;
- X    unsigned Length;
- X
- X    if (PIE == (t_PhysicalIndexEntry *) 0) {
- X        return (t_FileInfo *) 0;
- X    }
- X    Length = PIE->NameLength;
- X    if (Length == 0) {
- X        return (t_FileInfo *) 0;
- X    }
- X
- X    Info = (t_FileInfo *) emalloc(sizeof(t_FileInfo));
- X    Info->Stream = (FILE *) 0;
- X    Info->FilterType = PIE->FilterType;
- X    Info->Date = PIE->DateLastIndexed;
- X    Info->FID = PIE->FID;
- X
- X    /* copy exactly Length bytes and add the terminator ourselves */
- X    Info->Name = emalloc(Length + 1);
- X    (void) strncpy(Info->Name, PIE->CurrentLocation, Length);
- X    Info->Name[Length] = '\0';
- X
- X    return Info;
- X}
- X
- X/* SaveFileInfo -- store FileInfo in the file index database (FileIndex).
- X *
- X * Two records are written, each replacing any earlier one:
- X *	"\375" + name  -->  the FID as decimal text  (name to FID)
- X *	"F" + FID      -->  the t_PhysicalIndexEntry (FID to file info)
- X * The first is written only when the name is not empty.
- X *
- X * Returns -1 if FileInfo is null, cannot be converted, or the database
- X * cannot be opened; otherwise the result of the second dbm_store.
- X */
- Xint
- XSaveFileInfo(FileInfo)
- X    t_FileInfo *FileInfo;
- X{
- X    t_PhysicalIndexEntry *PIE;
- X    datum key, data;
- X    DBM *db;
- X    int RetVal;
- X    char Buffer[24]; /* "F", a 64-bit decimal and '\0' */
- X
- X    if (!FileInfo) return -1;
- X
- X    if ((PIE = FileInfo2Phys(FileInfo)) == (t_PhysicalIndexEntry *) 0) {
- X        return -1;
- X    }
- X
- X    if ((db = startdb(FileIndex)) == (DBM *) 0) {
- X        (void) efree((char *) PIE);
- X        return -1;
- X    }
- X
- X    if (FileInfo->Name && *(FileInfo->Name)) {
- X        /* For the reverse mapping, FileName --> FID ... store an
- X         * entry of the form ([\375]hello, "317").
- X         * This scheme simply has to go.
- X         * I favour a btree, but that may be needlessly complex.
- X         */
- X        int KeyLen = strlen(FileInfo->Name);
- X        key.dptr = emalloc(KeyLen + 2); /* +2: "\375" and \0 */
- X        /* Note: the \375 is so that a file called "F123" does not
- X         * cause confusion with the FID --> info records below
- X         */
- X        *(key.dptr) = '\375';
- X        (void) strcpy(&(key.dptr[1]), FileInfo->Name);
- X        key.dsize = KeyLen + 1;
- X        /* length of name + length of "\375" -- the nul at the end
- X         * is not included.
- X         */
- X
- X        (void) sprintf(Buffer, "%lu", FileInfo->FID);
- X        data.dptr = Buffer;
- X        data.dsize = strlen(Buffer);
- X        (void) dbm_store(db, key, data, DBM_REPLACE);
- X        (void) efree(key.dptr);
- X    }
- X
- X    (void) sprintf(Buffer, "F%lu", FileInfo->FID);
- X
- X    key.dptr = Buffer;
- X    key.dsize = strlen(Buffer);
- X
- X    data.dptr = (char *) PIE;
- X    data.dsize = sizeof(t_PhysicalIndexEntry) + PIE->NameLength;
- X
- X    RetVal = dbm_store(db, key, data, DBM_REPLACE);
- X
- X    enddb(db);
- X
- X    /* the record has been handed to dbm_store, so our copy can go */
- X    (void) efree((char *) PIE);
- X
- X    return RetVal;
- X}
- X
- X/* Name2FID -- look up the FID that was stored for a file name.
- X *
- X * The key is "\375" followed by Name (without the trailing nul), which
- X * is the reverse-mapping record written by SaveFileInfo().
- X *
- X * Returns the FID, or 0 if Name is null or empty or no record is found.
- X * If the database cannot be opened a message is printed on stderr and
- X * -1 is returned.
- X */
- Xt_FID
- XName2FID(Name)
- X    char *Name;
- X{
- X    DBM *db;
- X    datum key, result;
- X    /* big enough for every decimal digit of an unsigned long + nul */
- X    char Digits[sizeof(unsigned long) * 3 + 2];
- X    int i;
- X    extern long atol();
- X
- X    if (!Name || !*Name) return (t_FID) 0;
- X
- X    key.dsize = strlen(Name);
- X    /* +2 is for the "\375" cookie and the nul that strcpy writes */
- X    key.dptr = emalloc(key.dsize + 2);
- X    *(key.dptr) = '\375';
- X    (void) strcpy(&(key.dptr[1]), Name);
- X    key.dsize += 1; /* for the cookie; we don't include the \0 */
- X
- X    if ((db = startdb(FileIndex)) == (DBM *) 0) {
- X        fprintf(stderr, "Name2FID can't get FID for %s (database \"%s\")\n", Name, FileIndex);
- X        (void) efree(key.dptr);
- X        return -1;
- X    }
- X    result = dbm_fetch(db, key);
- X
- X    /* The stored value is just the digits, with no nul after them, so
- X     * it must not be handed to atol() directly.  Take a terminated
- X     * copy, and do so before the database is released.
- X     */
- X    i = 0;
- X    if (result.dptr) {
- X        while (i < (int) result.dsize && i < (int) sizeof(Digits) - 1) {
- X            Digits[i] = result.dptr[i];
- X            i++;
- X        }
- X    }
- X    Digits[i] = '\0';
- X
- X    enddb(db);
- X
- X    (void) efree(key.dptr);
- X
- X    return (i == 0) ? (t_FID) 0 : (t_FID) atol(Digits);
- X}
- X
- X/* GetFileInfo -- fetch the index entry for a given FID.
- X *
- X * Looks up the key "F<FID>" in the FileIndex database and converts the
- X * physical entry found there with Phys2FileInfo().
- X *
- X * Returns the result of Phys2FileInfo(), or a null pointer if the
- X * database cannot be opened or holds no record for this FID.
- X */
- Xt_FileInfo *
- XGetFileInfo(FID)
- X    t_FID FID;
- X{
- X    t_FileInfo *FileInfo;
- X    datum key, data;
- X    DBM *db;
- X    /* Room for 'F', every decimal digit of an unsigned long (never more
- X     * than three digits per byte) and the terminating nul.  The former
- X     * fixed size of 20 was too small for a 64-bit unsigned long.
- X     */
- X    char Buffer[sizeof(unsigned long) * 3 + 2];
- X#ifdef NEEDALIGN
- X    t_PhysicalIndexEntry *PIE;
- X#endif
- X
- X    (void) sprintf(Buffer, "F%lu", (unsigned long) FID);
- X    key.dptr = Buffer;
- X    key.dsize = strlen(Buffer);
- X
- X    if ((db = startdb(FileIndex)) == (DBM *) 0) {
- X        return (t_FileInfo *) 0;
- X    }
- X
- X    data = dbm_fetch(db, key);
- X    /* NOTE(review): data.dptr is still used after this enddb() call;
- X     * that presumably relies on enddb() leaving fetched data intact --
- X     * confirm.
- X     */
- X    enddb(db);
- X
- X    if (data.dsize == 0) {
- X        return (t_FileInfo *) 0;
- X    }
- X
- X#ifdef NEEDALIGN
- X    /* Work on a copy in emalloc'd storage rather than on the bytes
- X     * that dbm returned.
- X     */
- X    PIE = (t_PhysicalIndexEntry *) emalloc(data.dsize + 1);
- X    (void) memcpy((char *) PIE, data.dptr, data.dsize);
- X    FileInfo = Phys2FileInfo(PIE);
- X    (void) efree((char *) PIE);
- X#else
- X
- X    /* Now we have a PIE, so we need a FileInfo... */
- X    FileInfo = Phys2FileInfo(/*NOSTRICT*/(t_PhysicalIndexEntry *) data.dptr);
- X#endif
- X
- X    return FileInfo;
- X}
- X
- X/* strcontains -- does ShortString occur anywhere inside LongString?
- X *
- X * Returns 1 if it does and 0 if it does not.  An empty ShortString is
- X * never found, because only non-nul characters of LongString are
- X * compared with its first character.
- X */
- Xint
- Xstrcontains(ShortString, LongString)
- X    char *ShortString;
- X    char *LongString;
- X{
- X    register char *Here = LongString;
- X
- X    int strprefix();
- X
- X    while (*Here) {
- X        /* only bother with strprefix() when the first characters agree */
- X        if (*Here == *ShortString) {
- X            if (strprefix(ShortString, Here)) {
- X                return 1;
- X            }
- X        }
- X        Here++;
- X    }
- X    return 0;
- X}
- X
- X/* strprefix -- does String start with Prefix?
- X *
- X * Returns 1 if every character of Prefix matches the start of String,
- X * and 0 otherwise.  An empty Prefix matches only an empty String; the
- X * old loop gave the same answer for a non-empty String, but when both
- X * were empty it compared the two nuls and then looked at the byte
- X * after the end of Prefix.
- X */
- Xint
- Xstrprefix(Prefix, String)
- X    register char *Prefix;
- X    register char *String;
- X{
- X    if (!*Prefix) {
- X        return (*String == '\0');
- X    }
- X
- X    /* Prefix is not empty here, so a String that runs out first fails
- X     * the comparison before either pointer passes its terminating nul.
- X     */
- X    while (*String++ == *Prefix++) {
- X        if (!*Prefix) return 1;
- X    }
- X    return 0;
- X}
- @@@End of lq-text/src/liblqtext/FileList.c
- echo x - lq-text/src/liblqtext/FilterType.c 1>&2
- sed 's/^X//' >lq-text/src/liblqtext/FilterType.c <<'@@@End of lq-text/src/liblqtext/FilterType.c'
- X/* FilterType.c -- Copyright 1989 Liam R. Quin. All Rights Reserved.
- X * This code is NOT in the public domain.
- X * See the file COPYRIGHT for full details.
- X */
- X
- X/* FilterType -- determine how to deal with a given file.
- X * Part of Liam Quin's NX-Text text retrieval package.
- X *
- X * $Id: FilterType.c,v 1.6 90/10/06 00:11:56 lee Rel1-10 $
- X *
- X * $Log: FilterType.c,v $
- X * Revision 1.6 90/10/06 00:11:56 lee
- X * Prepared for first beta release.
- X *
- X * Revision 1.5 90/09/24 21:20:31 lee
- X * changed a free() to an efree() -- the last one!
- X *
- X * Revision 1.4 90/09/20 20:07:35 lee
- X * fixed a tiny memory hole...
- X *
- X * Revision 1.3 90/08/29 21:46:35 lee
- X * Alpha release.
- X *
- X * Revision 1.2 90/08/09 19:16:18 lee
- X * BSD lint and fixes...
- X *
- X * Revision 2.2 89/10/08 20:44:34 lee
- X * Working version of nx-text engine. Addfile and wordinfo work OK.
- X *
- X *
- X */
- X
- X#include <stdio.h>
- X#include <malloc.h>
- X#include "emalloc.h"
- X#include <sys/types.h>
- X#include <sys/stat.h>
- X#include <errno.h>
- X#include <fcntl.h>
- X#include <string.h>
- X#include <ctype.h>
- X
- X#include "fileinfo.h"
- X#define FILTERDEF /* see filter.h */
- X#include "filter.h"
- X#include "wordrules.h" /* for min word length -- don't index files shorter */
- X
- X/* Prefix(pref,str) is true when the string str starts with pref.  The
- X * first characters are compared before strncmp() is called.  Both
- X * arguments are evaluated more than once, so they must not have side
- X * effects.
- X */
- X#define Prefix(pref,str) ((*(pref)==*(str))&&!strncmp((pref),(str),strlen(pref)))
- X
- Xextern int open(), close();
- Xextern int read();
- Xextern int strcontains();
- X
- X/* The current filter types are:
- X * FTYPE_NEWS 1
- X * FTYPE_MAIL 2
- X * FTYPE_CDMS 3
- X * FTYPE_MOSTLYASCII 4
- X * FTYPE_C_SOURCE 5
- X */
- X
- X/* InitFilterTable -- set up the table of input filters.
- X *
- X * It might one day be called from Defaults.c, at which point it will
- X * presumably read an ascii file that describes the various filters.
- X *
- X * For now, it does nothing.  It is only called once, and should return
- X * 0 for success or -1 for failure.
- X */
- Xint
- XInitFilterTable()
- X{
- X    int Status = 0;    /* success: there is nothing here that can fail */
- X
- X    return Status;
- X}
- X
- X/* ReadFileHead -- fetch the first few bytes of a file for GetFilterType.
- X *
- X * Reads at most Size - 1 bytes from the start of the file called Name
- X * (Length is strlen(Name)) into Buffer.  If the name ends in ".Z" the
- X * data is read through "zcat"; should that give fewer than
- X * MinWordLength bytes, the file itself is read instead.
- X *
- X * Returns the number of bytes placed in Buffer, or -1 if zcat could not
- X * be started, the file could not be opened, or fewer than MinWordLength
- X * bytes were available.  Buffer is not nul-terminated here.
- X */
- Xstatic int
- XReadFileHead(Name, Length, Buffer, Size)
- X    char *Name;
- X    int Length;
- X    char *Buffer;
- X    int Size;
- X{
- X    int AmountRead;
- X    int fd;
- X
- X    if (Length > 2 && Name[Length - 1] == 'Z' && Name[Length - 2] == '.') {
- X        /* The name is handed to the shell, so it goes inside single
- X         * quotes, with each quote in the name written as '\'' (four
- X         * bytes).  The other ten bytes are for "zcat < '", the closing
- X         * quote and the nul.
- X         */
- X        char *Command = emalloc(4 * Length + 10);
- X        register char *q = Command;
- X        register char *s;
- X        FILE *fp;
- X
- X        (void) strcpy(q, "zcat < '");
- X        q += strlen(q);
- X        for (s = Name; *s; s++) {
- X            if (*s == '\'') {
- X                (void) strcpy(q, "'\\''");
- X                q += 4;
- X            } else {
- X                *q++ = *s;
- X            }
- X        }
- X        *q++ = '\'';
- X        *q = '\0';
- X
- X        fp = popen(Command, "r");
- X        (void) efree(Command);
- X        if (fp == (FILE *) 0) {
- X            return (-1);
- X        }
- X
- X        /* fread wants (buffer, item size, item count, stream) */
- X        AmountRead = (int) fread(Buffer, 1, Size - 1, fp);
- X        (void) pclose(fp);
- X        if (AmountRead >= MinWordLength) {
- X            return AmountRead;
- X        }
- X        /* too little came back -- try again with read() */
- X    }
- X
- X    if ((fd = open(Name, O_RDONLY, 0)) < 0) {
- X        return (-1);
- X    }
- X    AmountRead = read(fd, Buffer, Size - 1);
- X    (void) close(fd);
- X
- X    if (AmountRead < 0 || AmountRead < MinWordLength) {
- X        return (-1);
- X    }
- X    return AmountRead;
- X}
- X
- X/* GetFilterType() is called to determine which input filter (if any)
- X * should be used to read a given file.
- X * This routine knows about compressed (.Z) files.
- X *
- X * It currently knows about mail, news and C files.
- X * There are also hooks for CDMS files (a word-processing package).
- X *
- X * The decision is made from the file's name and from the first block
- X * (at most 1023 bytes) of its contents.  StatBuf must describe the
- X * file; only st_size is looked at.
- X *
- X * Returns one of the FTYPE_ values, or 0 for a file that is entirely
- X * ascii and needs no special filter.  Whatever non-negative value is
- X * returned is also stored in FileInfo->FilterType.
- X *
- X * If the file should not be indexed at all (e.g. it's a core dump,
- X * it is too short, or it cannot be read), we return -1 and leave
- X * FileInfo->FilterType alone.
- X */
- Xint
- XGetFilterType(FileInfo, StatBuf)
- X    t_FileInfo *FileInfo;
- X    struct stat *StatBuf;
- X{
- X    char Buffer[1024];
- X    int AmountRead;
- X    int Length;
- X    int ch;
- X    int Type;
- X
- X    if (!FileInfo || !FileInfo->Name || !*(FileInfo->Name)) return (-1);
- X
- X    if (StatBuf->st_size < MinWordLength) return (-1);
- X
- X    Length = strlen(FileInfo->Name);
- X
- X    AmountRead = ReadFileHead(FileInfo->Name, Length, Buffer,
- X                                                (int) sizeof(Buffer));
- X    if (AmountRead < 0) {
- X        return (-1);
- X    }
- X
- X    /* ReadFileHead leaves at least one byte spare, so this is always in
- X     * range; Prefix() and strcontains() below depend on the nul.
- X     */
- X    Buffer[AmountRead] = '\0';
- X
- X    /* Check the magic table for CDMS: */
- X    if ((unsigned char) Buffer[0] == 128 && Buffer[1] == 'M') {
- X        if (AmountRead > 35) { /* size of CDMS file header */
- X            return (FileInfo->FilterType = FTYPE_CDMS);
- X        }
- X    }
- X
- X    if (AmountRead < 30) {
- X        register char *p;
- X
- X        /* who cares if it's this small? */
- X        for (p = Buffer; p - Buffer < AmountRead; p++) {
- X            if (!isascii((unsigned char) *p)) {
- X                return (-1);
- X            }
- X        }
- X        return (FileInfo->FilterType = 0);
- X    }
- X
- X    /* Not cdms -- try news/mail;
- X     * mail files start with From;
- X     * news starts with From, Path or Relay-Version
- X     */
- X    if (isupper((unsigned char) Buffer[0])) {
- X        if (Prefix("Xref: ", Buffer)) {
- X            return (FileInfo->FilterType = FTYPE_NEWS);
- X        } else if (Prefix("Newsgroups: ", Buffer)) {
- X            return (FileInfo->FilterType = FTYPE_NEWS);
- X        } else if (Prefix("Relay-Version: ", Buffer)) {
- X            return (FileInfo->FilterType = FTYPE_NEWS);
- X        } else if (Prefix("From", Buffer)) {
- X            if (strcontains("\nPath: ", Buffer)) {
- X                /* bug: should only check header, not body! */
- X                return (FileInfo->FilterType = FTYPE_NEWS);
- X            } else {
- X                return (FileInfo->FilterType = FTYPE_MAIL);
- X            }
- X        } else if (Prefix("Path: ", Buffer)) {
- X            if (strcontains("\nNewsgroups: ", Buffer)) {
- X                return (FileInfo->FilterType = FTYPE_NEWS);
- X            } else {
- X                return (FileInfo->FilterType = FTYPE_MAIL);
- X            }
- X        } else if (Prefix("Return-Path: ", Buffer)) {
- X            /* MH-style mail */
- X            return (FileInfo->FilterType = FTYPE_MAIL);
- X        }
- X    }
- X
- X    /* look for C, trying not to get muddled up with shell scripts */
- X    ch = FileInfo->Name[Length - 1];
- X
- X    if ((ch == 'c' || ch == 'h') && (Length > 2) &&
- X                                FileInfo->Name[Length - 2] == '.') {
- X        /* We could require one of
- X         * . a comment
- X         * . a #[ ^i]*(include|define|ifn?def|if)[ ^i]+
- X         * . main[ ^i\n]*(
- X         * . a declaration -- int, char, long, unsigned, static
- X         * in the first block of the file.
- X         * Can't be bothered today.
- X         */
- X        if (strcontains("#line", Buffer)) {
- X            return (-1); /* preprocessed already, index the original! */
- X            /* we ought to say why we are not indexing it! */
- X        }
- X
- X        /* we are very predisposed to thinking of this as C... */
- X        if (strcontains("#include", Buffer) ||
- X                    strcontains("/*", Buffer) ||
- X                    strcontains("#define", Buffer) ||
- X                    strcontains("argc", Buffer) ||
- X                    strcontains("()", Buffer)) {
- X            return (FileInfo->FilterType = FTYPE_C_SOURCE);
- X        }
- X    }
- X
- X    /* if still not done, choose between Don't Index and Ascii Filter
- X     * (which simply strips non-ascii characters).
- X     */
- X    {
- X        register char *p;
- X        int AsciiCount = 0;
- X        int OtherCount = 0;
- X
- X        for (p = Buffer; p - Buffer < AmountRead; p++) {
- X            if (!*p) {
- X                /* If it has nulls in it, it isn't a normal file,
- X                 * and we have no idea what to do with it!
- X                 * (if we did know, it would have had a magic number,
- X                 * so we wouldn't have got here)
- X                 */
- X                return (-1);
- X            }
- X            if (isascii((unsigned char) *p)) AsciiCount++;
- X            else OtherCount++;
- X        }
- X
- X        /* Judge the proportion only once the whole block has been
- X         * counted; doing it inside the loop let the very first byte
- X         * settle the matter.
- X         */
- X        if (AsciiCount <= OtherCount * 5) {
- X            return (-1); /* too much garbage */
- X        }
- X        Type = (OtherCount) ? FTYPE_MOSTLYASCII : 0;
- X    }
- X
- X    if (Type > MaxFilterType) return (-1); /* don't index */
- X    return (FileInfo->FilterType = Type);
- X}
- @@@End of lq-text/src/liblqtext/FilterType.c
- echo x - lq-text/src/liblqtext/Makefile 1>&2
- sed 's/^X//' >lq-text/src/liblqtext/Makefile <<'@@@End of lq-text/src/liblqtext/Makefile'
- X# Makefile for LQ-Text, a full text retrieval package by Liam R. Quin
- X#
- X# This Makefile belongs in the src/liblqtext directory.
- X# Note that most of the actual configuration is done in ../Makefile and
- X# in ../h/global.h, and not here. This file is for representing the
- X# dependencies between source components and specifying the steps
- X# required to build the library $(DESTDIR)/$(TEXTLIB)
- X#
- X# $Id: Makefile,v 1.3 90/10/06 00:06:22 lee Rel1-10 $
- X#
- X# $Log: Makefile,v $
- X# Revision 1.3 90/10/06 00:06:22 lee
- X# deleted mkdep output.
- X#
- X# Revision 1.2 90/09/29 23:48:33 lee
- X# does cmp on the right file now...
- X#
- X# Revision 1.1 90/08/09 19:17:07 lee
- X# Initial revision
- X#
- X#
- X#
- X
- X# PWD is only mentioned in the commented-out saber_src and saber_obj
- X# recipes further down.
- XPWD=liblqtext
- X
- X# Names of the two libraries built here, and the directory that the
- X# finished libraries and lint libraries are put in.
- XTEXTLIB=liblqtext.a
- XLIAMLIB=liblq.a
- XDESTDIR=../lib
- XRANLIB=ranlib
- XTEXTLINTLIB=llib-llqtext.ln
- XLIAMLINTLIB=llib-llq.ln
- XLINT=lint
- XLINTFLAGS=-a -b -c -h -x
- X
- X# NOTE(review): EXTRA is set here but not used by any rule in this
- X# Makefile; presumably CFLAGS from ../Makefile already has it -- confirm.
- XEXTRA=-I../h
- X
- X# The default target installs both libraries in $(DESTDIR).
- Xall: $(DESTDIR)/$(TEXTLIB) $(DESTDIR)/$(LIAMLIB)
- X
- Xinstall: all
- X
- X# "make lint" builds both lint libraries and moves them to $(DESTDIR).
- Xlint: $(DESTDIR)/$(TEXTLINTLIB) $(DESTDIR)/$(LIAMLINTLIB)
- X -echo The lint libraries are up to date.
- X
- X$(DESTDIR)/$(TEXTLINTLIB): $(TEXTLINTLIB)
- X mv $(TEXTLINTLIB) $(DESTDIR)/$(TEXTLINTLIB)
- X
- X$(DESTDIR)/$(LIAMLINTLIB): $(LIAMLINTLIB)
- X mv $(LIAMLINTLIB) $(DESTDIR)/$(LIAMLINTLIB)
- X
- X# Both empty here; they are appended to TEXTSRC and TEXTOBJS below.
- XNDBMCFILES=
- XNDBMOFILES=
- X
- X## keep all of the following consistent: ###################################
- X
- X# NOTE(review): TEXTSRC lists malloc.c but TEXTOBJS has no malloc.o;
- X# malloc.o is in LIAMOBJS instead.  So malloc.c is linted with the text
- X# library but archived in $(LIAMLIB) -- confirm that this is intended.
- XTEXTOBJS = WordInfo.o DocPath.o Defaults.o FileList.o Phrase.o Root.o \
- X numbers.o pblock.o smalldb.o system.o FilterType.o \
- X asciitrace.o $(NDBMOFILES)
- X
- XTEXTSRC = DocPath.c Defaults.c FileList.c Phrase.c Root.c WordInfo.c \
- X malloc.c numbers.c pblock.c smalldb.c system.c FilterType.c \
- X asciitrace.c $(NDBMCFILES)
- X
- XLIAMOBJS = malloc.o progname.o cmdname.o
- XLIAMSRC = malloc.c progname.c cmdname.c
- X
- X## end of mutually related stuff ##########################################
- X
- X# Build the lint library for the text library from all of its sources.
- X$(TEXTLINTLIB): $(TEXTSRC)
- X $(LINT) -Clqtext $(LINTFLAGS) $(CFLAGS) $(TEXTSRC)
- X
- X# Every recipe line of the two saber targets starts with '#'.
- X# NOTE(review): presumably these are commands meant for the Saber-C
- X# environment rather than for the shell -- confirm.
- Xsaber_src:
- X #cd $(PWD)
- X #load $(CFLAGS) $(TEXTSRC) $(LIAMSRC)
- X #cd ..
- X
- Xsaber_obj:
- X #cd $(PWD)
- X #load $(CFLAGS) $(TEXTOBJS) $(LIAMOBJS)
- X #cd ..
- X
- X# Build the lint library for liblq from its sources.
- X$(LIAMLINTLIB): $(LIAMSRC)
- X $(LINT) -Clq $(LINTFLAGS) $(CFLAGS) $(LIAMSRC)
- X
- X# Install a library in $(DESTDIR).  Each command has a leading '-', so
- X# make carries on even if it fails.  The steps are:
- X#   make $(DESTDIR) if it is not there;
- X#   make an empty installed library if there is none, so that cmp has
- X#   something to compare against;
- X#   copy the new archive across only if cmp says the two differ;
- X#   remove the local archive.
- X# Since the local archive is removed, it is not there the next time
- X# make runs and so has to be built again.
- X$(DESTDIR)/$(TEXTLIB): $(TEXTLIB)
- X -test -d $(DESTDIR) || mkdir $(DESTDIR)
- X -test -f $(DESTDIR)/$(TEXTLIB) || cp /dev/null $(DESTDIR)/$(TEXTLIB)
- X -( cmp $(TEXTLIB) $(DESTDIR)/$(TEXTLIB) || cp $(TEXTLIB) $(DESTDIR) )
- X -/bin/rm -f $(TEXTLIB)
- X
- X# The same steps for liblq.
- X$(DESTDIR)/$(LIAMLIB): $(LIAMLIB)
- X -test -d $(DESTDIR) || mkdir $(DESTDIR)
- X -test -f $(DESTDIR)/$(LIAMLIB) || cp /dev/null $(DESTDIR)/$(LIAMLIB)
- X -( cmp $(LIAMLIB) $(DESTDIR)/$(LIAMLIB) || cp $(LIAMLIB) $(DESTDIR) )
- X -/bin/rm -f $(LIAMLIB)
- X
- X# Build each archive from scratch out of its object files, then run
- X# $(RANLIB) on it.
- X$(TEXTLIB): $(TEXTOBJS)
- X rm -f $(TEXTLIB)
- X ar rv $(TEXTLIB) $(TEXTOBJS)
- X $(RANLIB) $(TEXTLIB)
- X
- X$(LIAMLIB): $(LIAMOBJS)
- X rm -f $(LIAMLIB)
- X ar rv $(LIAMLIB) $(LIAMOBJS)
- X $(RANLIB) $(LIAMLIB)
- X
- X# tidy removes object files and core dumps from this directory.
- Xtidy:
- X /bin/rm -f *.o core
- X
- X# NOTE(review): neither TARGETS nor TEST is given a value anywhere in
- X# this Makefile, so unless the invoking make supplies them "clean"
- X# removes nothing beyond what tidy does -- confirm.
- Xclean: tidy
- X /bin/rm -f $(TARGETS) $(TEST)
- X
- X# depend runs mkdep over every C file in this directory.
- Xdepend:
- X mkdep $(CFLAGS) *.c
- X
- X# DO NOT DELETE THIS LINE -- mkdep uses it.
- X# DO NOT PUT ANYTHING AFTER THIS LINE, IT WILL GO AWAY.
- X
- X# IF YOU PUT ANYTHING HERE IT WILL GO AWAY
- @@@End of lq-text/src/liblqtext/Makefile
- echo end of part 03
- --
- Liam R. E. Quin, lee@sq.com, SoftQuad Inc., Toronto, +1 (416) 963-8337
-