home *** CD-ROM | disk | FTP | other *** search
- /*
- * waislook.c
- *
- * WAIS search driver. Based on John Franks' WAISGN program.
- *
- * (C) Copyright 1994 The University Court of the University of Edinburgh
- * (C) Copyright 1994 John Franks
- *
- * Author: Chris Adie <C.J.Adie@ed.ac.uk>
- *
- */
-
- /******************************************************************************/
- /* INCLUDE FILES */
- /******************************************************************************/
-
- #include <stdio.h>
- #include <io.h>
- #include <ctype.h>
- #include <string.h>
- #include <math.h>
- #include "cutil.h"
- #include "irext.h"
- #include "irsearch.h"
- #include "docid.h"
- #include "irtfiles.h"
- #include "waisgn.h"
- #include "version.h"
-
- /******************************************************************************/
- /* CONSTANT DEFINITIONS */
- /******************************************************************************/
-
- /* This section deliberately left blank */
-
- /******************************************************************************/
- /* MACRO FUNCTION DEFINITIONS */
- /******************************************************************************/
-
-
- /******************************************************************************/
- /* TYPE DEFINITIONS */
- /******************************************************************************/
-
- /* This section deliberately left blank */
-
- /******************************************************************************/
- /* GLOBAL VARIABLES AND FUNCTIONS IMPORTED */
- /******************************************************************************/
-
- /* The following are defined in irsearch.c */
- extern boolean search_for_words(char *words,database *db,long doc_id,char *words_used);
- extern long next_best_hit(hit *besthit,database *db);
- extern char FileTypeFromTable(char *FileName);
-
- /******************************************************************************/
- /* GLOBAL VARIABLES EXPORTED */
- /******************************************************************************/
-
- /* char *log_file_name = NULL; */
-
- /******************************************************************************/
- /* VARIABLES PRIVATE TO THIS FILE */
- /******************************************************************************/
-
- static char host[MIDLEN] = "";
- static char dbname[MIDLEN] = "index";
- static char gntype[SMALLLEN] = "7w";
- static char words[MAXLEN] = "";
- static char gntitle[MAXLEN] = "";
- static char port[SMALLLEN] = "";
- static char virtpath[MAXLEN] = "";
-
- static FILE *dfp = stderr;
-
- static FILE *logfile = NULL;
-
- static enum {interactive, http, gopher} mode = interactive;
-
- static int usedir = FALSE;
- static int userange = FALSE;
- static int is0h = FALSE;
- static int debug = FALSE;
-
- static double MaxRawScore = 0.0;
-
- /******************************************************************************/
- /* FUNCTIONS PRIVATE TO THIS FILE */
- /******************************************************************************/
-
/*
 * Change backslashes in the path to forward slashes, and then apply the URL
 * character escaping rules, rewriting the string in place.
 *
 * Each of the characters
 *      , ; " ' & = ( ) { } % and space
 * is replaced by '%' followed by its two-digit upper-case hexadecimal code
 * (a space becomes "%20").  Every other character is left as it is.
 *
 * path  NUL-terminated string to convert.  The result can be up to three
 *       times as long as the input, so the caller's buffer must have room
 *       for the expanded string.
 *
 * The expansion is performed directly inside the caller's buffer, working
 * from the end of the string towards the start.  No fixed-size scratch
 * buffer is used, so a long or escape-heavy path cannot overrun one.
 */
static void EscapeURL(char *path) {
    static const char hexdigits[] = "0123456789ABCDEF";
    static const char reserved[] = ",;\"'&=(){}% ";
    size_t length = 0;
    size_t growth = 0;
    char *src;
    char *dst;

    /* Pass 1: normalise the separators and measure how much we will grow. */
    for (src = path; *src != '\0'; src++) {
        if (*src == '\\') {
            *src = '/';
        } else if (strchr(reserved, *src) != NULL) {
            growth += 2;
        }
        length++;
    }

    if (growth == 0) {
        return;                 /* nothing needs escaping */
    }

    /*
     * Pass 2: copy backwards.  dst never drops below src, so no character
     * is overwritten before it has been read.
     */
    src = path + length;
    dst = src + growth;
    *dst = '\0';
    while (src != path) {
        unsigned char ch = (unsigned char) *--src;

        if (strchr(reserved, ch) != NULL) {
            *--dst = hexdigits[ch & 0x0F];
            *--dst = hexdigits[ch >> 4];
            *--dst = '%';
        } else {
            *--dst = (char) ch;
        }
    }
}
-
-
- /*
- * For a particular match, produce output according to the selected protocol.
- */
- static void doline(hit * match) {
- int score;
- int size;
- char *cp,
- *relpath,
- type1,
- typebuf[MIDLEN],
- pathbuf[MAXLEN],
- relpathbuf[MAXLEN],
- name[MAXLEN];
-
- strcpy( pathbuf, match->filename);
- relpath = pathbuf;
-
- strcpy( relpathbuf, relpath);
-
- strcpy( name, match->headline);
- cp = name;
- while ( *cp) {
- if ( isspace(*cp))
- *cp = ' ';
- cp++;
- }
-
- if (MaxRawScore>0.0) {
- score = (int)((match->weight / MaxRawScore) * 1000.0);
- } else {
- score = 0;
- }
-
- size = match->end_character-match->start_character;
-
- if ( userange) {
- type1 = '0';
- sprintf( typebuf, "R%ld-%ld-%range",
- match->start_character, match->end_character);
- }
-
- else if ( *(match->type) == 'D') { /* DVI type */
- type1 = '9';
- strcpy( typebuf, "9");
- }
- else if ( is0h) { /* 0h type */
- type1 = '0';
- strcpy( typebuf, "0h");
- }
- else if ( usedir) { /* Return directory containing file */
- type1 = '1';
- strcpy( typebuf, "1");
- }
- else {
- type1 = '0';
- strcpy( typebuf, "0");
- if (mode==gopher) {
- type1 = FileTypeFromTable(relpathbuf);
- typebuf[0] = type1;
- typebuf[1] = '\0';
- }
- }
-
- if (mode==http) {
- EscapeURL(relpathbuf);
- printf("<li> <a href=\"http://%s:%s", host, port);
- if (virtpath[0]!='\0') {
- printf("/%s",virtpath);
- }
- printf("/%s\">%s</a><BR>(Score=%d, Size=%d)\n", relpathbuf, name, score, size);
- } else
- if (mode==gopher) {
- printf("%c%s\t%s\\%s\t%s\t%s\r\n",type1, name, typebuf, relpathbuf, host, port);
- } else {
- printf("%s %s\n",relpathbuf,name);
- }
- }
-
-
-
- int senderr(char *msg) {
- if (mode==http) {
- printf( "<HEAD> <TITLE>%s</TITLE> </HEAD>\n", msg);
- printf( "<BODY><HR><H2>%s</H2>\n", msg );
- printf( "Sorry, an error has occurred in");
- printf( " the WAIS index search.\n<HR></BODY>\n");
- } else
- if (mode==gopher) {
- printf( "3Server error: %s\t\terror.host\t0\r\n.\r\n", msg);
- } else {
- printf("An error has occurred in the search: %s\n",msg);
- }
- return 0;
- }
-
-
- static void httpintro(void) {
- printf( "<HEAD> <TITLE>%s</TITLE> </HEAD>\n", gntitle);
- printf( "<BODY><HR><H2>%s</H2>\n", gntitle);
- printf( "The following items were returned as matches\n");
- printf( "for <B>`%s'</B> by the WAIS index search.\n", words);
- printf( "They are ordered with the best matches first.\n");
- printf( "<P>\n<UL>\n", words);
- }
-
-
- static void toobad(char *CatalogURL) {
- printf( "<HEAD> <TITLE>%s</TITLE> </HEAD>\n", gntitle);
- printf( "<BODY><HR><H2>%s</H2>\n", gntitle);
- printf( "Sorry, no matches for <B>`%s'</B> were returned\n", words);
- printf( "by the WAIS index search. You may try again with\n");
- printf( "different search terms.\n <ISINDEX> \n");
- printf( "<p>The <a href=\"%s\">catalog of this WAIS database</a>\n",CatalogURL);
- printf( "may be helpful.</BODY>\n");
- }
-
- /*
- * This function is based very loosely on a function of the same
- * name in Don Gilbert's Go_Ask_WAIS utility. I am very grateful
- * for the help in dealing with WAIS that his routine has provided
- * provided and for his kind permission to use it here. Any errors are
- * mine and not Don's. JMF
- */
- static void AskWais(char *SearchWords,int maxhits) {
- database *db;
- long i;
- query_parameter_type parameters;
- boolean searchResult;
- hit theHit;
- char CatalogURL[MAXLEN];
- char *p;
-
- if ( debug)
- fprintf( dfp, "Opening data base %s\n", dbname);
- strcpy(CatalogURL, dbname);
- strcat(CatalogURL, dictionary_ext);
- if (_access(CatalogURL, 0) == -1) {
- senderr( "The database does not exist\n");
- exit( 2);
- }
- if ( (db = openDatabase(dbname, false, true)) == (database *) NULL) {
- senderr( "Failed to open database\n");
- exit( 2);
- }
-
- parameters.max_hit_retrieved = ((maxhits > 0) ? maxhits : 256);
- set_query_parameter(SET_MAX_RETRIEVED_MASK, ¶meters);
- searchResult = search_for_words(SearchWords, db, 0, NULL);
-
- /* Initialise the maximum raw hit weight */
- MaxRawScore = 0.0;
-
- if (searchResult == true) {
- finished_search_word(db);
- if ( debug)
- fprintf( dfp, "Dbase search successful\n");
-
- if (next_best_hit(&theHit, db) != 0) {
- if ( debug)
- fprintf( dfp, "Headline = %s\n",
- theHit.headline);
- if (mode==http) {
- /* Generate the (relative) URL of the catalog. */
- p = strrchr(dbname,'\\');
- if (p==NULL) p = dbname; else p++;
- strncpy(CatalogURL,p,sizeof(CatalogURL));
- strncat(CatalogURL,".cat",sizeof(CatalogURL));
- EscapeURL(CatalogURL);
- toobad(CatalogURL);
- } else
- if (mode==gopher) {
- printf( ".\r\n");
- } else {
- printf("No match found\n");
- }
- finished_best_hit(db);
- closeDatabase(db);
- return;
- }
-
- if (mode==http) {
- httpintro();
- }
-
- i = 1;
- do {
- if (theHit.weight > 0) {
- if (MaxRawScore<=0.0) {
- MaxRawScore = theHit.weight;
- }
- doline(&theHit);
- }
- i++;
- } while ( i < parameters.max_hit_retrieved &&
- (next_best_hit(&theHit, db) == 0));
-
- if (mode==http) {
- printf( "</ul>\n<P>You may repeat your search with\n");
- printf( "a new search term. <P> <ISINDEX></BODY>\n");
- }
- } else {
- senderr( "The database search failed.");
- exit( 2);
- }
- finished_best_hit(db);
- closeDatabase(db);
- return;
- }
-
-
- /******************************************************************************/
- /* GLOBAL FUNCTIONS EXPORTED */
- /******************************************************************************/
-
- /*
- * The main program.
- */
- int main(int argc,char *argv[]) {
- char *cp;
- int i;
-
- if (argc<=1) {
- /* No arguments */
- printf("Usage: %s [-d dbname] [-h host] [-p port] [-debug]\n",argv[0]);
- printf(" [-http|-gopher] [-t title] [-q virtpath] [-v] search words ...\n");
- exit(0);
- }
-
- /* Collect the arguments */
- for (i=1;i<argc;i++) {
-
- if (strcmp(argv[i],"-debug")==0) {
- debug = TRUE;
- } else
- if (strcmp(argv[i],"-h")==0) {
- strncpy(host,argv[++i],sizeof(host));
- } else
- if (strcmp(argv[i],"-p")==0) {
- strncpy(port,argv[++i],sizeof(port));
- } else
- if (strcmp(argv[i],"-d")==0) {
- strncpy(dbname,argv[++i],sizeof(dbname));
- /* Strip off trailing extension if any */
- cp = strrchr(dbname,'\\');
- if (cp==NULL) {
- cp = dbname;
- }
- cp = strrchr(cp,'.');
- if (cp!=NULL) {
- *cp = '\0';
- }
- } else
- if (strcmp(argv[i],"-t")==0) {
- strncpy(gntitle,argv[++i],sizeof(gntitle));
- } else
- if (strcmp(argv[i],"-v")==0) {
- #ifdef WIN32
- printf("%s\n",VERWIN32);
- if (argc == 2)
- exit(0);
- #endif
- } else
- if (strcmp(argv[i],"-http")==0) {
- mode = http;
- } else
- if (strcmp(argv[i],"-gopher")==0) {
- mode = gopher;
- } else
- if (strcmp(argv[i],"-q")==0) {
- strncpy(virtpath,argv[++i],sizeof(virtpath));
- } else
- if (argv[i][0]=='-') {
- printf("Unknown option %s\n",argv[i]);
- exit(0);
- } else {
- /* Remaining arguments are the words to search for. */
- *words = '\0';
- while( i < argc) {
- strncat(words, argv[i], sizeof(words));
- if ( i < argc - 1 ) {
- /* Single space between words */
- strncat(words, " ",sizeof(words));
- }
- i++;
- }
- }
-
- }
-
- if ( debug) {
- fprintf( dfp, "Database: %s\n", dbname);
- if (*host) fprintf( dfp, "Host: %s\n", host);
- if (*port) fprintf( dfp, "Port: %s\n", port);
- fprintf( dfp, "Type: %s\n", gntype);
- if (*gntitle) fprintf( dfp, "Title: %s\n", gntitle);
- switch (mode) {
- case interactive: cp = "interactive"; break;
- case http: cp = "http"; break;
- case gopher: cp = "gopher"; break;
- default: cp = "unknown";
- }
- fprintf( dfp, "Protocol: %s\n", cp);
- fprintf( dfp, "Search term: %s\n", words);
- }
-
- /*
- * gntype is "7w", "7wr", or "7wh" according to whether
- * it is plain text, a range, or type 0h.
- * If "7w" has a 'd' appended it means that instead
- * of returning the selector of the file matched, the selector of
- * the directory containing it should be returned. It can also
- * be "7wd" indicating that the search should return
- * the directory containing the matching item rather than the
- * item itself.
- */
-
- switch ( gntype[2]) {
- case 'd':
- usedir = TRUE;
- break;
- case 'r':
- userange = TRUE;
- break;
- case 'h':
- is0h = TRUE;
- break;
- }
-
- AskWais( words, MAXHITS_RETURNED);
-
- return 0;
- }
-
-
-
-