home *** CD-ROM | disk | FTP | other *** search
Wrap
#!/usr/sbin/perl
#
# Acknowledgements
#
# Thanks to Guy Brooker (guy@jw.estec.esa.nl) for his AA interface,
# which was the starting point for this program.
#
#       Paul Clark
#       paul@cs.arizona.edu
#
# Modifications
#
#  2/22/94  Version 1.0, shell script version        Paul Clark
#  4/21/94  Version 1.1, multiple archives support   Paul Clark
#  4/22/94  Version 1.2, perl script                 Paul Clark
#  8/04/94  Version 1.2b, WAIS clone                 Brian Behlendorf
#  8/05/94  1.2b1, bug fixes                         Brian Behlendorf
#
# Overview
#
# CGI gateway to the Glimpse search tool.  Called without a query string it
# prints a search form (plus links to the subdirectories of PATH_INFO);
# called with one it runs glimpse in count mode (-c) against the index in
# $indexdir, ranks every matching file by "matches per byte" scaled so the
# best hit scores 1000, and prints the ranked list.  Link texts come from
# the title DBM file when an entry exists, otherwise the file name is shown.
#
# QUERY_STRING and PATH_INFO are supplied by the remote client, so they are
# never evaluated as Perl, never passed through a shell, and are
# HTML-escaped before being echoed back.

use strict;
use warnings;

# **** **** **** ****    CONFIGURABLE VARIABLES     **** **** **** ****

my $HTTPD_HOME  = "/wired/www.tools/glimpse/";      # not referenced below
my $GLIMPSE_LOC = "/wired/www.tools/glimpse/bin/glimpse";

my $indexdir    = "/wired/www.tools/glimpse/indices";
my $serverroot  = "/wired/www/";
my $titledb     = "/wired/www.tools/db/files/titles";

# **** **** **** **** NO CONFIGURATION NEEDED BELOW **** **** **** ****

# Unbuffered output, so nothing buffered is duplicated into the forked
# glimpse child.
$| = 1;

# Open the title DBM file.  Best effort, as before: when it cannot be
# opened get_title() simply falls back to file names.
my %TTLDB;
dbmopen(%TTLDB, $titledb, 0664);

# Optional restriction of the search to a sub-tree of the site.
my $path = $ENV{'PATH_INFO'};
$path = '' unless defined $path;

#if ( m|^/([0-9][0-9]*)(.*)$| ) {
#       $script = $1;
#       $path = $2;
#}# else {
#       &err_nopathinfo;
#}

# HOME is pointed at the index directory for the glimpse child process.
($ENV{'HOME'} = $indexdir) || err_cantcwd();

# Ensure that Glimpse is available on this machine
err_noglimpse() unless -x $GLIMPSE_LOC;

# Ensure that index is available
err_noindex() unless -r "$indexdir/.glimpse_index";

# To support an ISINDEX type search, set query string if given
# an argument on the command line
my $prefix = @ARGV ? "whole=on&case=off&query=" : '';

# Check that a query has been made; otherwise show the search form
# (err_newquery does not return).
my $query = $ENV{'QUERY_STRING'};
err_newquery() unless $query;

# Strip the variables out from the query string into %qs.
# The original built "$QS_<name> = \"<value>\";" and ran it through string
# eval, which let a request execute arbitrary Perl; values are now stored
# as plain data.
$query =~ s/%0D//g;
$query =~ s/%20/+/g;

my %qs;
for my $pair (split /&/, $prefix . $query) {
    my ($fname, $fvalue) = split /=/, $pair;
    next unless defined $fname && $fname =~ /^[a-z_A-Z]\w*$/;
    $qs{$fname} = defined $fvalue ? $fvalue : '';
}

my $qs_query  = defined $qs{'query'}  ? $qs{'query'}  : '';
my $qs_errors = defined $qs{'errors'} ? $qs{'errors'} : '';
my $qs_case   = defined $qs{'case'}   ? $qs{'case'}   : '';
my $qs_whole  = defined $qs{'whole'}  ? $qs{'whole'}  : '';

#chop $QS_query;

# Translate the form fields into glimpse options.
my @opt_errors;
@opt_errors = ("-$qs_errors") if $qs_errors =~ /^[0-8]$/;
@opt_errors = ("-B", "-y")    if $qs_errors =~ /^Best\+match$/;

my $ignore_case = ($qs_case  !~ /^on$/);    # -i unless "case sensitive" ticked
my $whole_word  = ($qs_whole !~ /^on$/);    # -w unless "within other words" ticked

# Build the glimpse pattern from the '+'-separated words.  A word that is
# exactly OR / AND becomes the glimpse operator ',' / ';'.  (The original
# replaced any leading "OR"/"AND", mangling words such as "ORANGE", and
# glued neighbouring plain words together, turning "foo bar" into "foobar".)
my $total_query = '';
my $need_space  = 0;
for my $word (split /\+/, $qs_query) {
    next if $word eq '';
    if ($word eq 'OR') {
        $total_query .= ',';
        $need_space = 0;
    }
    elsif ($word eq 'AND') {
        $total_query .= ';';
        $need_space = 0;
    }
    else {
        $total_query .= ' ' if $need_space;
        $total_query .= $word;
        $need_space = 1;
    }
}

#print "You're looking for: $total_query";

# glimpse is run from inside the index directory (-H .).  Checked before
# any header is printed so that the error page is still well-formed.
chdir($indexdir) || err_cantcwd();

print "Content-type: text/html\n\n";
print "<HEAD><TITLE>Glimpse response for query \"", html_escape($qs_query), "\"\n";
print "</TITLE></HEAD><BODY>\n";

(my $html_query = $qs_query) =~ s/\+/ /g;
print "<H1>Glimpse search for \"", html_escape($html_query), "</H1><HR>\n";

# NOTE(review): this tests for a literal '|' although OR is mapped to ','
# above, and "> 0" ignores a '|' in first position -- confirm the intent.
if (index($total_query, "|") > 0) {
    if ($whole_word) {
        $whole_word = 0;
        print "<P><B>Substring searches not supported with the OR operation, for some reason.</B>";
    }
}

# Argument list for glimpse.  It is handed to exec() as a list, so no shell
# ever sees the user-supplied pattern or path.
my @glimpse_args = ("-y", "-c");
push @glimpse_args, "-i" if $ignore_case;
push @glimpse_args, "-w" if $whole_word;
push @glimpse_args, @opt_errors;
push @glimpse_args, "-H", ".";
push @glimpse_args, "-F", $path if $path;
push @glimpse_args, $total_query;

# Debugging aid: the command as an HTML comment.  Escaping '>' keeps user
# input from terminating the comment early.
print "<P><!-- Command = \"",
      html_escape(join(' ', $GLIMPSE_LOC, @glimpse_args)),
      "\" -->";

# Collect one record per matching file.  A list is used rather than a hash
# keyed by score so that files with equal scores are all kept.
my @hits;
my $glimpse_out = run_glimpse(@glimpse_args);
if ($glimpse_out) {
    while (my $line = <$glimpse_out>) {
        # Count mode prints "<file>: <number of matches>"; skip anything else.
        next unless $line =~ /^([^ ]*):\s*([0-9][0-9]*)/;
        my ($file, $matches) = ($1, $2);

        # Relevance is matches per byte; a file that cannot be stat'ed or is
        # empty is skipped instead of dividing by zero.
        my $size = (stat($file))[7];
        next unless $size;

        # Turn the file system path into a server-relative URL.
        my $url = length($file) > length($serverroot)
                ? substr($file, length($serverroot))
                : '';
        $url =~ s/^\/*/\//;

        push @hits, { 'url' => $url, 'score' => $matches * 1000 / $size };
    }
    close($glimpse_out);
}

# Best hit first; scores are numbers, so compare numerically.
@hits = sort { $b->{'score'} <=> $a->{'score'} } @hits;

# Scale so that the best hit is shown with a score of 1000.
my $scale = (@hits && $hits[0]{'score'}) ? 1000 / $hits[0]{'score'} : 0;

print "<dl>\n";
my $count = 0;
for my $hit (@hits) {
    $count++;
    my $shown_score = int($hit->{'score'} * $scale);
    print "<dt><B>", $count, ":</B> <a href=\"", html_escape($hit->{'url'}),
          "\">", display_title($hit->{'url'}), "</a>\n";
    print "<dd>Score: <B>", $shown_score, "</B>\n";
}
print "</DL>\n";
print "<B>No items found. Either that word doesn't appear anywhere or it's so common (like \"an\" or \"the\") it doesn't get indexed.</B><P>" if ($count < 1);

print "<HR>";
print "Many thanks to <a href=\"http://glimpse.cs.arizona.edu:1994/\">Glimpse</a>\n";
print "</BODY>\n";


# html_escape(TEXT)
# Returns TEXT with the characters & < > " replaced by HTML entities so it
# can be placed in element content or a double-quoted attribute.
# An undefined TEXT yields the empty string.
sub html_escape {
    my ($text) = @_;
    $text = '' unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# run_glimpse(ARGS...)
# Starts $GLIMPSE_LOC with ARGS and returns a filehandle from which its
# standard output and standard error (merged, like "2>&1") can be read.
# The program is exec'ed directly from a forked child, without a shell.
# Returns nothing when the fork fails.
sub run_glimpse {
    my @args = @_;

    my $pid = open(my $reader, '-|');
    return unless defined $pid;

    if ($pid == 0) {
        # Child: STDOUT is already the pipe to the parent.
        open(STDERR, '>&', \*STDOUT);
        exec { $GLIMPSE_LOC } $GLIMPSE_LOC, @args
            or die "cannot exec $GLIMPSE_LOC: $!\n";
    }
    return $reader;
}

# list_subdirectories(DIR)
# Returns the sorted names of the real (non-symlink, non-hidden)
# subdirectories of DIR, or an empty list when DIR cannot be read.
# Replaces the former `ls -l DIR | grep "^d"` pipeline, which interpolated
# PATH_INFO into a shell command.
sub list_subdirectories {
    my ($dir) = @_;

    opendir(my $dh, $dir) or return;
    my @names = sort grep { !/^\./ && -d "$dir/$_" && !-l "$dir/$_" } readdir($dh);
    closedir($dh);
    return @names;
}

# display_title(FILE)
# Link text for FILE: the stored title, emitted exactly as found in the
# title database, or the HTML-escaped file name when no title is stored.
sub display_title {
    my ($file) = @_;
    my $title = get_title($file);
    return $title eq $file ? html_escape($file) : $title;
}

sub diag_exit {
    # exit on error
    exit 1;
}

sub err_newquery {
    # The script was called without a query.
    # Provide an ISINDEX type response for browsers
    # without form support.
    # Prints the search form, followed by links that restrict the search to
    # one of the subdirectories of PATH_INFO, then exits.
    print <<'EOM' ;
Content-type: text/html

<HEAD>
<TITLE>Glimpse Search of Wired's Web Site</TITLE>
</HEAD>
<BODY>
EOM

    $path = "/" unless $path;
    (my $index_page = "$path/index.html") =~ s/\/\//\//g;
    (my $namepath = display_title($index_page)) =~ s/index\.html//g;
    my $safe_path = html_escape($path);

    print "<H2>Search Within \"$namepath\"</H2>\n";
    print "<FORM ACTION=\"/cgi-bin/aglimpse$safe_path\">\n";
    print "If your browser does not support forms, <a href=\"/cgi-bin/aglimpsenf$safe_path\">use the form-free version.</a>\n";

    print <<'EOM' ;
<P>You can enter one search term or a logical combination of two or more terms.<BR>
For example: <B>"Wired AND Digital"</B> or <B>"Barney OR TV"</B>.
<P>
<INPUT NAME="query" size=80>
<P>
<INPUT NAME="case" TYPE="checkbox">Make this search case sensitive<BR>
<INPUT NAME="whole" TYPE="checkbox">Look for this word (these words) within other words
<P>
You can also select specify a range of error in your search terms.
For example, with an error range set to 1, a search for "turkey" can also
return "turnkey". One can also search for a "best match", meaning it'll look
for the word with 0 errors, then 1 error, then 2 errors, etc.
Usually ranges of 1 or 2 are useful for common spelling errors.
<P>
<SELECT NAME="errors">
<OPTION>0
<OPTION>1
<OPTION>2
<OPTION>3
<OPTION>4
<OPTION>5
<OPTION>6
<OPTION>7
<OPTION>8
<OPTION>Best match
</SELECT>
<P>
<INPUT TYPE="submit" VALUE="Begin Search">
<INPUT TYPE="reset" VALUE="Reset Values">
EOM

    print "<P>You are currently searching below the \"$namepath\" directory.\n";

    # Directory whose children are offered as narrower search scopes.
    # ';' in the path stands for '/'; a trailing slash is added so the
    # subdirectory name is never glued onto the last path component.
    (my $base = $path) =~ s/;/\//g;
    $base .= '/' unless $base =~ m{/$};

    # Never list anything for a path that tries to climb out of the
    # document tree.
    my @subdirs;
    @subdirs = list_subdirectories("$serverroot$base")
        unless $base =~ m{(?:^|/)\.\.(?:/|$)};

    if (@subdirs) {
        print "You can restrict your search by clicking on one of these subdirectories:\n\n<UL>\n";
        for my $name (@subdirs) {
            (my $index_file = "$serverroot$base$name/index.html") =~ s/\/+/\//g;

            # Prefer the title of the directory's index page as link text.
            my $dirname = '';
            $dirname = display_title("$base$name/index.html") if -e $index_file;
            $dirname = html_escape("$base$name") unless $dirname;
            $dirname =~ s/\/index\.html//g;

            my $href = html_escape("/cgi-bin/aglimpse$base$name/");
            print "<LI><a href=\"$href\">$dirname</a>\n";
        }
        print "</UL>\n";
    }

    print "</FORM>\n";
    print "Many thanks to <a href=\"http://glimpse.cs.arizona.edu:1994/\">Glimpse</a>\n";
    print "</BODY>\n";
    diag_exit();
}

sub err_noglimpse {
    #
    # Glimpse was not found
    # Report a useful message
    #
    print <<'EOM' ;
Content-type: text/html

<HEAD>
<TITLE>Glimpse not found</TITLE>
</HEAD>
<BODY>
<H1>Glimpse not found</H1>
This gateway relies on <CODE>Glimpse</CODE> search tool.
If it is installed, please set the correct path in the script file.
Otherwise obtain the latest version from
<A HREF="file://ftp.cs.arizona.edu/glimpse">ftp.cs.arizona.edu</A>
</BODY>
EOM
    diag_exit();
}

sub err_noindex {
    # Glimpse index was not found
    # Give recommendations for indexing
    print <<'EOM' ;
Content-type: text/html

<HEAD>
<TITLE>Glimpse Index not found</TITLE>
</HEAD>
<BODY>
<H1>Glimpse Index not found</H1>
Glimpse cannot proceed without index.
Please check if the directory being searched is indexed by <code>glimpseindex</code>.
</BODY>
EOM
    diag_exit();
}

sub print_it {
    # Debugging leftover; nothing in this file calls it.  The heredoc is
    # single-quoted, so "[$1]" is printed literally, and the sub returns
    # instead of exiting.
    print <<'EOM' ;
Content-type: text/html

<HEAD>
<TITLE>Glimpse Index not found</TITLE>
</HEAD>
<BODY>
<H1>Glimpse Index not found</H1>
You were looking for [$1].
by <code>glimpseindex</code>.
</BODY>
EOM
#    &diag_exit;
}

sub err_noquery {
    # No search terms were submitted: print an ISINDEX prompt and exit.
    # Nothing in this file calls it.
    print <<'EOM' ;
Content-type: text/html

<HEAD>
<TITLE>No search terms submitted</TITLE>
</HEAD>
<BODY>
<H1>No search terms submitted</H1>
Please enter a term to search for.
<ISINDEX ACTION="/cgi-bin/aglimpse">
</BODY>
EOM
    diag_exit();
}

sub err_cantcwd {
    # The configured index directory could not be used: report a server
    # side configuration error and exit.
    print <<'EOM' ;
Content-type: text/html

<HEAD>
<TITLE>Error: index path incorrect</TITLE>
</HEAD>
<BODY>
<H1>Index Path Incorrect</H1>
Configuration error on the server side.
<ISINDEX ACTION="/cgi-bin/aglimpse">
</BODY>
EOM
    diag_exit();
}

sub get_title {
    # get the title of an HTML doc
    # Takes a server-relative path, looks up "$serverroot/<path>" (with
    # repeated slashes collapsed) in the title database and returns the
    # stored title, or the path itself when there is no non-empty entry.
    my ($gfile) = @_;

    (my $otherfile = "$serverroot/$gfile") =~ s/\/+/\//g;
    my $title = $TTLDB{$otherfile};

    return $title if $title;
    return $gfile;
}