home *** CD-ROM | disk | FTP | other *** search
#!/bin/sh
perl=/usr/local/etc/architext/perl
eval "exec $perl -x $0 $*"
#!perl

## Copyright Architext Software, 1994 (c)
##
## This CGI script allows users to index Architext databases
## through a WWW interface.  Naturally, there are plenty of security
## concerns associated with this scheme.
##
## This script appears as several different web pages, depending on
## its invocation.  If invoked with a 'db=<database>' argument, the
## script prints out the configuration information for a particular
## database and then offers a button that starts indexing it.  If
## invoked with 'Index=Index' and 'dbname=<database>', it will initialize
## a database which has an existing database configuration file.
## If invoked with 'Stop' and 'db=<database>', it sends TERM to the
## process recorded in that collection's .pid file.
## Without any of these arguments, it displays a list of currently
## existing database.conf files, and allows the user to choose a database
## to index.
$| = 1;    ## don't buffer output

$root = "/usr/local/etc/architext";

die "Invalid root directory '$root'\n" unless -d $root;

unshift(@INC, "$root/perllib");
require 'architext.pl';
require 'architextConf.pl';

%form = &Architext'readFormArgs;

## 'db' and 'dbname' come straight from the browser and are spliced into
## file names under $root/collections by the rest of this script (and
## 'db' is handed to readConfig just below).  A genuine collection name
## is the basename of a .conf file, so it can never contain a slash or a
## NUL byte; refuse anything that does before it reaches a file name.
foreach $field ('db', 'dbname') {
    die "Invalid collection name\n" if $form{$field} =~ m|[/\0]|;
}

%attr = &ArchitextConf'readConfig("$root/Architext.conf", $form{'db'});

## Base URL used for the help-document links in the generated pages.
if (&Architext'remoteMode($root)) {
    $helppath = &Architext'helpPath();
} else {
    $helppath = $attr{'ArchitextURL'};
}

$scriptname = "AT-index.cgi";

## check for password, if one is specified in Architext.conf
## if it doesn't appear as a form arg, present password page
## NOTE(review): presumably Architext'password does not return when the
## password is missing or wrong -- confirm in architext.pl.
&Architext'password($attr{'ArchitextURL'},
                    $scriptname,
                    $attr{'Password'},
                    $form{'password'});

## ROT13 the configured password (letters only).  The rotated value is
## what gets embedded in the pages below, as a hidden POST field
## ($postpass) and as a query-string prefix ($getpass).
$attr{'Password'} =~ tr/a-zA-Z/n-za-mN-ZA-M/;
$postpass =
    "<INPUT TYPE=\"hidden\" NAME=\"password\" VALUE=\"$attr{'Password'}\">"
    if ($attr{'Password'});
$getpass = "?password=$attr{'Password'}" if $postpass;
-
## 'Stop' mode: signal the indexing process recorded in the collection's
## .pid file, remove that file, and leave an empty .term marker behind.
if ($form{'Stop'}) {
    &Architext'printHeader($attr{'ArchitextURL'},
                           "Stop Indexing: $form{'db'}");
    $pidfile = "$root/collections/$form{'db'}.pid";
    if (! -e $pidfile) {
        ## NOTE(review): exitError presumably does not return -- confirm.
        &Architext'exitError($attr{'ArchitextURL'},
            "<b>You tried to stop an indexing process that wasn't running</b>");
    }

    ## Read the process id; if the file has several lines the last wins.
    $pid = '';
    if (open(PID, "<$pidfile")) {
        while (<PID>) {
            $pid = $_;
        }
        close(PID);
    }
    $pid =~ s/^\s+//;
    $pid =~ s/\s+$//;

    ## Only ever signal a real, positive process id.  An unreadable,
    ## empty or garbled pid file would otherwise numify to 0, and
    ## kill with pid 0 signals every process in this script's own
    ## process group.
    $pidok = ($pid =~ /^[1-9][0-9]*$/);
    kill 'TERM', $pid if $pidok;

    unlink($pidfile);

    ## Touch the .term marker; failing to create it is not fatal.
    close(TERM) if open(TERM, ">$root/collections/$form{'db'}.term");

    if ($pidok) {
        print "<p> <b>Indexing process number $pid has been terminated.</b>\n";
    } else {
        print "<p> <b>No valid process number was recorded for this collection, so no process was signalled.</b>\n";
    }
    print <<EOF;
<p>
<FORM ACTION="AT-admin.cgi" METHOD=POST>
<INPUT TYPE="hidden" NAME="db" VALUE="$form{'db'}">
<INPUT TYPE="submit" NAME="Admin" VALUE="Admin">
Go back to the admin page for this collection.
$postpass
</FORM>
EOF
    &Architext'Copyright($attr{'ArchitextURL'});
    exit(0);
}
-
## Main page dispatch.  Exactly one of three pages is produced:
##   db=<name>                -> show the collection's settings and an
##                               Index button
##   Index=... dbname=<name>  -> start the indexer in the background
##   (neither)                -> list the collections that can be indexed
if ($form{'db'}) {
    ## Print out configuration options for an already-existing
    ## database so the user knows what will happen upon indexing

    ## Dump our standard header
    &Architext'printHeader($attr{'ArchitextURL'},
                           "Collection Indexing: $form{'db'}");

    ## Check for the desired database.conf file.
    $dbconfig = "$root/collections/$form{'db'}.conf";
    if (! -r $dbconfig) {
        &Architext'exitFileError($attr{'ArchitextURL'},
                                 $dbconfig, "does not exist");
    }

    ## %attr was already read for this collection during start-up, so
    ## the configuration is not re-read here.

    print "<p> The collection you have chosen has the ";
    print "following characteristics:\n";

    ## This form is just a little list of the options.
    &printForm($form{'db'});
    &Architext'Copyright($attr{'ArchitextURL'});

} elsif ($form{'Index'}) {
    ## Assuming all the relevant configuration options are specified
    ## as form arguments, this mode actually starts the indexing.

    ## Header again
    &Architext'printHeader($attr{'ArchitextURL'},
                           "Collection Indexing: $form{'dbname'}");

    if (!$form{'dbname'}) {
        &Architext'exitError($attr{'ArchitextURL'},
                             "Index specified with no dbname.");
    }

    ## Can we read the db.conf file?  A missing file is unreadable too,
    ## so one -r test covers both halves of the error message.
    $dbconfig = "$root/collections/$form{'dbname'}.conf";
    if (! -r $dbconfig) {
        &Architext'exitFileError($attr{'ArchitextURL'},
                                 $dbconfig,
                                 "does not exist or is not readable.");
    }

    ## A .pid file means an indexer is (believed to be) running already.
    if (-e "$root/collections/$form{'dbname'}.pid") {
        print <<EOF;
<p><b>An indexing process is already in progress for this collection.</b>
<FORM ACTION="AT-index.cgi" METHOD=POST>
<INPUT TYPE="submit" NAME="Stop" VALUE="Stop Indexing">
Stop the indexing process that is currently running on this collection.
<INPUT TYPE="hidden" NAME="db" VALUE="$form{'dbname'}">
$postpass
</FORM>
EOF
        &Architext'Copyright($attr{'ArchitextURL'});
        exit(0);
    }

    ## Names of the log, progress and error files; the indexer is told
    ## exactly where to put them.
    $logname  = "AT-$form{'dbname'}.log";
    $progname = "AT-$form{'dbname'}.prog";
    $errname  = "AT-$form{'dbname'}.err";
    $realpath = "$root/collections";
    $urlpath  = $attr{'ArchitextURL'};   ## not referenced again in this file

    ## Kick off the indexing process here, detached by a double fork:
    ## the first child exits at once (and is reaped below), so the
    ## grandchild that execs the indexer is no longer our descendant.
    $aindex    = "$root/aindex.pl";
    $started   = 0;
    $startfail = '';
    $child     = fork;
    if (!defined($child)) {
        ## fork failed: we are still the one and only process.
        $startfail = "$!";
    } elsif ($child == 0) {
        ## first child
        $middle     = $$;
        $grandchild = fork;
        if (defined($grandchild) && $grandchild == 0) {
            ## child's child: wait until the first child has gone away.
            ## Compare against its pid rather than against 1, because an
            ## orphan is not always re-parented to process 1.
            sleep 1 while getppid() == $middle;
            ## &Closer closes file descriptors before execing to workaround
            ## NCSA bug in httpd version 1.3
            &Closer();
            ## last three args make indexer build log, progress and
            ## error files
            exec($aindex, $form{'dbname'}, "$realpath/$logname",
                 "$realpath/$progname", "$realpath/$errname");
        }
        ## first child exits quickly; a non-zero status tells the parent
        ## that the second fork failed.
        exit(defined($grandchild) ? 0 : 1);
    } else {
        ## parent reaps first child quickly
        $reaped = wait;
        if ($reaped == $child && $? != 0) {
            $startfail = "the background process could not be created";
        } else {
            $started = 1;
        }
    }

    if (!$started) {
        &Architext'exitError($attr{'ArchitextURL'},
            "<b>The indexing process could not be started: $startfail</b>");
    } else {
        ## tell the user it started the process.
        print "<H2>Indexing initiated.</H2>\n";
        print "<p><hr>\n";
        ## Extend the query-string form of the password with the
        ## collection name (not referenced again in this file).
        if ($getpass) {
            $getpass .= "&";
        } else {
            $getpass = "?";
        }
        $getpass .= "db=$form{'dbname'}";
        print <<EOF;
<b>Next steps:</b>
<p>While you are waiting for the indexing process to complete, you can:
<p> <FORM ACTION="AT-generate.cgi" METHOD=POST>
<INPUT TYPE="submit" NAME="Status" VALUE="View Logs">
View the log files created by the indexing process.
<INPUT TYPE="hidden" NAME="db" VALUE="$form{'dbname'}">
$postpass
</FORM>
<p>
<FORM ACTION="AT-index.cgi" METHOD=POST>
<INPUT TYPE="submit" NAME="Stop" VALUE="Stop Indexing">
Stop the indexing process that was just initiated.
<INPUT TYPE="hidden" NAME="db" VALUE="$form{'dbname'}">
$postpass
</FORM>
<p> <FORM ACTION="AT-generate.cgi" METHOD=POST>
<INPUT TYPE="submit" NAME="Generating" VALUE="Generate">
Generate a search page for this collection.
$postpass
<INPUT TYPE="hidden" NAME="db" VALUE="$form{'dbname'}">
</FORM>
<p>
<FORM ACTION="AT-admin.cgi" METHOD=POST>
<INPUT TYPE="hidden" NAME="db" VALUE="$form{'dbname'}">
<INPUT TYPE="submit" NAME="Admin" VALUE="Admin">
Go back to the admin page for this collection.
$postpass
</FORM>

<p><b>Note:</b>Unless you have
configured this collection to send email when the indexing process
is done, you will need to view the log files or visit the admin page
for this collection if you want to know when indexing has finished.
EOF
    }

    &Architext'Copyright($attr{'ArchitextURL'});
} else {
    ## Print out the top-level screen: scan the collections directory for
    ## db.conf files, and allow the user to select a db.conf
    ## file.

    ## An unreadable directory is treated like an empty one.
    @dbconf = ();
    if (opendir(CONF, "$root/collections")) {
        @dbconf = grep(/\.conf$/, readdir(CONF));
        closedir(CONF);
    }

    &Architext'printHeader($attr{'ArchitextURL'}, "Collection Indexing");

    if (@dbconf) {
        print <<EOF;
<FORM ACTION="AT-index.cgi" METHOD=POST>
Choose a document collection to index.<P>
<DL>
<DT>
Existing <a href="${helppath}AT-helpdoc.html#Document Collections">
document collections:</a> <DD>
<SELECT NAME="db" SIZE=5>
EOF
        foreach $conf (@dbconf) {
            ## The collection name is the file name minus ".conf".
            ($name = $conf) =~ s|\.conf$||;
            next if $name eq 'Architext';
            print "<OPTION> $name\n";
        }
        print <<EOF;
</SELECT>
</DL>
<INPUT TYPE="submit" NAME="Select" VALUE="Select">
$postpass
</FORM>
EOF
    } else {
        print <<EOF;
<p> There are no document collections currently defined. Please
click on the configure button to go to the collection configuration
screen.
<FORM ACTION="AT-config.cgi" METHOD=POST>
<INPUT TYPE="submit" VALUE="Configure">
$postpass
</FORM>
EOF
    }
    &Architext'Copyright($attr{'ArchitextURL'});
}
-
## printForm($db)
##
## Prints the settings that govern indexing of collection $db as an HTML
## list, followed by a form that starts indexing (posting 'Index' and
## 'dbname' back to AT-index.cgi) and a form that returns to the admin
## page.  Reads the globals %attr, $helppath and $postpass; %attr is
## left unmodified, so the sub renders the same page however often it
## is called.  Writes to the currently selected output handle and
## returns nothing useful.
sub printForm {
    local($db) = shift;
    local($url) = $attr{'ArchitextURL'};
    ## Work on a copy: stripping the '+' marker from the shared %attr
    ## entry would make a later call take the other branch below.
    local($contents) = $attr{'CollectionContents'};
    local($key, $files);

    print "<UL>\n";

    ## These settings are only shown when the library reports debug mode.
    foreach $key ('IndexExecutable', 'SearchExecutable', 'StemTable',
                  'StopTable', 'CollectionInfo') {
        &Architext'printLineItem($url, $key, $attr{$key})
            if &Architext'debugMode();
    }

    &Architext'printLineItem($url, 'CollectionIndex',
                             $attr{'CollectionIndex'});

    if ($contents =~ /^\+/) {
        ## A leading '+' marks the value as the name of a file that
        ## lists the documents to index.
        $contents =~ s/^\+//;
        print <<EOF;
<li> <a href="${helppath}AT-helpdoc.html#CollectionContents">
CollectionContents:</a> Index the files listed in
'${contents}'.
EOF
    } else {
        ## Otherwise the value is a list separated by colons, commas,
        ## semicolons or whitespace; show it comma-separated.
        $files = join(", ", split(/[:,;\s]+/, $contents));
        print <<EOF;
<li> <a href="${helppath}AT-helpdoc.html#CollectionContents">
CollectionContents:</a> Index the files '${files}' using these rules:<ul>
EOF
        &Architext'printLineItem($url, 'IndexFilter',
                                 $attr{'IndexFilter'});
        &Architext'printLineItem($url, 'ExclusionRules',
                                 $attr{'ExclusionRules'});
        print "</ul>";
    }

    &Architext'printLineItem($url, 'AdminMail',
                             $attr{'AdminMail'});
    print "</UL>\n";

    ## The ACTION is a plain relative reference, like every other form
    ## this script emits.
    print <<EOF;
<p> Click on the <b>Index</b> button to start indexing.
Depending on the size of your collection, this may take
anywhere from a few moments to a few hours.
After you initiate the indexing process you will be given links
to several log files that you can reload periodically to monitor the
progress of the indexing process. If you specified an admin email address
for this collection, you will also be notified via email
when the indexing process finishes.
<p><FORM ACTION="AT-index.cgi" METHOD=POST>
<INPUT TYPE="submit" NAME="index" VALUE="Index">
Start an indexing process for this collection.
<INPUT TYPE="hidden" NAME="Index" VALUE="Index">
<INPUT TYPE="hidden" NAME="dbname" VALUE="$db">
$postpass
</FORM>

<p>
<FORM ACTION="AT-admin.cgi" METHOD=POST>
<INPUT TYPE="hidden" NAME="db" VALUE="$db">
<INPUT TYPE="submit" NAME="Admin" VALUE="Admin">
Go back to the admin page for this collection.
$postpass
</FORM>
EOF
}
-
## Closer()
##
## Closes file descriptors 0 through 19 at the operating-system level.
## It is called just before exec'ing the indexer to get rid of the
## unclosed socket inherited from NCSA httpd 1.3, a bug that prevents
## the indexing process from being able to background.
##
## The descriptors are closed with POSIX::close rather than a raw
## syscall(): the number of the close system call differs between
## platforms, so a hard-coded value silently invokes some other call
## (or nothing useful) on systems it was not written for.  Errors from
## descriptors that are not open are deliberately ignored.
sub Closer {
    local($fd);
    require POSIX;
    for ($fd = 0; $fd < 20; $fd++) {
        POSIX::close($fd);
    }
}
-
-
-
-