home *** CD-ROM | disk | FTP | other *** search
#!/usr/local/bin/perl
#
# Keyword search CGI, part 1: read the submitted form, translate the
# keywords from Latin keyboard positions into the site's text encoding,
# and show the "enter a keyword" page when nothing usable was typed.
#
# All variables are deliberately package globals: the routines pulled in
# by the require below (and the subs later in this file) are called
# without arguments and may read them.

#print "Content-Type: text/html\n\n";

# Define Server specific variables
# NOTE(review): this library presumably supplies the Print*HTML routines
# and @unwanted_files used further down -- confirm against that file.

require "search_engine_turath.pl";

$root_web_path = "/users/turath/httpd/htdocs/";

# Read the POSTed form data.  Only call read() when CONTENT_LENGTH is a
# plain non-negative integer; a missing or malformed value leaves the
# buffer empty instead of being handed to read().

$buffer = "";
if ($ENV{'CONTENT_LENGTH'} =~ /^\d+$/) {
    read(STDIN, $buffer, $ENV{'CONTENT_LENGTH'});
}

# Process info from Fill in Form

@pairs = split(/&/, $buffer);
foreach $pair (@pairs) {
    # Limit of 2 so a value that itself contains "=" is kept whole.
    ($name, $value) = split(/=/, $pair, 2);

    # URL-decode both the field name and its value.
    foreach ($name, $value) {
        tr/+/ /;
        s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
    }

    # Strip anything that looks like an HTML comment / server-side include.
    $value =~ s/<!--(.|\n)*-->//g;
    $FORM{$name} = $value;
}

$keywords = $FORM{'keywords'};

# Map Latin keys to the characters at the same keyboard positions.
# NOTE(review): the replacement literals below look like legacy
# single-byte Arabic text displayed through the wrong code page; confirm
# the file's real encoding before editing any of them.  The statement
# order is kept exactly as it was.

#Lower case keyboard
$keywords=~ tr/`/╨/;
$keywords=~ tr/qwertyuiop[]/╓╒╦▐▌█┌σ╬═/;
$keywords=~ tr/asdfghjkl\;\'/╘╙φ╚ß╟╩Σπ▀╪/;
$keywords=~ tr/,/µ/;
$keywords=~ s/b/ß╟/g;
$keywords=~ tr/zxcvnm\.\//╞┴─╤∞╔╥┘/;


#Upper case keyboard
$keywords=~ tr/~/°/;
$keywords=~ tr/QWER/≤≡⌡±/;
$keywords=~ s/Y/┼/g;
$keywords=~ s/T/ß┼/g;
$keywords=~ s/G/ß├/g;
$keywords=~ s/H/├/g;
$keywords=~ tr/ASJ/÷≥▄/;
$keywords=~ tr/X/·/;
$keywords=~ s/B/ß┬/g;
$keywords=~ tr/N/┬/;


$exact_match = $FORM{'exact_match'};

# Take the keywords that were entered and parse them into an array of
# keywords.  split(' ', ...) splits on runs of whitespace like /\s+/ but
# also discards leading whitespace, so no empty keyword is ever produced.

@keyword_list = split(' ', $keywords);

# If the person has not yet entered a search term, output a form that
# will ask them for a search word.  Testing the parsed list (rather than
# the raw string) also catches a query made only of spaces, which would
# otherwise match every file.

if (!@keyword_list) {
    &PrintHeaderHTML;
    &PrintNoKeywordHTML;
    &PrintFooterHTML;
    exit;
} # End of if keywords

# Begin to send back the dynamic search results page with the header.

&PrintHeaderHTML;

#
# We traverse the whole directory structure under $root_web_path
# and in doing so, we also parse the HTML files to see if they have
# the keywords and what their title is.
#
# The following sets up the initial variables
# @dirs is the array of directories as a placeholder for going back up
# the directory tree when we run out of files in a subdirectory.
# $cur_dir is the current directory number and is a reference to the @dirs
# array.
#
# Directory Handles are straight ASCII and consist of "DIR" + $cur_dir
#
# NOTE(review): -d follows symbolic links, so a link that points back up
# the tree would make this walk descend without end -- confirm the
# document tree contains no such links.

$number_of_hits = 0;
$cur_dir = 0;
@dirs = ($root_web_path);
opendir("DIR$cur_dir", $dirs[$cur_dir]);

$end_of_all_files = 0;

while (!($end_of_all_files)) {
    # Find the next candidate file.  When the whole directory tree has
    # been traversed, $end_of_all_files is set and the search stops.
    while (1) {
        $filename = &GetNextEntry("DIR$cur_dir", $dirs[$cur_dir]);
        $fullpath = "$dirs[$cur_dir]/$filename";

        #
        # CASE 1/2) No more entries in this directory: close it, then
        # either resume the parent directory or, at the root, finish.
        #
        if (!($filename)) {
            closedir("DIR$cur_dir");
            if ($cur_dir > 0) {
                $cur_dir--;
                next;
            }
            $end_of_all_files = 1;
            last;
        }

        #
        # CASE 3) Entry is a directory: descend into it when we have
        # read and search permission, otherwise just move on.
        #
        if (-d $fullpath) {
            if (-r $fullpath && -x $fullpath) {
                $cur_dir++;
                $dirs[$cur_dir] = $fullpath;
                opendir("DIR$cur_dir", $dirs[$cur_dir]);
            }
            next;
        } # End of Case 3 (File is directory)

        #
        # CASE 4) The file is an unwanted file.  Each element of
        # @unwanted_files is applied as a pattern against the full path;
        # one match is enough, so stop at the first.
        #
        $unwanted_file = 0;
        foreach (@unwanted_files) {
            if ($fullpath =~ /$_/) {
                $unwanted_file = 1;
                last;
            }
        } # End of foreach unwanted files
        next if ($unwanted_file);

        #
        # CASE 5) The file is really something to search, so leave the
        # inner loop -- provided it is readable.
        #
        last if (-r $fullpath);

    } # End of While (1)

    if (!($end_of_all_files)) {

        # @not_found_words starts as the full keyword list; words are
        # removed as they are found, so an empty list afterwards means
        # every keyword occurred in the file.
        @not_found_words = @keyword_list;
        $are_we_in_head = 0;   # becomes 1 once </head> or </title> was seen
        $headline = "";

        # Three-argument open: the name comes from readdir and must never
        # be interpreted as an open mode or a pipe.  A file that cannot
        # be opened is skipped instead of being read as if it were empty.
        unless (open(SEARCHFILE, '<', $fullpath)) {
            next;
        }
        while (<SEARCHFILE>) {
            $line = $_;
            # Collect everything up to and including the line that closes
            # the title or head; the title is extracted from it later.
            $headline .= $line if ($are_we_in_head == 0);
            $are_we_in_head = 1
                if (($line =~ m!</head>!i) || ($line =~ m!</title>!i));
            &FindKeywords($exact_match, $line, *not_found_words);
        } # End of SEARCHFILE
        close(SEARCHFILE);

        if (@not_found_words < 1) {
            # Isolate out the <TITLE></TITLE> information.  Only trust $1
            # when the match succeeded; the non-greedy group stops at the
            # first closing tag.
            $headline =~ s/\n/ /g;
            if ($headline =~ m!<title>(.*?)</title>!i) {
                $title = $1;
            } else {
                $title = "";
            }

            # Fallback title for documents without one.
            if ($title eq "") {
                $title = "╟ß┌Σµ╟Σ █φ╤ π┌╤▌";
            }

            # Make the path relative to the web root.  The root is quoted
            # so its characters are matched literally, and anchored so
            # only the leading occurrence is removed.
            $fullpath =~ s!^\Q$root_web_path\E/*!!;

            &PrintBodyHTML($fullpath, $title);
            $number_of_hits++;

        } # If there are no not_found_words

    } # If Not The End of all Files
} # End of While Not At The End Of All Files


# Print up the footer

if ($number_of_hits == 0) {
    &PrintNoHitsBodyHTML;
}

&PrintFooterHTML; # Print The Footer HTML Search.....

############################################################
#
# subroutine: FindKeywords
#   Usage:
#     &FindKeywords("on", $line, *not_found_words);
#
#   Parameters:
#     $exact_match = "on" to require a word boundary (\b) on
#                    both sides of each keyword; any other
#                    value matches the keyword anywhere in
#                    the line
#     $line = line to search on
#     *not_found_words = typeglob of the array of keywords
#                        that have not matched yet
#
#   Output:
#     The caller's array is rewritten in place: keywords
#     found in $line are removed, the remaining ones keep
#     their relative order.
#
#   Keywords are matched case-insensitively and as literal
#   text: they are escaped with \Q...\E, so characters such
#   as "(" or "." typed by the user can neither break the
#   pattern nor act as wildcards.
#
#   NOTE(review): \b is defined in terms of \w, so whether
#   exact matching behaves as intended for non-ASCII
#   keywords depends on the encoding/locale in effect --
#   confirm.
#
############################################################

sub FindKeywords
{
    # The glob assignment aliases @not_found_words to the caller's array.
    local($exact_match, $line, *not_found_words) = @_;
    my @still_missing = ();
    my $word;
    my $found;

    foreach $word (@not_found_words) {
        # The (?:...) wrapper keeps the pattern non-empty even for an
        # empty keyword; a truly empty pattern would make Perl silently
        # reuse the last successfully matched regex instead.
        if ($exact_match eq "on") {
            $found = ($line =~ /\b(?:\Q$word\E)\b/i);
        } else {
            $found = ($line =~ /(?:\Q$word\E)/i);
        }
        push(@still_missing, $word) unless ($found);
    }

    @not_found_words = @still_missing;
    return;

} # End of FindKeywords

############################################################
#
# subroutine: GetNextEntry
#   Usage:
#     &GetNextEntry(DIRECTORY_HANDLE, "directory_name");
#
#   Parameters:
#     DIRECTORY_HANDLE = handle to currently open directory
#     $directory = full path of directory
#
#   Output:
#     $filename = name of the next entry that is either a
#                 subdirectory (other than "." and "..") or
#                 a file whose name ends in .htm, .html or
#                 .shtml (any letter case); undefined when
#                 the directory has no more such entries.
#
#   The result is also left in the global $filename, as the
#   rest of the script uses that variable.
#
#   NOTE(review): callers that test the result for truth
#   will treat an entry literally named "0" as the end of
#   the directory.
#
############################################################

sub GetNextEntry {
    local($dirhandle, $directory) = @_;

    # Test definedness explicitly so the loop ends only when readdir
    # itself reports that the directory is exhausted.
    while (defined($filename = readdir($dirhandle))) {
        # Never hand back the directory itself or its parent.
        next if ($filename =~ /^\.\.?$/);

        # Anchored at the end of the name: a name that merely contains
        # "htm" somewhere (e.g. "htmlnotes.txt") is not an HTML document.
        last if ($filename =~ /\.s?html?$/i);

        last if (-d "$directory/$filename");
    } # End of while still stuff to read

    # Filename will be valid if it is a directory or an HTML file.
    return $filename;

} # End of GetNextEntry
