home *** CD-ROM | disk | FTP | other *** search
- #!/usr/bin/perl -w
- # Creates a structured HTML list ('sitemap') of HTML files
- # Copyright (C) 1998,1999 Daniel Naber <dnaber@mini.gt.owl.de>
- # version 1.10, 1999-08-29 (version number is independent from java version)
- # See below for configuration.
- # Usage from command line: ./tree.pl [htmldir] >outputfile
- #
- # See http://www.ev-stift-gymn.guetersloh.de/server/tree_e.html for the
- # latest version. It would be nice to include a link to this page if you
- # use the script to generate a public page.
- #
- # CHANGES:
- # 1997-09-07: first version
- # (...)
- # 1998-28-04: new option: @includefiles, @excludepatterns now
- # called @excludefiles; one space after $pictag
- # 1998-10-06: checks if $htmldir exists and if it's a directory;
- # $patternfile is now called $templatefile
- # 1998-08-16: made ISO 8601 date default
- # 1999-04-17: small documentation update
- # 1999-08-29: added $filetag_end, thanks to Doug Melton;
- # small html cleanup
- #
- # TODO/BUGS/PROBLEMS:
- # -$htmldir may not point to a link
- # -links beneath $htmldir will be ignored
- # -due to stupid program design, you cannot make a tree that only
- # consists of files given in @indexfiles
- #
- # COPYRIGHT:
- # This program is free software; you can redistribute it and/or
- # modify it under the terms of the GNU General Public License
- # as published by the Free Software Foundation; either version 2
- # of the License, or (at your option) any later version.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
- #
- # You should have received a copy of the GNU General Public License
- # along with this program; if not, write to the Free Software
- # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-
# - user-configurable options ----------------------------------------------

# Directory with the HTML files, without a trailing slash.  A (true) first
# command line argument overrides the built-in default.
$htmldir = $ARGV[0] ? $ARGV[0] : "/usr/local/httpd/htdocs";

# Template used to build the output page.
$templatefile = "tree-template.html";
#$templatefile = "";                    # uncomment to get just the bare list
$baseurl = "";                          # put in front of every URL

$cgi = 0;                               # set to 1 to run as a CGI script
# Set both of the following options to 1 to generate a list that can be
# used offline.
$offline   = 0;                         # generated file is usable from disk
$indexrefs = 0;                         # link to 'dir/index.html' etc. (instead of 'dir/')

@indexfiles = qw(index.html index.shtml);   # names of the default files
@inchtml    = qw(shtml html htm);           # suffixes treated as HTML files
#@incpics   = ();                           # don't include pictures
@incpics    = qw(gif jpg jpeg);             # suffixes listed as pictures
$listsize   = 1;                            # show the size in kB for every file?

# Output file (path relative to $htmldir); it is listed without a link.
$self      = "/server/az.html";
$selftitle = "Sitemap";

# Include only these files/directories; '*' is a wildcard.  Leave the list
# empty to include every file matching the suffix lists above, except those
# named in @excludefiles.
@includefiles = ();
# Never include these files/directories:
@excludefiles = ('/secret/*');

$listwithouttitle = 0;                  # list HTML files without <title>..</title>?

#$date = "DAY.MONTH.YEAR";              # German format
#$date = "MONTH/DAY/YEAR";              # American format
$date = "YEAR-MONTH-DAY";               # ISO 8601 format

# Plain list output:
$dirtag      = '<ul>';
$dirtag_end  = '</ul>';
$foldertag   = '<li>';
$htmltag     = '<li>';
$pictag      = '<li>';
$nolinktag   = '<li>';
$filetag_end = '</li>';

# Alternative: an icon in front of every item
# (this seems to be valid HTML, but it's not good HTML):
#$dirtag = '<dl>';
#$dirtag_end = '</dl>';
#$foldertag = '<dt><img src="tree_img/folder.open.gif" alt="folder">';
#$htmltag = '<dt><img src="tree_img/generic.gif" alt="html file">';
#$pictag = '<dt><img src="tree_img/image2.gif" alt="picture">';
#$nolinktag = '<dt><img src="tree_img/folder.open.gif" alt="other file">';
#$filetag_end = '</dt>';

# Marker put in front of files that changed not long ago.
$modifiedtag = '<img src="/images/new.gif" alt="page updated recently">';
# Files modified within the last $modifiedtime hours get the marker
# (0 switches the marker off).
$modifiedtime = 3 * 24;
-
- # - nearly no configuration below ------------------------------------------
-
- # $st = time(); # comment in if you're interested in runtime
- use File::Find;
# Global state shared by the subs below.
$depth    = 0;      # slash count of the URL currently being listed
$htmlct   = 0;      # number of HTML files listed
$htmlsize = 0;      # accumulated size of the HTML files
$picct    = 0;      # number of pictures listed
$picsize  = 0;      # accumulated size of the pictures
$partlist = "";     # list markup collected but not yet printed

# Program flow: fill in the date, check the setup, print the template part
# in front of the list, collect the files, print the list and finally the
# rest of the template.
getdate();
init();
first_part_output();
find(\&doperfile, $htmldir);
list_output();
last_part_output();
# $diff = time() - $st; print STDERR "time: $diff secs\n"; # see above
exit;
-
- # --------------------------------------------------------------------------
-
# Fills in the global $date pattern with today's local date.
#
# The first occurrence (matched case-insensitively) of each of the words
# DAY, MONTH and YEAR in $date is replaced by the two-digit day of month,
# the two-digit month number (01..12) and the four-digit year.
# Takes no arguments; the result is left in $date.
sub getdate {
    my ($mday, $mon, $year) = (localtime(time()))[3, 4, 5];
    # localtime() counts months from 0 and years from 1900.
    $mon  = sprintf("%02d", $mon + 1);
    $mday = sprintf("%02d", $mday);
    $year += 1900;
    $date =~ s#DAY#$mday#i;
    $date =~ s#MONTH#$mon#i;
    $date =~ s#YEAR#$year#i;
}
-
# Prepares a run:
#  - in CGI mode ($cgi true) unbuffers STDOUT and prints the HTTP header
#    (plus a status line when the script name contains 'nph-tree');
#  - aborts with an error message when $htmldir is not a directory;
#  - turns every entry of @excludefiles and @includefiles from a wildcard
#    pattern into a regular expression, in place.
# Takes no arguments.
sub init {
    if( $cgi ) {
        select(STDOUT); $| = 1;
        $nph = 1 if( $0 =~ m#nph-tree# );
        print "HTTP/1.0 200 OK\n" if( $nph );
        print "Content-Type: text/html\n\n";
    }
    if( ! -d $htmldir ) {
        my $message = "Error: $0: '$htmldir' doesn't exist or isn't a directory.\n";
        if( $cgi ) {
            print $message;         # a CGI client only sees STDOUT
        } else {
            # STDOUT is normally redirected into the generated page, so
            # report the problem where the user will actually see it.
            print STDERR $message;
        }
        exit(1);
    }
    # '*' is the only wildcard: it becomes '.*?', while everything else is
    # quoted so that characters like '.' or '+' in a path match literally.
    # The limit of -1 keeps a trailing '*' from being dropped by split().
    foreach my $pattern (@excludefiles, @includefiles) {
        $pattern = join('.*?', map { quotemeta($_) } split(/\*/, $pattern, -1));
    }
}
-
# Loads the template into the global $output, replaces every
# '<!-- $date -->' placeholder with $date and prints the part of the
# template in front of the first '<!-- $list -->' placeholder.  Nothing is
# printed when the template has no such placeholder.
sub first_part_output {
    $output = load($templatefile);
    $output =~ s#<!-- \$date -->#$date#igs;
    my $head = "";
    if( $output =~ m#^(.*?)<!-- \$list -->#is ) {
        $head = $1;
    }
    print $head;
}
-
# Callback for File::Find::find(): decides whether the entry currently
# visited goes into the global @filelist.
#
# Directories are recorded with a trailing '/'.  An entry is skipped when
#  - @includefiles is non-empty and none of its patterns matches, or
#  - one of the @excludefiles patterns matches
# (both are matched against the path relative to $htmldir, as regular
# expressions anchored at both ends), or when it is one of @indexfiles.
# Of the remaining entries only directories and files with a suffix from
# @inchtml or @incpics are kept.
sub doperfile {
    my $thisfile = $File::Find::name;
    # find() has changed into the directory being scanned, so $_ (the name
    # within that directory) is what can be tested reliably; testing the
    # full name fails when $htmldir is a relative path.
    $thisfile .= "/" if( -d $_ );
    # Part after $htmldir; \Q..\E keeps special characters in the
    # directory name from being read as regex syntax.
    my ($thisfile_rel) = ($thisfile =~ m#^\Q$htmldir\E(/.*)#);
    return if( ! defined($thisfile_rel) );

    # include only files from @includefiles:
    if( scalar(@includefiles) >= 1 ) {
        my $do_use = 0;
        foreach my $inpat (@includefiles) {
            if( $thisfile_rel =~ m#^$inpat$# ) {
                $do_use = 1;
                last;
            }
        }
        return if( ! $do_use );
    }

    # exclude files from @excludefiles:
    foreach my $expat (@excludefiles) {
        return if( $thisfile_rel =~ m#^$expat$# );
    }

    # Index files are not listed on their own.
    return if( isfile($thisfile, @indexfiles) );

    if( $thisfile =~ m#/$#
        || isfileclass($thisfile, @inchtml)
        || isfileclass($thisfile, @incpics) ) {
        push(@filelist, $thisfile);
    }
}
-
# Prints the nested list for all entries collected in @filelist.
#
# The entries are sorted and walked once.  For each entry the URL (the
# part after $htmldir, kept in the global $url because getinfo() reads
# it), its depth (number of slashes) and its directory are determined.
# Depending on how the directory relates to the previous entry's directory
# a new sub-list is opened, open sub-lists are closed, or the entry is
# simply appended.  Markup is collected in $partlist and flushed whenever
# an entry lands in the same directory as its predecessor; the number of
# open lists is tracked so that all of them get closed at the end.
sub list_output {
    my %dirsdone;               # directories a sub-list was opened for
    my $thisdir = "";
    my $dirtag_ct = 0;          # number of currently open lists
    @filelist = sort(@filelist);
    print "$dirtag\n";
    $dirtag_ct++;
    foreach my $thisfile (@filelist) {
        # \Q..\E: paths are data, not regex syntax
        ($url) = ($thisfile =~ m#\Q$htmldir\E(/.*)#i);
        next if( ! defined($url) );     # not below $htmldir, cannot be listed
        $olddepth = $depth;
        $depth = ($url =~ tr#/##);      # 1 = html-root
        $olddir = $thisdir;
        ($thisdir) = ($url =~ m#(.*/).*?#i);
        if( $thisdir ne $olddir && ! $dirsdone{$thisdir} ) { # deeper level or same level
            $dirsdone{$thisdir} = 1;
            # a negative repeat count yields the empty string
            $partlist .= "$dirtag_end\n" x ($olddepth-$depth+1);
            $dirtag_ct -= minzero($olddepth-$depth+1);
            # getdefaultfile() adds the index file's entry itself; without
            # an index file the directory is listed as plain text.
            $partlist .= " $nolinktag$baseurl$url$filetag_end\n" if( ! getdefaultfile("$htmldir$thisdir") );
            $partlist .= "$dirtag\n";
            $dirtag_ct++;
        } elsif( $thisdir !~ m#\Q$olddir\E#i ) { # higher level
            $partlist .= "$dirtag_end\n" x ($olddepth-$depth);
            $dirtag_ct -= minzero($olddepth-$depth);
            $partlist .= getinfo($thisfile, 0);
        } else { # same level as before
            $partlist .= getinfo($thisfile, 0);
            # clean up HTML: drop lists that were opened and closed at once
            $partlist =~ s#\Q$dirtag\E\n\Q$dirtag_end\E\n##ig;
            print $partlist;
            $partlist = "";
        }
    }
    print $partlist;
    # close list correctly:
    print "$dirtag_end\n" x $dirtag_ct;
}
-
# Prints the part of the template that follows the '<!-- $list -->'
# placeholder, after filling in the statistics placeholders.
#
# $htmlsize and $picsize are converted in place from bytes to whole kB
# (divided by 1000, fraction dropped) before they are inserted.  Nothing
# is printed when the template has no '<!-- $list -->' placeholder.
sub last_part_output {
    # size in kB
    $htmlsize = int($htmlsize/1000);
    $picsize  = int($picsize/1000);

    $output =~ s#<!-- \$htmlct -->#$htmlct#igs;
    $output =~ s#<!-- \$htmlsize -->#$htmlsize#igs;
    $output =~ s#<!-- \$picsct -->#$picct#igs;
    $output =~ s#<!-- \$picsize -->#$picsize#igs;

    my $tail = "";
    if( $output =~ m#<!-- \$list -->(.*)$#is ) {
        $tail = $1;
    }
    print $tail;
}
-
- # --------------------------------------------------------------------------
-
# Looks for a default (index) file in a directory.
#
# Argument: the directory path, including its trailing slash.
# The names in @indexfiles are tried in order; for the first one that
# exists the file's list entry is appended to the global $partlist and the
# matching name is returned.  Returns 0 when none of them exists.
sub getdefaultfile {
    my ($dir) = @_;
    foreach my $candidate (@indexfiles) {
        next if( ! -e "$dir$candidate" );
        # there's a defaultfile
        $partlist .= getinfo("$dir$candidate", 1);
        return $candidate;
    }
    return 0;
}
-
# Builds the list entry (HTML markup) for one file.
#
# Arguments: the full path of the file, and a flag that is true when the
# file is a directory's index file (it then gets $foldertag instead of
# $htmltag).
# Reads the global $url (set by the caller) to build the link, and adds
# the file to the HTML or picture statistics ($htmlct/$htmlsize or
# $picct/$picsize).  Returns the entry, or an empty string for files that
# are neither HTML files nor pictures.
sub getinfo
{
    my ($thisfile, $isindexfile) = @_;
    my ($size, $exactsize) = getsize($thisfile);
    # offline lists link to the file on disk
    my $linkurl  = $baseurl . ($offline ? $htmldir.$url : $url);
    my $sizenote = $listsize ? " ($size kB)" : "";
    my $entry    = "";

    if( isfileclass($thisfile, @inchtml) ) {
        $htmlsize += $exactsize;
        $htmlct++;
        my $string = load_part($thisfile);
        if( $thisfile eq "$htmldir$self" ) { # output file itself
            $entry .= " $nolinktag$selftitle";
        } elsif( $string =~ m#<title>(.*?)</title>#is ) { # common case
            my $title = $1;
            $entry .= $isindexfile ? " $foldertag" : " $htmltag";
            $entry .= " $modifiedtag" if( is_it_modified($thisfile) );
            $entry .= " <a href=\"$linkurl\">$title$sizenote</a>";
        } elsif( $listwithouttitle ) { # files with no title tag
            $entry .= " $nolinktag$baseurl$url$sizenote";
        }
        $entry .= "\n";
    } elsif( isfileclass($thisfile, @incpics) ) {
        $picsize += $exactsize;
        $picct++;
        my ($filenameonly) = ($url =~ m#.*/(.*)#i);
        $entry .= " $pictag";
        $entry .= " $modifiedtag" if( is_it_modified($thisfile) );
        $entry .= " <a href=\"$linkurl\">$filenameonly$sizenote</a>\n";
    }

    # links to dir/ or to dir/index.html (see configuration section)
    if( $indexrefs ) {
        my ($filepart) = ($thisfile =~ m#.*/(.*)#);
        $entry =~ s#href="(.*?/)"#href="$1$filepart"#i;
    }

    return $entry;
}
-
# Tells whether a file was modified recently.
#
# Argument: the file's path.
# Returns 1 when the file's modification time is less than $modifiedtime
# hours in the past, otherwise 0.  Also returns 0 when the check is
# switched off ($modifiedtime is 0) or when the file cannot be stat()ed.
sub is_it_modified {
    my $filename = shift;
    return 0 if( ! $modifiedtime );
    my $mtime = (stat($filename))[9];
    return 0 if( ! defined($mtime) );   # stat() failed, e.g. file vanished
    return ((time() - $mtime) < ($modifiedtime*60*60)) ? 1 : 0;
}
-
# Returns a file's size as the list (kB, bytes).
#
# Argument: the file's path.
# The kB value is the byte count divided by 1000 with the fraction
# dropped, but never less than 1.  Empty or missing files are reported as
# (1, 0): -s yields no number for them, so the byte count is forced to 0
# to keep the callers' arithmetic numeric.
sub getsize {
    my $file = shift;
    my $exactsize = (-s $file) || 0;
    my $size = int($exactsize/1000);
    $size = 1 if( $size == 0 );
    return $size, $exactsize;
}
-
# Checks a file's suffix.
#
# Arguments: the file name, followed by the list of suffixes (without the
# leading dot).
# Returns 1 when the name ends in a dot plus one of the suffixes, else 0.
sub isfileclass {
    my ($path, @suffixes) = @_;
    my $hits = grep { my $suffix = $_; $path =~ m#\.$suffix$# } @suffixes;
    return $hits ? 1 : 0;
}
-
# Checks a file's name.
#
# Arguments: the file's path, followed by the list of file names to look
# for.
# Returns 1 when the path ends in a slash plus one of the names, else 0.
sub isfile {
    my ($path, @names) = @_;
    my $hits = grep { my $name = $_; $path =~ m#/$name$# } @names;
    return $hits ? 1 : 0;
}
-
# Reads the beginning of a file: all lines up to and including the first
# one that contains '</title>' (matched case-insensitively), or the whole
# file when there is no such line.
#
# Argument: the file's path.
# Returns the text read; dies when the file cannot be opened.
sub load_part {
    my $file = shift;
    my $string = "";
    # Three-argument open: the name is taken literally, whatever
    # characters it contains.
    open(my $input, '<', $file) || die "Cannot open '$file': $!";
    # A lexical line variable leaves the caller's $_ alone.
    while( defined(my $line = <$input>) ) {
        $string .= $line;
        last if( $line =~ m#</title>#i );
    }
    close($input);
    return $string;
}
-
# Reads a whole file.
#
# Argument: the file's path.
# Returns the file's content as one string, or the empty string when the
# file cannot be opened.
sub load {
    my $file = shift;
    open(my $input, '<', $file) || return "";
    # Slurp mode for this read only; local restores the caller's record
    # separator on return, whatever it was.
    local $/;
    my $string = <$input>;
    close($input);
    return defined($string) ? $string : "";
}
-
# Clamps a number at zero: returns the argument when it is greater than 0,
# otherwise 0.
sub minzero {
    my ($value) = @_;
    return $value > 0 ? $value : 0;
}
-