home *** CD-ROM | disk | FTP | other *** search
- #!/usr/local/bin/perl
- #
- # $Source: /repousr/u/rae/src/TEXT/perl/RCS/narc.pl,v $
- # $Revision: 2.0 $
- # $Author: rae $
- # $Date: 90/03/25 15:43:00 $
-
- # narc -- News ARChiver
-
- # to avoid the buffering problems
- #
- $| = 0;
-
- # Check for a NARCRC environment variable and use it if
- # it's there.
- #
- if($ENV{'NARCRC'})
- {
- $narcrc = $ENV{'NARCRC'};
-
- # if NARCRC doesn't start with a '/', prepend
- # $HOME to it
- #
- if($narcrc !~ /\/.*/)
- {
- $narcrc = $ENV{"HOME"} . "/" . $narcrc;
- }
- }
- else
- {
- # Default NARCRC: "$HOME/.narcrc"
- #
- $narcrc = $ENV{'HOME'} . "/.narcrc";
- }
-
- # testing flag. Usually zero.
- #
- $verbose=0;
- $standalone = "stand";
-
- # get the name of the news server. Can be replaced with
- # a hardcoded '$server=machine;' if you like
- #
- $server_file = "/usr/local/lib/news/server";
- $server = `cat $server_file` || die "Can't open $server_file";
- chop $server;
-
- # Where to find nntp
- #
- $nntp = "/usr/local/bin/nntp $server";
-
- # open narcrc for input. The format is:
- #
- # /full/path/to/archive/directory
- # dirname newsgroup last-time-of-access
- # dirname newsgroup last-time-of-access
- # dirname newsgroup last-time-of-access
- # ...
- #
- # 'dirname' is appended to the initial path. The special
- # case dirname of 'default' will use the last element of
- # the newsgroup name. For example,
- # default comp.sources.unix 19900101 000000
- # will use the dirname 'unix'
-
- open(NARCRC, "<$narcrc");
- $SIG{'INT'} = 'handler';
- $SIG{'QUIT'} = 'handler';
-
- # The first line of the narcrc is where to store everythign
- #
- $repo = <NARCRC>;
- chop($repo);
- print "Saving files in $repo\n" if $verbose;
-
- # $narc_line[$narc_index] is where each new line of the narcrc is
- # stored . This only gets written out after a successful 'narc'.
- #
- $narc_index = 1;
-
- # for each line in the narcrc
- # chop it into separate bits
- # ask nntp for all new news for the group
- # for each new news item
- # read article into /tmp/narc$$
- # scan for magic string 'Archive-name'
- # get the subdir and part-name from this line
- # if hit eof, then no magic string was found, do next news item
- # create all leading directories if they're not there already
- # move /tmp/narc$$ to full pathname of archived article
- while($narc_entry = <NARCRC>)
- {
- chop($narc_entry);
-
- # This is a software end-of-file because I am having trouble with
- # garbage showing up at the ends of file. With shar files it's
- # not such a big deal, since they usually have 'exit 0' at the end
- # of them, but with the .narcrc, it might be accidentally parsed.
- #
- last if($narc_entry eq "EOF");
-
- ($real_dirname, $newsgroup, $lastdate, $lasttime) = split(' ', $narc_entry);
-
- # check for degenerate empty case
- #
- last if $newsgroup eq "" || $lastdate eq "" || $lasttime eq "";
-
- # if "default", set dirname to last element
- # of the newsgroup name
- #
- if($real_dirname eq "default" && $newsgroup =~ /.*\.([^.]*)$/)
- {
- $dirname = $1;
- }
- else
- {
- $dirname = $real_dirname;
- }
-
- # tell the user what's going on
- #
- "$lastdate $lasttime" =~ /^(..)(..)(..) (..)(..)(..)$/ &&
- printf("%s %s 19%s-%s-%s %s:%s\n",
- $newsgroup, "[$dirname]", $1, $2, $3, $4, $5);
-
- # open the stdout of the nntp command
- #
- open(MSGIDS, "$nntp newnews $newsgroup $lastdate $lasttime|")
- || die "Can't get message ID's for $newsgroup from $server";
-
- # flag to see if we do anything
- $we_did_something = 0;
-
- # for each message ID..
- #
- while($msgid = <MSGIDS>)
- {
- # set the we_did_something flag so we know to update
- # the narcrc
- #
- $we_did_something = 1;
-
- # Zap the trailing newline on $msgid
- #
- chop($msgid);
- print "\t$msgid\n" if $verbose;
-
- # Get the article and store it in /tmp so we don't
- # need to use multiple [slow] nntp's for the same article
- #
- system("$nntp article '$msgid' > /tmp/narc$$");
-
- # Open the article and check for "Archive-name"
- #
- open(BODY, "</tmp/narc$$") || die "Can't read retrieved article $msgid in file /tmp/narc$$";
- while(<BODY>)
- {
- if(/^Archive-name:/)
- {
- ($foo, $arc_name) = split;
- @tree = split('/', $arc_name);
- # Put it in the '$standalone' dir if it's a single shar
- unshift(tree, $standalone) if $#tree == 0;
- print "\t", join('/', @tree), "\n";
- last;
- } #if Archive-name
- } # while BODY
-
- # if eof then "Archive-name" wasn't found
- #
- last if eof;
-
- # close the file
- #
- close(<BODY>);
-
- # Now build the full pathname of where the file is
- # supposed to live
- $fullpath = $repo . '/' . $dirname . '/' . join('/', @tree);
- $name = pop(@tree);
- $relpath = $dirname . '/' . join('/', @tree);
-
- # make sure the dirs are there
- #
- &mkpath($repo . '/' . $dirname . '/' . join('/', @tree));
-
- if(-f $fullpath)
- {
- # A copy of the file already exists, so name this one the
- # same as the existing one, but with a number appended to
- # it to delineate its version.
- #
- for($i=2; -f $fullpath . $i; $i++)
- { }
-
- # tack the number onto the end of the file name
- #
- $fullpath .= $i;
- }
-
- # and finally, put it there
- system("mv /tmp/narc$$ $fullpath");
-
- } # while MSGIDS
- close(<MSGIDS>);
-
- # only update the timestamp if we did anything
- #
- $timestamp = $we_did_something ? &now() : "$lastdate $lasttime";
-
- # Save the narcrc lines in memory in order so we can save them in
- # their original order. Just using 'each' on %NARCRC currently
- # inverts their order [as of Perl 3.0]
- #
- $narc_line[$narc_index++] = sprintf("%s\t%s\t%s", $real_dirname, $newsgroup, $timestamp);
- } # while narc_entry
- close(<NARCRC>);
-
- # Save an updated narcrc
- #
- print "Saving $narcrc..";
- open(NARCRC, ">$narcrc");
- print NARCRC $repo, "\n";
- for($i=1; $i < $narc_index; $i++)
- {
- print NARCRC $narc_line[$i], "\n";
- }
- # software EOF
- print NARCRC "EOF\n";
- close(<NARCRC>);
- print "\n";
-
- # Subroutine to return the current date and time in
- # nntp format, which is "YYMMDD HHMMSS"
- #
- sub now
- {
- ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time());
- $mon++;
- sprintf("%02d%02d%02d %02d%02d%02d", $year,$mon,$mday,$hour,$min,$sec);
- }
-
- sub handler
- {
- local($sig) = @_;
- print "\n\nCaught signal $sig -- exiting $ARGV[0]\n";
- close(<ARTICLE>);
- close(<BODY>);
- close(<MSGIDS>);
- close(<NARCRC>);
- close(<SAVEFILE>);
- exit 0;
- }
-
- sub mkpath
- {
- local($fullpath) = @_;
- local(@tree) = split('/', $fullpath);
- local(@tmp) = ();
-
- if($tree[0] eq "")
- {
- push(@tmp, shift(@tree));
- }
- foreach $dir (@tree)
- {
- push(@tmp, $dir);
- $path = join('/', @tmp);
- print "\t%% checking for '$path'\n" if $verbose;
- mkdir("$path", 0777) || die "Can't create directory $path"
- if ! -d "$path";
- }
- }
-
- # Emacs cutsomisation
- #
- # Local Variables:
- # mode:fundamental
- # tab-width:4
- # End:
-