home *** CD-ROM | disk | FTP | other *** search
- Newsgroups: alt.sources,comp.lang.perl
- From: dglo@ADS.COM (Dave Glowacki)
- Subject: A PERL script to check "junk" for newsgroups
- Message-ID: <~Y+J#@ads.com>
- Date: Wed, 13 Feb 91 23:25:39 GMT
-
- Since, as a rule, EVERY C or shell program posted must be followed up
- by a PERL script, here's my version of NEWJUNK.
-
- Mine is called 'check-junk'. It grabs the Newsgroups: lines from all
- junked articles, processes them according to a couple of configuration
- files and either mails a report to any addresses specified on the command
- line or prints the report to stdout (if there weren't any arguments.)
-
- The two configuration files are lists of patterns. The first list
- (junk-trash-list) throws away the entire Newsgroups: line for an article
- if a pattern from it matches any of the newsgroups in the line. The
- second list (junk-ignore-list) only ignores the newsgroup matched by
- a particular pattern.
-
- I use 'junk-trash-list' to throw away references to regional newsgroups
- like 'sub', 'dnet', and 'ne' where things tend to be crossposted to
- both a regional group and a local group. 'Junk-ignore-list' is more of
- a specific newsgroup/hierarchy eliminator for things like 'alt.sex.*' or
- 'alt.desert.storm.its.not.scud.its.al-hussein.dammit'.
-
- To install this, stick everything in /usr/lib/news (or wherever you
- put these things) and make sure NEWSCTL and NEWSARTS are set correctly.
- I run it with '/usr/lib/news/check-junk news@ads.com' every night
- before I expire.
-
- #! /bin/sh
- # This is a shell archive. Remove anything before this line, then feed it
- # into a shell via "sh file" or similar. To overwrite existing files,
- # type "sh file -c".
- # The tool that generated this appeared in the comp.sources.unix newsgroup;
- # send mail to comp-sources-unix@uunet.uu.net if you want that tool.
- # If this archive is complete, you will see the following message at the end:
- # "End of shell archive."
- # Contents: check-junk junk-ignore-list junk-trash-list
- # Wrapped by dglo@saturn on Wed Feb 13 14:36:09 1991
- PATH=/bin:/usr/bin:/usr/ucb ; export PATH
- if test -f 'check-junk' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'check-junk'\"
- else
- echo shar: Extracting \"'check-junk'\" \(4127 characters\)
- sed "s/^X//" >'check-junk' <<'END_OF_FILE'
- X#!/usr/local/bin/perl
- X#
- X# Build a list of all newsgroups sent to 'junk' newsgroup
- X# and either mail the report to the addresses listed on the command line
- X# or print it to STDOUT
- X#
- X# The report is a series of lines of the form 'nnn articles for newsgroup'
- X#
- X# $Header: /var/news/src/ADS-scripts/RCS/check-junk,v 1.7 1991/02/13 22:31:08 dglo Exp $
- X
- X# Location of the C news configuration script, and the table that receives
- X# the variable settings harvested from it.
- X#
- X$NEWSCONFIG='/usr/lib/news/bin/config';
- X%NEWSENV = ();
- X
- X# newsconfig - read in the C news environment
- X#
- X# Runs $NEWSCONFIG under "sh -x" and stores every NAME=value line of the
- X# resulting trace in %NEWSENV.  Takes no arguments.
- X#
- X# Returns 1 when the config script is readable and the pipe could be
- X# opened, 0 otherwise (the caller treats 0 as "not a C news system").
- X#
- Xsub newsconfig {
- X    # Opening a pipe succeeds whenever the fork does, even if sh then fails
- X    # to find the script, so the script has to be tested for explicitly.
- X    return 0 unless (-r $NEWSCONFIG);
- X    return 0 unless (open(NEWSENV, "sh -x $NEWSCONFIG 2>&1 |"));
- X
- X    while (<NEWSENV>) {
- X        # Shells that put the PS4 prompt ("+ ") in front of traced
- X        # assignments would otherwise produce keys such as "+ NEWSCTL".
- X        s/^\++\s*//;
- X
- X        # Split at the first '=' so that values containing '=' stay whole.
- X        $NEWSENV{$1} = $2 if (/^([A-Za-z_]\w*)=(.*)$/);
- X    }
- X    close(NEWSENV);
- X    1;
- X}
- X
- X# News locations (handle both C news and B news)
- X#
- X# Start from whatever the C news config script reported, then fill in the
- X# traditional B news paths for anything it did not supply, so that a
- X# missing or partial config never leaves a path empty.
- X#
- Xif (&newsconfig()) {
- X    $NEWSCTL = $NEWSENV{'NEWSCTL'};
- X    $NEWSARTS = $NEWSENV{'NEWSARTS'};
- X}
- X$NEWSCTL = "/usr/lib/news" if ($NEWSCTL eq '');
- X$NEWSARTS = "/usr/spool/news" if ($NEWSARTS eq '');
- X
- X# see if the result is supposed to be mailed somewhere
- X#
- X# Every command-line argument is taken as a mail address.  Each address is
- X# appended with a leading blank, so $maillist is true as soon as one
- X# address was given (even an address of "0") and stays empty otherwise.
- X# The array is named with its @ sigil: a bareword array name is not
- X# accepted by Perl 5.
- X#
- X$maillist = '';
- Xforeach $addr (@ARGV) {
- X    $maillist .= ' ' . $addr;
- X}
- X@ARGV = ();
- X
- X# pick the report destination: STDOUT (unbuffered) when nobody is to be
- X# mailed, otherwise a scratch file that can be mailed later
- X#
- Xif (!$maillist) {
- X    open(TMPFILE, ">-") || die "Couldn't send output to STDOUT!\n";
- X
- X    # turn on autoflush for TMPFILE, then make STDOUT the default again
- X    select(TMPFILE);
- X    $| = 1;
- X    select(STDOUT);
- X} else {
- X    $tmpfile = "/tmp/junkmail.$$";
- X    open(TMPFILE, ">$tmpfile") || die "Can't open a temporary file!\n";
- X}
- X
- X# read_patterns - load one pattern file from $NEWSCTL
- X#
- X# Argument: the base name of the file (e.g. 'junk-trash-list').
- X# Returns the list of patterns found, one per non-blank line, with the
- X# line terminator removed.  A file that is missing or cannot be opened is
- X# noted in the report (TMPFILE) and yields an empty list.
- X#
- Xsub read_patterns {
- X    local($path) = "$NEWSCTL/" . $_[0];
- X    local(@patterns) = ();
- X
- X    if (! -e $path) {
- X        print TMPFILE "Couldn't find '$path'!\n";
- X    } elsif (!open(LIST, $path)) {
- X        print TMPFILE "Couldn't open '$path'!\n";
- X    } else {
- X        while (<LIST>) {
- X            # Remove only a real newline; chop would eat the last
- X            # character of a final line that has no terminator.
- X            s/\n$//;
- X
- X            # Skip blank lines: an empty pattern does not mean "match
- X            # nothing" to the regex matcher.
- X            next if (/^\s*$/);
- X
- X            push(@patterns, $_);
- X        }
- X        close(LIST);
- X    }
- X    @patterns;
- X}
- X
- X# read in list of patterns for which entire Newsgroups line is trashed
- X#
- X@trashlist = &read_patterns('junk-trash-list');
- X
- X# read in list of patterns to ignore
- X#
- X@ignorelist = &read_patterns('junk-ignore-list');
- X
- X# load the active file: every group named there is a good newsgroup and
- X# is remembered as a key of %newsgroup
- X#
- X%newsgroup = ();
- X$activefile = "$NEWSCTL/active";
- Xif (! -e $activefile) {
- X    print TMPFILE "Couldn't find '$activefile'!\n";
- X} elsif (!open(ACTIVE, $activefile)) {
- X    print TMPFILE "Couldn't open '$activefile'!\n";
- X} else {
- X    while (<ACTIVE>) {
- X        # keep only the text ahead of the first blank
- X        s/ .*\n//;
- X        $newsgroup{$_} = 1;
- X    }
- X    close(ACTIVE);
- X}
- X
- X# Collect the Newsgroups: header of every junked article and tally the
- X# newsgroups that are neither active, trashed nor ignored.
- X#
- X# Results: $allng{group} is the number of junked articles naming the
- X# group; $thislist{group} is the colon-separated set of crossposted
- X# Newsgroups lists the group appeared in.
- X#
- X# /dev/null is handed to grep as an extra file so that every hit carries a
- X# "file:" prefix even when 'junk' holds a single article; without the
- X# prefix the header name would end up glued to the first newsgroup.
- X#
- Xopen(JUNKNG, "grep '^Newsgroups:' $NEWSARTS/junk/* /dev/null |") ||
- X    die "Couldn't search for Newsgroups in articles in 'junk'!\n";
- X%seenart = ();
- Xwhile (<JUNKNG>) {
- X
- X    # split the grep hit into article file and header value
- X    #
- X    next unless (/^(.*):Newsgroups:(.*)$/);
- X    ($artfile, $list) = ($1, $2);
- X
- X    # only the first hit in an article is its header; any later hit is a
- X    # body line that merely starts with "Newsgroups:"
- X    #
- X    next if ($seenart{$artfile}++);
- X
- X    # get the list of newsgroups
- X    #
- X    $list =~ s/\s+//g;
- X
- X    # see if we should trash this line
- X    #
- X    $trashed = 0;
- X    foreach $ng (split(/,/, $list)) {
- X        foreach $pat (@trashlist) {
- X            if ($ng =~ /$pat/) {
- X                $trashed = 1;
- X                last;
- X            }
- X        }
- X        last if ($trashed);
- X    }
- X    next if ($trashed);
- X
- X    # Check each newsgroup on the line
- X    #
- X    foreach $ng (split(/,/, $list)) {
- X
- X        # a doubled comma leaves an empty name; nothing to report for it
- X        #
- X        next if ($ng eq '');
- X
- X        # groups listed in the active file are not junk candidates
- X        #
- X        next if ($newsgroup{$ng});
- X
- X        # see if it's one we WANT to junk
- X        #
- X        $unignored = 1;
- X        foreach $pat (@ignorelist) {
- X            if ($ng =~ /$pat/) {
- X                $unignored = 0;
- X                last;
- X            }
- X        }
- X        next unless ($unignored);
- X
- X        # found one we may want to keep; remember the whole list too when
- X        # the article was crossposted
- X        #
- X        $allng{$ng}++;
- X        if ($list ne $ng) {
- X            if (defined($thislist{$ng})) {
- X                $thislist{$ng} .= ':' . $list;
- X            } else {
- X                $thislist{$ng} = $list;
- X            }
- X        }
- X    }
- X}
- Xclose(JUNKNG);
- X
- X# sort comparator for the report: groups with more junked articles come
- X# first; groups with equal counts are put in ascending string order
- X#
- Xsub nogood {
- X    # a non-zero count difference decides; otherwise compare the names
- X    return ($allng{$b} - $allng{$a}) || ($a cmp $b);
- X}
- X
- X# print the report
- X#
- X# One line per junked newsgroup, in &nogood order: the article count, the
- X# group name and, for groups seen in crossposted headers, the Newsgroups
- X# lists they appeared in.  The hash is named with its % sigil: a bareword
- X# hash name is not accepted by Perl 5.
- X#
- Xforeach $i (sort nogood keys(%allng)) {
- X    # a single article gets a blank where the plural "s" would go
- X    $plural = ($allng{$i} == 1 ? " " : "s");
- X    print TMPFILE $allng{$i}," article",$plural," for ",$i;
- X    print TMPFILE " (",$thislist{$i},")" if (defined($thislist{$i}));
- X    print TMPFILE "\n";
- X}
- Xclose(TMPFILE);
- X
- X# deliver the report by mail when addresses were supplied; an empty
- X# report is not mailed, and the scratch file is removed either way
- X#
- Xif ($maillist) {
- X    if ( -s $tmpfile ) {
- X        system("Mail -s 'Junked newsgroups' $maillist < $tmpfile");
- X    }
- X    unlink($tmpfile);
- X}
- END_OF_FILE
- if test 4127 -ne `wc -c <'check-junk'`; then
- echo shar: \"'check-junk'\" unpacked with wrong size!
- fi
- chmod +x 'check-junk'
- # end of 'check-junk'
- fi
- if test -f 'junk-ignore-list' -a "${1}" != "-c" ; then  # keep an existing copy unless the archive was run with -c
- echo shar: Will not clobber existing file \"'junk-ignore-list'\"
- else
- echo shar: Extracting \"'junk-ignore-list'\" \(99 characters\)
- sed "s/^X//" >'junk-ignore-list' <<'END_OF_FILE'
- X^alt\.desert\.storm\.its\.*
- X^alt\.drugs
- X^alt\.sex.*
- X^erg\..*
- X^eunet\..*
- X^eucon\..*
- X^la\..*
- X^to\..*
- END_OF_FILE
- if test 99 -ne `wc -c <'junk-ignore-list'`; then  # extracted size must equal the 99 characters announced above
- echo shar: \"'junk-ignore-list'\" unpacked with wrong size!
- fi
- # end of 'junk-ignore-list' (one pattern per line; check-junk drops only the newsgroup a pattern matches)
- fi
- if test -f 'junk-trash-list' -a "${1}" != "-c" ; then  # keep an existing copy unless the archive was run with -c
- echo shar: Will not clobber existing file \"'junk-trash-list'\"
- else
- echo shar: Extracting \"'junk-trash-list'\" \(37 characters\)
- sed "s/^X//" >'junk-trash-list' <<'END_OF_FILE'
- X^dnet\..*
- X^ne\..*
- X^sub\..*
- X^znet\..*
- END_OF_FILE
- if test 37 -ne `wc -c <'junk-trash-list'`; then  # extracted size must equal the 37 characters announced above
- echo shar: \"'junk-trash-list'\" unpacked with wrong size!
- fi
- # end of 'junk-trash-list' (one pattern per line; a match discards the whole Newsgroups line of an article)
- fi
- echo shar: End of shell archive.
- exit 0
- --
- Dave Glowacki dglo@ads.com Advanced Decision Systems
-