home *** CD-ROM | disk | FTP | other *** search
- Path: hactar!abqhh!news.Hamburg.Germany.EU.net!Germany.EU.net!mcsun!uknet!bnr.co.uk!bnrgate!nrcnet0!cunews!torn!cs.utexas.edu!sun-barr!decwrl!pa.dec.com!reid
- From: reid@decwrl.DEC.COM (Brian Reid)
- Newsgroups: comp.sources.d,news.admin
- Subject: arbitron program (v2.4.4--last updated 20 Oct 1989)
- Keywords: arbitron, software
- Message-ID: <1992Nov2.155442.14118@PA.dec.com>
- Date: 2 Nov 92 15:54:42 GMT
- Sender: news@PA.dec.com (News)
- Organization: DEC Network Systems Laboratory
- Lines: 251
- Originator: reid@torrey.pa.dec.com
-
- This is the script for the "arbitron" system that is used to produce the
- data for the monthly USENET readership surveys in news.lists. It is not so
- much a "source" as it is a piece of USENET folklore, posted every month so
- that new people will be sure to find it. (CF the "History of USENET" articles
- posted every month). It is posted to these newsgroups rather than to a source
- group so that it will be widely available but will not be archived.
-
- #! /bin/sh
- # @(#)arbitron 2.4.4 10/20/89
- # arbitron -- this program produces rating sweeps for USENET.
- #
- # Usage: arbitron
- #
- # To use this program, edit the "configuration" section below so that the
- # information is correct for your site, and then run it. It will produce a
- # readership survey for your machine and mail that survey to decwrl.dec.com,
- # with a cc to you.
- #
- # To participate in the international monthly ratings sweeps,
- # run "arbitron" every month. I will run the statistics program on the first
- # day of each month; it will include any report that has reached it by that
- # time. To make sure your site's data is included, run the survey program no
- # later than the 20th day of each month.
- #
- # Brian Reid, DEC Western Research Lab, reid@decwrl.dec.com
- # Updated and bugfixed by
- # Spencer Thomas, U.of Utah
- # Geoff Kuenning, SAH Consulting
- # Updated to work with 2.10.1 and older news systems by
- # Lindsay Cleveland, AT&T Technologies/Bell Labs
- # Made to work with 16-bit address spaces by
- # Andy Walker, Maths Dept., University of Nottingham, UK
- # Nagging Bourne shell bug fixed by
- # Tom Donahue, Rabbit Software Corp
- # Various suggestions provided by:
- # Karl Tombre, CRIN, Vandoeuvre France
- # Dan Kolkowitz, Stanford
- # Paul Eggert, Unisys
- # Fixes for YP and NN by
- # Dan Kolkowitz, Stanford
- # Brent Chapman, Capital Market Technology
- #
- # Note that the results of this program are dependent on the rate at which
- # you expire news. If you are a small site that expires news rapidly, the
- # results may indicate fewer active readers than you actually have.
- #
- ###########################################################################
- # Configuration information. Edit this section to reflect your site data. #
- #
- # TMPDIR - directory that holds the arb.*.$$ scratch files
- # NEWS   - news library directory; the "active" file is read from here and
- #          the directory is prepended to PATH
- # SPOOL  - root of the article spool tree
- TMPDIR=/tmp
- NEWS=/usr/lib/news
- SPOOL=/usr/spool/news
-
- # Make a crude stab at determining the system type. If your installation has
- # only one type of system, you can edit out the "if" statement and just turn
- # this into an assignment statement of the correct value.
- # The only values produced here are "bsd" and "usg".
- if [ -d /usr/ucb ]
- then
-     STYPE="bsd"
- else
-     STYPE="usg"
- fi
-
- # Range of /etc/passwd UID's that represent actual people (rather than
- # maintenance accounts or daemons or whatever)
- lowUID=5
- highUID=9999
-
- # If you aren't running a distributed news system (nntpd & rrn, usually),
- # leave NEWSHOST blank. Else set it to the name of the host from which you
- # can rcp a copy of the active file.
- NEWSHOST=
-
- # uucp path: {sun, hplabs, pyramid, decvax, ucbvax}!decwrl!netsurvey
- # The second address is the cc to whoever runs the script. USER is not set
- # by every login program, so fall back to LOGNAME when USER is unset.
- summarypath="netsurvey@decwrl.dec.com ${USER-$LOGNAME}"
-
-
- # We need to find the uucp name of your host. If this code doesn't work,
- # then just put it in literally like this:
- # hostname="decwrl"
- #
- # The System V command list is selected for "usg" (the value STYPE actually
- # gets above) as well as for "sysv" (kept for sites that set STYPE by hand).
- case $STYPE in
-     bsd) cmd='hostname || uuname -l';;
-     usg|sysv) cmd='uname -n || uuname -l || hostname';;
-     *) cmd='uuname -l';;
- esac
-
- # Discard "command not found" noise from the fallbacks; stderr is sent to
- # /dev/null rather than closed so the commands still have a valid fd 2.
- hostname=`sh -c "$cmd" 2>/dev/null`
-
- # Let the news library directory supply any news utilities.
- PATH=$NEWS:$PATH
- ############################################################################
- export PATH
- # ---------------------------------------------------------------------------
- trap exit 1 2 3 15
- trap 's=$?; rm -f $TMPDIR/arb.*.$$; exit $s' 0
- set `date`
- dat="$2$6"
- destination="${MAILER-mail} $summarypath"
-
- ################################
- # Here are several expressions, each of which figures out approximately how
- # many people use this machine. Comment out all but 1 of them; pick the one
- # you like best. Initially the most universal but least reliable of them is
- # uncommented.
- PASSWD=$TMPDIR/arb.passwd.$$
- (ypcat passwd || cat /etc/passwd) > $PASSWD
-
- # # ###### Scheme #1: fast but usually returns too big a number
- nusers=`awk -F: "BEGIN {N=0}\\$3>=$lowUID && \\$3<=$highUID{N=N+1}END{print N}" <$PASSWD`
-
- # # ###### Scheme #2 (works with BSD systems)
- #nusers=`last | sed '/^wtmp begins/d; s/ .*//; /^$/d' | sort -u | wc -l`
-
- # # ###### Scheme #3 (works with USG systems)
- #nusers=`who /etc/wtmp | sort -u +0 -1 | wc -l`
-
- ################################
- #
- # Set up awk scripts; these are too large to pass as arguments on most
- # systems.
- #
- # This awk script generates the actual output report.
- # We use 'sed' to substitute in the shell variables to save ourselves
- # endless hassle trying to find quoting/backslashing problems.
- # The same 'sed' pass deletes every line that starts with '#', so comment
- # lines inside the here-document never reach awk.
- #
- # The input to this script consists of two types of lines (pre-sorted):
- #
- # (1) Active-file lines. These have four fields: newsgroup name,
- # last article number, first existing article, 'y' or 'n'
- # to allow/disallow posting.
- # mod.mac 00001 00001 y
- #
- # (2) .newsrc-derived lines. These have three fields: the newsgroup
- # name, the user name and the articles-read information. The latter
- # can be arbitrarily complex. It can also be arbitrarily long;
- # this can potentially break either awk or sed, in which
- # case the script will not work.
- # mod.map joe 1-199
- #
- # The script uses the type 1 lines to define the newsgroups
- # and their active article ranges. The .newsrc (type 2) lines are
- # then used to deduce which users are reading that group (a group
- # is being read if the last article seen is in that group's active
- # article range).
- #
- sed "/^#/d
- s/NUSERS/$nusers/g
- s/HOSTNAME/$hostname/g
- s/DATE/$dat/g" > $TMPDIR/arb.fmt.$$ << 'DOG'
- # makereport -- utility for "arbitron". Early versions were copied from a
- # similar script distributed with "subscribers.sh" by Blonder, McCreery, and
- # Herron.
- #
- BEGIN { grpname = "" ; grpcount = 0 ; realusers = 0 }
- #
- # Active file line: dispose of previous group (if any), record group, and
- # record first and last article numbers. Set group's reader count to none.
- # Field 2 is the last article number and field 3 the first one.
- NF == 4 {
-     if (grpname != "") {
-         printf("%d %s\n",grpcount, grpname)
-     }
-     grpname = $1
-     grpfirst = $3
-     grplast = $2
-     grpcount = 0
- }
- #
- # .newsrc line. Break out the final number, which is the last article that
- # has actually been read. This is a pretty good indicator of the person's
- # true interest in the group. If 'lastread' for the group is a current
- # (unexpired) article, record a reader for that group. Finally, record
- # the user as a "real" user of the news system.
- # Lines for a group other than the current active-file group are skipped.
- #
- NF == 3 {
-     if ($1 != grpname) next;
- # Take the text after the last "-", then the text after its last ",".
-     n1 = split($3, n2, "-")
-     n3 = split(n2[n1], n4, ",")
-     lastread = n4[n3]
-     if ((grpfirst != grplast) && (lastread >= grpfirst) && (lastread <= grplast)) {
-         grpcount++
-         if (realuser[$2] != 1) {
-             realuser[$2] = 1
-             realusers++
-         }
-     }
- }
- #
- # End of file. Print the report in 2 columns.
- # The 999x numbers make the header lines sort ahead of every group line in
- # the later "sort -nr"; they are stripped again before the report is mailed.
- # The user count is substituted as a string and coerced with "+ 0" so that an
- # empty count prints as 0 instead of producing an awk syntax error.
- END {
-     printf("9999 Host\t\t%s\n","HOSTNAME")
-     printf("9998 Users\t\t%d\n","NUSERS" + 0)
-     printf("9997 NetReaders\t%d\n",realusers)
-     printf("9996 ReportDate\t%s\n","DATE")
-     printf("9995 SystemType\tnews-arbitron-2.4\n")
- # For reorganized network, report a group even if nobody reads it. This will
- # help us keep track of where the groups propagate.
- # Nothing is printed when no active-file line was seen at all.
-     if (grpname != "") {
-         printf("%d %s\n",grpcount, grpname)
-     }
- }
- DOG
-
- # Write the awk program that converts passwd entries (split on ':') into a
- # shell script. For each home directory (field 6) not seen before, that
- # script prints "group user articles" lines taken from the user's .nn/rc
- # if it is readable, otherwise from the user's .newsrc. "/" and the empty
- # home directory are marked as seen up front and therefore never examined.
- #
- # The backslashes meant for sed are doubled: inside an awk string literal a
- # lone "\(" is an undefined escape and "\1"/"\2" are octal character escapes,
- # so single backslashes would not reach the generated sed command intact.
- cat >$TMPDIR/arb.pwd.$$ <<'MOUSE'
- BEGIN { seen["/"]=1; seen[""] = 1; }
- {
-     if (seen[$6]!=1) {
-         printf("if [ -r %s/.nn/rc ] ; then ", $6)
-         printf("sed -n '/^+/s/^. \\([0-9]*\\) \\(.*\\)/\\2 %s \\1/p'",$1)
-         printf(" <%s/.nn/rc;\n",$6)
-         printf("elif [ -r %s/.newsrc ] ; then ", $6)
-         printf("sed -n '/: [0-9]/s/:/ %s/p' <%s/.newsrc; fi\n",$1,$6)
-         seen[$6]=1;
-     }
- }
- MOUSE
-
- # First, make sure we have an active file.
- # Without NEWSHOST the local $NEWS/active is used directly. With NEWSHOST a
- # copy is fetched by rcp into $TMPDIR under an arb.*.$$ name, so the exit
- # trap's "rm -f $TMPDIR/arb.*.$$" removes it even when TMPDIR is not /tmp.
- if [ -z "$NEWSHOST" ]
- then
-     ACTIVE=$NEWS/active
- else
-     ACTIVE=$TMPDIR/arb.active.$$
-     rcp $NEWSHOST:$NEWS/active $ACTIVE
- fi
-
- # A failed rcp leaves the file missing or empty and is caught here as well.
- if [ ! -s "$ACTIVE" ]
- then
-     echo arbitron: ACTIVE file missing or empty. Cannot continue. >&2
-     # Set $? to 1 in a subshell, then exit with that status.
-     (exit 1); exit
- fi
-
- # Next, get the list of .newsrc files with duplicates and unreadable files
- # removed.
- awk -F: -f $TMPDIR/arb.pwd.$$ <$PASSWD | sh >$TMPDIR/arb.tmp.$$
-
- # Check to make sure that we found some
- if [ -s $TMPDIR/arb.tmp.$$ ]
- then # See if "active" file has 4 fields or only two (pre-2.10.2)
- set `sed 1q < $ACTIVE`
- if [ $# -eq 2 ]
- then egrep '^[a-z][-0-9_a-z]*\.' $ACTIVE |
- while read group last
- do dir=`echo "$group" | sed 's;\.;/;g'`
- first=`ls $SPOOL/$dir | grep '^[0-9]*' | sort -n | sed 1q`
- case $STYPE in
- usg) echo "$group $last ${first:-$last} X";;
- *) echo "$group $last ${first-$last} X"
- esac
- done
- else egrep '^[a-z][-0-9_a-z]*\.' $ACTIVE
- fi |
- sort - $TMPDIR/arb.tmp.$$ |
- awk -f $TMPDIR/arb.fmt.$$ |
- sort -nr |
- sed '/^$/d
- s/^999[0-9] //' |
- $destination
- else echo arbitron: Unable to find any readable .newsrc files >&2
- (exit 1); exit
- fi
-