home *** CD-ROM | disk | FTP | other *** search
- /* inpaths.c -- track the paths of incoming news articles and prepare
- * in a format suitable for decwrl pathsurveys
- *
- *
- * This program inputs a list of filenames of news articles, and outputs a
- * data report which should be mailed to the decwrl Network Monitoring
- * Project at address "pathsurvey@decwrl.dec.com". Please run it once a month
- * if you can, in time so that the results arrive at decwrl by the 1st
- * day of the month.
- *
- *
- * Run it like this:
- *
- * cd /usr/spool/news
- * find . -type f -print | inpaths "yourhost" | mail pathsurvey@decwrl.dec.com
- *
- * where "yourhost" is the host name of your computer, e.g. "decwrl".
- *
- * The input to "inpaths" must be a list of the file names of news articles,
- * relative to the spooling directory. "./news/config/2771" and
- * "news/config/2771" are both legal inputs, but "/usr/spool/news/config/2771"
- * is not. If you have some other way of generating a list of news file
- * names, such as running a script over the history file, you can use that
- * instead. Inpaths handles crossposting regardless of which technique
- * you use.
- *
- * If you get an error message "no traffic found. Check $CWD", then the
- * problem is most likely that the path names you are giving it are not
- * relative to the spooling directory, e.g. you are feeding it lines like
- * "/usr/spool/news/news/config/2771" instead of "./news/config/2771"
- *
- * There are 3 options: -s, -m, and -l for short, medium, and long report.
- * The default is to produce a long report. If you are worried about mail
- * expenses you can send a shorter report. The long report is typically
- * about 50K bytes for a major site, and perhaps 25K bytes for a smaller
- * site.
- *
- * Brian Reid
- * V1 Sep 1986
- * V2.4 May 1989
- *
- * Special thanks to Mel Pleasant and Bob Thrush for significant help with
- * portability bugs.
- *
- */
-
- /* if you are compiling on a USG machine (SysV, etc),
- please uncomment the following line: */
-
- /* #define SYSV */
-
-
-
- #define VERSION "2.4"
- #include <stdio.h>
- #include <fcntl.h>
- #include <ctype.h>
- #include <sys/types.h>
- #include <sys/stat.h>
-
/* Number of bytes read from the start of each article when looking for
   the "Path:" and "Newsgroups:" header lines. */
#define HEADBYTES 1024

/* time() is declared by hand; its return type depends on the SYSV switch
   described above.  The text after #else/#endif is kept as a comment:
   extra tokens after these directives are not valid C. */
#ifdef SYSV
long time();
#else /* !SYSV */
time_t time();
#endif /* SYSV */

/* Old-style declarations of the library routines used below; this file
   does not include <stdlib.h> or <string.h>.
   NOTE(review): ISO C declares exit() as returning void and malloc() as
   returning void *, so these lines must be removed if those headers are
   ever included. */
extern int exit();
extern char *malloc();
extern char *strcpy();
-
- /* this is index() or strchr() included here for portability */
-
/*
 * Return a pointer to the first occurrence of chr in the string ptr,
 * or a null pointer when chr does not occur.  The terminating NUL is
 * part of the search, so looking for '\0' yields the end of the string.
 */
char *index(ptr,chr)
char *ptr,chr;
{
    for (;; ptr++) {
        if (*ptr == chr)
            return (ptr);
        if (*ptr == '\0')
            break;          /* end of string reached without a match */
    }
    return ((char *) NULL);
}
-
- main (argc,argv)
- int argc;
- char **argv;
- {
- char linebuf[1024], jc, *lptr, *cp, *cp1, *cp2;
- char rightdelim;
- char *pathfield, *groupsfield;
- int crossposted;
- char artbuf[HEADBYTES], ngfilename[256];
- struct stat statbuf, *sbptr;
- char *scanlimit;
- char *hostname;
- char hostString[128];
- int needHost;
- static int passChar[256];
- int isopen,columns,verbose,totalTraffic;
- long nowtime,age,agesum;
- float avgAge;
-
- /* definitions for getopt */
- extern int optind;
- extern char *optarg;
-
- /* structure used to tally the traffic between two hosts */
- typedef struct trec {
- struct trec *rlink;
- struct nrec *linkid;
- int tally;
- } ;
-
- /* structure to hold the information about a host */
- typedef struct nrec {
- struct nrec *link;
- struct trec *rlink;
- char *id;
- long sentto; /* tally of articles sent to somebody from here */
- } ;
- struct nrec *hosthash[128], *hnptr, *list, *relay;
- struct trec *rlist;
- int i, article, gotbytes, c;
- extern errno;
-
- hostname = "unknown";
- verbose = 2;
- while (( c=getopt(argc, argv, "sml" )) != EOF)
- switch (c) {
- case 's': verbose=0; break;
- case 'm': verbose=1; break;
- case 'l': verbose=2; break;
- case '?': fprintf(stderr,
- "usage: %s [-s] [-m] [-l] hostname\n",argv[0]);
- exit(1);
- }
- if (optind < argc) {
- hostname = argv[optind];
- } else {
- fprintf(stderr,"usage: %s [-s] [-m] [-l] `hostname`\n",argv[0]);
- exit(1);
- }
-
- fprintf(stderr,"computing %s inpaths for host %s\n",
- verbose==0 ? "short" : (verbose==1 ? "medium" : "long"),hostname);
- for (i = 0; i<128; i++) hosthash[i] = (struct nrec *) NULL;
-
- /* precompute character types to speed up scan */
- for (i = 0; i<=255; i++) {
- passChar[i] = 0;
- if (isalpha(i) || isdigit(i)) passChar[i] = 1;
- if (i == '-' || i == '.' || i == '_') passChar[i] = 1;
- }
- totalTraffic = 0;
- nowtime = (long) time(0L);
- agesum = 0;
-
- while (gets(linebuf) != (char *) NULL) {
- lptr = linebuf;
- isopen = 0;
-
- /* Skip blank lines */
- if (linebuf[0] == '\0') goto bypass;
-
- /* Skip files that do not have pure numeric names */
- i = strlen(lptr)-1;
- do {
- if (!isdigit(linebuf[i])) {
- if (linebuf[i]=='/') break;
- goto bypass;
- }
- i--;
- } while (i>=0);
-
- /* Open the file for reading */
- article = open(lptr, O_RDONLY);
- isopen = (article > 0);
- if (!isopen) goto bypass;
- sbptr = &statbuf;
- if (stat(lptr, sbptr) == 0) {
-
- /* Record age of file in hours */
- age = (nowtime - statbuf.st_mtime) / 3600;
- agesum += age;
- /* Reject names that are not ordinary files */
- if ((statbuf.st_mode & S_IFREG) == 0) goto bypass;
- /* Pick the file name apart into an equivalent newsgroup name */
- if (*lptr == '.') {
- lptr++;
- if (*lptr == '/') lptr++;
- }
- cp = ngfilename;
- while (*lptr != 0) {
- if (*lptr == '/') *cp++ = '.';
- else *cp++ = *lptr;
- lptr++;
- }
- cp--; while (isdigit(*cp)) *cp-- = NULL;
- if (*cp == '.') *cp = NULL;
- } else goto bypass;
-
- /* Read in the first few bytes of the article; find the end of the header */
- gotbytes = read(article, artbuf, HEADBYTES);
- if (gotbytes < 10) goto bypass;
-
- /* Find "Path:" header field */
- pathfield = (char *) 0;
- groupsfield = (char *) 0;
- scanlimit = &artbuf[gotbytes];
- for (cp=artbuf; cp <= scanlimit; cp++) {
- if (*cp == '\n') break;
- if (pathfield && groupsfield) goto gotpath;
- if (strncmp(cp, "Path: ", 6) == 0) {
- pathfield = cp; goto nextgr;
- }
- if (strncmp(cp, "Newsgroups: ", 12) == 0) {
- groupsfield = cp; goto nextgr;
- }
- nextgr:
- while (*cp != '\n' && cp <= scanlimit) cp++;
- }
- if (groupsfield == (char *) 0 || (pathfield == (char *) 0))
- goto bypass;
-
- gotpath: ;
-
- /* Determine the name of the newsgroup to which this is charged. It is not
- necessarily the name of the file in which we found it; rather, use the
- "Newsgroups:" field. */
-
- crossposted = 0;
- groupsfield += 12; /* skip 'Newsgroups: ' */
- while (*groupsfield == ' ') groupsfield++;
- cp= (char *) index(groupsfield,'\n'); *cp = 0;
- cp=(char *) index(groupsfield,',');
- if (cp) {
- crossposted++;
- *cp = 0;
- }
-
- /* To avoid double-billing, only charge the newsgroup if the pathname matches
- the contents of the Newsgroups: field. This will also prevent picking up
- junk and control messages.
- */
- if (strcmp(ngfilename,groupsfield)) goto bypass;
-
- /* Extract all of the host names from the "Path:" field and put them in our
- host table. */
- cp = pathfield;
- while (*cp != NULL && *cp != '\n') cp++;
- if (cp == NULL) {
- fprintf(stderr,"%s: end of Path line not in buffer.\n",lptr);
- goto bypass;
- }
-
- totalTraffic++;
- *cp = 0;
- pathfield += 5; /* skip 'Path:' */
- cp1 = pathfield;
- relay = (struct nrec *) NULL;
- rightdelim = '!';
- while (cp1 < cp) {
- /* get next field */
- while (*cp1=='!') cp1++;
- cp2 = ++cp1;
- while (passChar[(int) (*cp2)]) cp2++;
-
- rightdelim = *cp2; *cp2 = 0;
- if (rightdelim=='!' && *cp1 != (char) NULL) {
- /* see if already in the table */
- list = hosthash[*cp1];
- while (list != NULL) {
- /*
- * Attempt to speed things up here a bit. Since we hash
- * on the first char, we see if the second char is a match
- * before calling strcmp()
- */
- if (list->id[1] == cp1[1] && !strcmp(list->id, cp1)) {
- hnptr = list;
- break; /* I hate unnecessary goto's */
- }
- list = list->link;
- }
- if(list == NULL) {
- /* get storage and splice in a new one */
- hnptr = (struct nrec *) malloc(sizeof (struct nrec));
- hnptr->id = (char *) strcpy(malloc(1+strlen(cp1)),cp1);
- hnptr->link = hosthash[*cp1];
- hnptr->rlink = (struct trec *) NULL;
- hnptr->sentto = (long) 0;
- hosthash[*cp1] = hnptr;
- }
- }
- /*
- At this point "hnptr" points to the host record of the current host. If
- there was a relay host, then "relay" points to its host record (the relay
- host is just the previous host on the Path: line. Since this Path means
- that news has flowed from host "hnptr" to host "relay", we want to tally
- one message in a data structure corresponding to that link. We will
- increment the tally record that is attached to the source host "hnptr".
- */
-
- if (relay != NULL && relay != hnptr) {
- rlist = relay->rlink;
- while (rlist != NULL) {
- if (rlist->linkid == hnptr) goto have2;
- rlist = rlist->rlink;
- }
- rlist = (struct trec *) malloc(sizeof (struct trec));
- rlist->rlink = relay->rlink;
- relay->rlink = rlist;
- rlist->linkid = hnptr;
- rlist->tally = 0;
-
- have2: rlist->tally++;
- hnptr->sentto++;
- }
-
- cp1 = cp2;
- relay = hnptr;
- if (rightdelim == ' ' || rightdelim == '(') break;
- }
- bypass: if (isopen) close(article) ;
- }
- /* Now dump the host table */
- if (!totalTraffic) {
- fprintf(stderr,"%s: error--no traffic found. Check $CWD.\n",argv[0]);
- exit(1);
- }
-
- avgAge = ((double) agesum) / (24.0*(double) totalTraffic);
- printf("ZCZC begin inhosts %s %s %d %d %3.1f\n",
- VERSION,hostname,verbose,totalTraffic,avgAge);
- for (jc=0; jc<127; jc++) {
- list = hosthash[jc];
- while (list != NULL) {
- if (list->rlink != NULL) {
- if (verbose > 0 || (100*list->sentto > totalTraffic))
- printf("%ld\t%s\n",list->sentto, list->id);
- }
- list = list->link;
- }
- }
- printf("ZCZC end inhosts %s\n",hostname);
-
- printf("ZCZC begin inpaths %s %s %d %d %3.1f\n",
- VERSION,hostname,verbose,totalTraffic,avgAge);
- for (jc=0; jc<127; jc++) {
- list = hosthash[jc];
- while (list != NULL) {
- if (verbose > 1 || (100*list->sentto > totalTraffic)) {
- if (list->rlink != NULL) {
- columns = 3+strlen(list->id);
- sprintf(hostString,"%s H ",list->id);
- needHost = 1;
- rlist = list->rlink;
- while (rlist != NULL) {
- if (
- (100*rlist->tally > totalTraffic)
- || ((verbose > 1)&&(5000*rlist->tally>totalTraffic))
- ) {
- if (needHost) printf("%s",hostString);
- needHost = 0;
- relay = rlist->linkid;
- if (columns > 70) {
- printf("\n%s",hostString);
- columns = 3+strlen(list->id);
- }
- printf("%d Z %s U ", rlist->tally, relay->id);
- columns += 9+strlen(relay->id);
- }
- rlist = rlist->rlink;
- }
- if (!needHost) printf("\n");
- }
- }
- list = list->link;
- }
- }
- printf("ZCZC end inpaths %s\n",hostname);
- fclose(stdout);
- exit(0);
- }
-