home *** CD-ROM | disk | FTP | other *** search
- From: em@dce.ie (Eamonn McManus)
- Newsgroups: news.software.b,alt.sources
- Subject: Re: unbatcher out of sync?
- Message-ID: <scratchmatch@dce.ie>
- Date: 15 Jan 91 23:34:28 GMT
-
- henry@zoo.toronto.edu (Henry Spencer) writes:
- >It means "something's wrong with your batch": relaynews did not find a
- >"#! rnews nnnnn" line where one should have been. Typically this means
- >garbling during preparation or transmission. One notorious trouble spot
- >is that the batch format cannot tolerate transformations of newlines to
- >CR-LF pairs; the byte counts in the "#! rnews" lines must be spot-on.
-
- We had `unbatcher out of sync' problems at a site I was involved in, which
- was fed its news by mail from a VMS site (ugh). The VMS mailer (PMDF) got
- confused when lines exceeded 256 characters, as References lines often do,
- and would make a total hash of the header when this happened. As a result,
- the "#! rnews" count would always be off by a small amount for the affected
- article. C News resyncs at the next "#! rnews" line, but if the count is
- too long for the actual article contents it will have missed the start of
- the article following the garbled one.
-
- To kludge around this problem I wrote a program `patchbatch' which zips
- through a news batch looking for "#! rnews" lines with incorrect counts.
- If it finds one, it hunts back and forth a small amount for the next "#!
- rnews" line and adjusts the incorrect one to point to it. This was
- surprisingly effective: while it was running I believe it never failed to
- correct a munged batch.
-
- I'm including the source of patchbatch in case it is of use to the original
- poster, or anyone else.
-
- ,
- Eamonn
-
- /* patchbatch.c - patch a news batch. */
-
- /* By Eamonn McManus <emcmanus@cs.tcd.ie>, February 1990.
- * This program is not copyrighted.
- *
- * Blast through a news batch checking the offsets after `#! rnews'.
- * If we find that the offset does not lead to another `#! rnews' line
- * or EOF, we search around for the line somewhere in the vicinity. If
- * it is found, we go back and patch the original offset to point to the
- * correct place. This is useful for example on systems where long lines
- * get truncated or split in transmission, since in this case the stated
- * offset will be wrong.
- *
- * This is the hackiest program I have written in a long time.
- */
-
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/fcntl.h> /* For O_RDWR. */
#include <sys/types.h>
#include <sys/stat.h>
-
- extern long strtol(); /* Old-style (unprototyped) declaration of the C library's strtol. */
-
- char verbose; /* Set to 1 by main when -v is given; patchbatch then reports each count it rewrites. */
- extern int optind; /* Option-parsing index; after the getopt loop main treats argv[optind..] as file names. */
-
-
- main(argc, argv)
- char **argv;
- {
- int i, status;
- while ((i = getopt(argc, argv, "v")) != -1)
- switch (i) {
- case 'v':
- verbose = 1; break;
- default:
- goto usage;
- }
- if (optind == argc) {
- usage:
- fprintf(stderr, "Usage: patchbatch file [...]\n");
- exit(2);
- }
- status = 0;
- for (i = optind; i < argc; i++)
- if (patchbatch(argv[i]) < 0)
- status = 1;
- exit(status);
- }
-
-
- static char lead[] = "#! rnews ";
- #define LEADLEN (sizeof lead - 1)
- #define FUDGE (2 * sizeof lead)
-
- int patchbatch(name)
- char *name;
- {
- int fd, i;
- long here, offset;
- char buf[64];
- struct stat st;
- if ((fd = open(name, O_RDWR)) < 0) {
- perror(name);
- return -1;
- }
- if (fstat(fd, &st) < 0) {
- perror(name);
- return -1;
- }
- if ((i = read(fd, buf, sizeof buf - 1)) != sizeof buf - 1) {
- if (i < 0)
- perror(name);
- else fprintf(stderr, "%s: too short for a news batch\n");
- close(fd); return -1;
- }
- buf[sizeof buf - 1] = '\0';
- if (strncmp(buf, lead, LEADLEN) != 0) {
- fprintf(stderr, "%s: not a news batch (should start with %s)\n",
- name, lead);
- close(fd);
- return -1;
- }
- here = 0; i = 0;
- while (1) {
- char *p;
- int numsize;
- long artstart, newpos;
- offset = strtol(buf + LEADLEN, &p, 10);
- if (offset == 0) {
- fprintf(stderr,
- "%s: bad value after %s, file offset %ld\n",
- name, lead, here);
- close(fd);
- return -1;
- }
- numsize = p - (buf + LEADLEN);
- artstart = here + LEADLEN + numsize + 1/*\n*/;
- newpos = artstart + offset;
- if (newpos == st.st_size)
- return 0;
- else if (newpos > st.st_size) {
- char offstr[16];
- lastart:
- offset = st.st_size - artstart;
- changeoffset:
- sprintf(offstr + 1, "%ld", offset);
- switch (strlen(offstr + 1) - numsize) {
- case 0: /* Same size, just overwrite. */
- p = offstr + 1;
- break;
- case -1: /* Shorter, use leading 0. */
- p = offstr; *p = '0';
- break;
- case 1: /* Longer, oops. */
- fprintf(stderr, "%s: no room to change article \
- length to %ld, file offset %ld\n", name, offset, here);
- goto setnewpos;
- }
- lseek(fd, here + LEADLEN, 0);
- if (write(fd, p, numsize) < 0) {
- perror(name); return -1;
- }
- if (verbose)
- fprintf(stderr, "%s: changed article length to \
- %ld, file offset %ld\n", name, offset, here);
- setnewpos:
- newpos = artstart + offset;
- if (newpos >= st.st_size)
- return 0;
- } else { /* newpos < st.st_size */
- lseek(fd, newpos - FUDGE, 0);
- if (read(fd, buf, sizeof buf - 1) < sizeof buf - 1)
- goto lastart;
- if (strncmp(buf + FUDGE, lead, LEADLEN) == 0) {
- strcpy(buf, buf + FUDGE); /* Hmmm... */
- here = newpos;
- continue;
- }
- for (p = buf; (p = strchr(p, lead[0])) != NULL; p++)
- if (strncmp(p, lead, LEADLEN) == 0)
- break;
- if (p == NULL) {
- fprintf(stderr, "%s: can't find next article \
- with offset %ld from file pos %ld\n", name, offset, here);
- close(fd); return -1;
- }
- offset = (newpos - FUDGE) + (p - buf) - artstart;
- goto changeoffset;
- }
- lseek(fd, newpos, 0);
- if (read(fd, buf, sizeof buf - 1) < sizeof buf - 1) {
- fprintf(stderr, "%s: last article too short\n", name);
- close(fd); return -1;
- }
- here = newpos;
- }
- }
-