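/*
 * pindex       Brian Rice      rice@dg-rtp.dg.com      1989-91
 *
 * Produces one or more indexed files from a raw input file.  The input
 * is read as entries of N lines each (N is given by the optional
 * -number argument and defaults to 1); line k of every entry is copied
 * to the k-th output file, behind a binary header that holds the entry
 * count and the starting byte offset of each item.
 *
 * $Date: 91/07/03 12:29:00 $
 */
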
/* Version stamp kept in the object file (presumably for what(1), given
   the "@(#)" marker). */
static char rcsid[] = "$What: <@(#) pindex.c,v 2.3> $";

#include <stdio.h>

/* Size of each buffer that receives a generated temporary-file name. */
#define MY_FILENAME_LENGTH 64

/* Size of the buffer handed to fngetline() for one input item. */
#define MAX_WORD_LEN 128

/* Usage message; both %s conversions receive the command name. */
#define USAGE "%s: usage: %s [ -number ] inputfile outputfile1 ...\n"

/* printf pattern for temporary-file names.  The unix form takes the
   output-file index and the process id; the fallback form takes only
   the index, so callers must pass arguments under the same #ifdef. */
#ifdef unix
#define TEMPFILE_PATTERN "/tmp/index.%d.%d"
#else
#define TEMPFILE_PATTERN "index%d"
#endif

#ifndef TRUE
#define TRUE 1
#endif

- main(argc,argv)
- int argc;
- char *argv[];
- {
- long int bytecount = 0L;
- long int numrecs = 0L;
- long int headersize = 0L;
- long int sofar = 0L;
- char buff[MAX_WORD_LEN];
- char temp_filename[MY_FILENAME_LENGTH];
- char temp2_filename[MY_FILENAME_LENGTH];
- char *cmdname;
- static char *usage = USAGE;
- int lines_per_entry = 1;
- char *infilename;
- char **outfilenames;
- char **tempfilenames;
- FILE *raw, *thislen, *thisout;
- FILE **lenfiles;
- FILE **outfiles;
- char *malloc();
- int inx;
- int maxfiles();
-
- /* A bit of initialization. */
-
- cmdname = argv[0];
- argc--; argv++;
-
- if (argc == 0) {
- (void) fprintf(stderr,usage,cmdname,cmdname);
- exit(1);
- }
-
- /* What's next, the number of lines per entry or the
- input file name? */
-
- if (*argv[0] == '-') {
- lines_per_entry = atoi(argv[0]+1);
- argc--; argv++;
- }
-
- if (lines_per_entry > maxfiles() - 3) {
- fprintf(stderr, "%s: can't open that many files\n", cmdname);
- exit(2);
- }
-
- /* Let's grab some space for our dynamically allocated arrays. */
-
- outfilenames = (char **) malloc(lines_per_entry * sizeof (char *));
- tempfilenames = (char **) malloc(lines_per_entry * sizeof (char *));
- lenfiles = (FILE **) malloc(lines_per_entry * sizeof (FILE *));
- outfiles = (FILE **) malloc(lines_per_entry * sizeof (FILE *));
- if (outfilenames == NULL || tempfilenames == NULL ||
- lenfiles == NULL || outfiles == NULL) {
- (void) fprintf(stderr, "%s: malloc failed\n", cmdname);
- exit(2);
- }
-
- if (argc == 0) {
- (void) fprintf(stderr,usage,cmdname,cmdname);
- exit(1);
- }
-
- infilename = argv[0];
- argc--; argv++;
-
- if (argc == 0) {
- (void) fprintf(stderr,usage,cmdname,cmdname);
- exit(1);
- }
-
- /* This loop sets up our freshly-allocated arrays of output
- filenames and temporary-file names. */
-
- for (inx = 0; inx < lines_per_entry; inx++) {
- if (argc == 0) {
- (void) fprintf(stderr,
- "%s: too few filenames\n", cmdname);
- exit(1);
- }
- outfilenames[inx] = argv[0];
- tempfilenames[inx] = malloc(MY_FILENAME_LENGTH);
- if (tempfilenames[inx] == NULL) {
- (void) fprintf(stderr,"%s: malloc failed\n",cmdname);
- exit(2);
- }
- #ifdef unix
- (void) sprintf(tempfilenames[inx],
- TEMPFILE_PATTERN,inx,getpid());
- #else
- (void) sprintf(tempfilenames[inx],TEMPFILE_PATTERN,inx);
- #endif
- argc--; argv++;
- }
-
- if (argc > 0) {
- /* Since this program is destructive (i.e., it over-
- writes its output files), we'd better be cautious. */
- (void) fprintf(stderr,"%s: too many filenames\n",cmdname);
- exit(1);
- }
-
- /* First we go through the input file, noting the length of
- each input item. */
-
- raw = fopen(infilename,"r");
- if (raw == NULL) {
- (void) fprintf(stderr,"%s: can't open %s for reading\n",
- cmdname, infilename);
- exit(1);
- }
-
- for (inx = 0; inx < lines_per_entry; inx++) {
- lenfiles[inx] = fopen(tempfilenames[inx],"w");
- if (lenfiles[inx] == NULL) {
- (void) fprintf(stderr,"%s: can't open %s for writing\n",
- cmdname, tempfilenames[inx]);
- exit(1);
- }
- }
-
- /* Here we write the length of each item in turn to its
- temporary file... */
-
- while (TRUE) {
- for (inx = 0; inx < lines_per_entry; inx++) {
- bytecount = (long) fngetline(raw,buff,MAX_WORD_LEN);
- if (bytecount != 0) {
- (void) fwrite(&bytecount,
- sizeof bytecount,1,lenfiles[inx]);
- } else {
- if (inx == 0) {
- goto alldone; /* E.W. Dijkstra
- considered harmful */
- } else {
- (void) fprintf(stderr,
- "%s: premature eof on read\n",
- cmdname);
-
- exit(2);
- }
- }
- }
- numrecs++;
- }
-
- alldone:
- (void) fclose(raw);
- for (inx = 0; inx < lines_per_entry; inx++) {
- (void) fclose(lenfiles[inx]);
- }
-
- /* Now we'll write the index part of the output files. */
-
- headersize = (sizeof bytecount) * (numrecs + 1) + sizeof numrecs;
-
- for (inx = 0; inx < lines_per_entry; inx++) {
- sofar = headersize;
- thislen = fopen(tempfilenames[inx],"r");
- if (thislen == NULL) {
- (void) fprintf(stderr,"%s: can't open %s for reading\n",
- cmdname, tempfilenames[inx]);
- exit(1);
- }
- thisout = fopen(outfilenames[inx],"w");
- if (thisout == NULL) {
- (void) fprintf(stderr,"%s: can't open %s for writing\n",
- cmdname, outfilenames[inx]);
- exit(1);
- }
-
- /* Thing one is the number of records in the file. */
-
- (void) fwrite(&numrecs,sizeof numrecs,1,thisout);
-
- /* So suppose we have a forty-byte-long header, then the first
- item is six bytes long. The starting address of the
- first item will be 40 (the header will lie in 0-39). The
- next item will begin in location 44 (the first item will lie
- in locations 40-43). And so forth. */
-
- while (fread(&bytecount,
- sizeof bytecount,1,thislen) == 1) {
- (void) fwrite(&sofar,sizeof bytecount,1,thisout);
- sofar += bytecount;
- }
-
- /* Since the expected way of figuring out how long an item is
- to subtract its starting address from that of its successor,
- we have to write the starting address of an imaginary item
- one past the last one. Not to worry...if someone asks for
- that item, they've made a mistake-o. */
-
- (void) fwrite(&sofar,sizeof bytecount,1,thisout);
-
- (void) fclose(thislen);
- (void) unlink(tempfilenames[inx]);
- (void) fclose(thisout);
- }
-
- /* Finally, we copy the input records to the appropriate
- output files. */
-
- raw = fopen(infilename,"r");
- if (raw == NULL) {
- (void) fprintf(stderr,
- "%s: can't open %s for reading\n",
- cmdname, infilename);
- exit(1);
- }
-
- for (inx = 0; inx < lines_per_entry; inx++) {
- outfiles[inx] = fopen(outfilenames[inx],"a");
- }
-
- inx = 0;
- while (TRUE) {
- if ((bytecount = (long) fngetline(raw,buff,MAX_WORD_LEN))
- != 0) {
- (void) fwrite(buff,
- sizeof(char),bytecount,outfiles[inx]);
- inx++;
- if (inx == lines_per_entry) {
- inx = 0;
- }
- } else {
- if (inx == 0) {
- break;
- } else {
- fprintf(stderr,
- "%s: premature eof on read [A]\n",
- cmdname);
- exit(5);
- }
- }
- }
- (void) fclose(raw);
- for (inx = 0; inx < lines_per_entry; inx++) {
- (void) fclose(outfiles[inx]);
- }
-
- exit(0);
- }
-
-