home *** CD-ROM | disk | FTP | other *** search
- From: gtoal@tharr.UUCP (Graham Toal)
- Newsgroups: alt.sources
- Subject: code for reading from .Z files (with free zcat :-) )
- Message-ID: <1422@tharr.UUCP>
- Date: 15 Nov 90 00:59:40 GMT
-
- Archive-name: zlib.shr
-
- This posting consists of a set of routines which roughly simulate fopen,
- fgetc, fgets, and fclose. The difference between these and the originals is
- that these will read data from a .Z compressed file, decompressing it on the
- fly. It does *not* use pipes, processes, or intermediate files. This makes
- it useful to add to any programs which read large text files sequentially.
-
- An example of this might be a version of LaTeX which read its .sty files in
- compressed form -- it satisfies the following criteria: 1) the files are read
- sequentially; 2) the files are read from *much* more often than they are
- written to.
-
- I passed this code around a couple of years back, and forgot about it since.
- I recently had to resurrect it, and have taken the chance to fix a couple of
- bugs which had surfaced in the mean time, and to port it to MSDOS. (Of course
- it still works on Unix or any standard ANSI C system)
-
- I include as a test program a simple version of zcat; someone was asking
- recently for a small uncompress program; well this is it.
-
- The source is heavily based on the original compress. I've removed as much
- unnecessary code as I could get away with, and simplified many expressions to
- get them through the dismal MSDOS compilers.
-
- Any comments/bug reports to me; Graham Toal <gtoal@ed.ac.uk>
-
- #!/bin/sh-----cut here-----cut here-----cut here-----cut here-----
- # shar: Shell Archiver
- # Run the following text with /bin/sh to create:
- # zcat.c
- # zlib.h
- # zlib.c
- cat - << \SHAR_EOF > zcat.c
- #include <stdio.h>
- #include "zlib.h"
- /*#include "zlib.c"*/ /* Written so it can be either included or linked in */
-
- /* This part is optional... you probably wouldn't do this in real life */
- #define FILE ZFILE
- #define fgetc(in) zfgetc(in)
- #define fopen(f, m) zfopen(f, m)
- #define fclose(f) zfclose(f)
-
- /*
-  * zcat: copy the (decompressed) contents of each named file, or of
-  * standard input when no file is named, to standard output.
-  *
-  * FILE, fgetc, fopen and fclose are remapped by the macros above, so
-  * "in" is really a ZFILE * and the calls below reach the z-routines.
-  *
-  * Returns 0 when every input could be opened, 1 otherwise.
-  */
- #ifndef __STDC__
- int main(argc, argv)
- int argc;
- char **argv;
- #else
- int main(int argc, char **argv)
- #endif
- {
-     FILE *in;
-     int i, c;
-     int status = 0;     /* becomes 1 as soon as any input fails to open */
-
-     if (argc == 1) {
-         in = zfilter(stdin);
-         if (in == NULL) {
-             fprintf(stderr, "%s: cannot read standard input\n", argv[0]);
-             return (1);
-         }
-         while ((c = fgetc(in)) != EOF)
-             putchar(c);
-         fclose(in);
-     } else if (argc > 1) {
-         for (i = 1; i < argc; i++) {
-             in = fopen(argv[i], "r");
-             if (in == NULL) {
-                 fprintf(stderr, "%s: cannot open %s\n", argv[0], argv[i]);
-                 status = 1;     /* keep going, but report the failure */
-                 continue;
-             }
-             while ((c = fgetc(in)) != EOF)
-                 putchar(c);
-             fclose(in);
-         }
-     }
-     return (status);
- }
- SHAR_EOF
- cat - << \SHAR_EOF > zlib.h
- #ifndef _ZLIB_H
- #define _ZLIB_H 1
-
- #ifdef MSDOS
- #define PC_HUGE huge /* Microsoft C and contemptibles */
- #else
- #define PC_HUGE
- #endif
-
-
- #define ZEXT ".Z"
-
- #ifdef __arm
- #undef ZEXT
- #define ZEXT "-z"
- #endif
-
- /*
-  * State of one stream opened with zfopen() or zfilter().  A handle either
-  * passes an uncompressed stream straight through (NOT_COMPRESSED set in
-  * flags) or holds the full state of a resumable decompressor.
-  */
- typedef struct zfiletype {
- /* Largest code width handled; also the size of buf[] and, as 1L << Z_BITS,
-    the base index of the output stack kept inside tab_suffixof. */
- #define Z_BITS 16
- /* Size of the decompressed-output buffer buff[]. */
- #define Z_MAXBUF 256
- FILE *file; /* underlying stdio stream */
- int flags; /* bit flags; zlib.c uses NOT_COMPRESSED and ALLOCATED */
- int n_bits; /* number of bits/code currently being read */
- int maxbits; /* max # bits/code; overwritten from the file header byte */
- long maxcode; /* maximum code, given n_bits */
- long free_ent; /* first unused entry in the code tables */
- int block_compress; /* header byte & BLOCK_MASK: nonzero => CLEAR codes are honoured */
- int clear_flg; /* set after a CLEAR code; makes getcode reset n_bits */
-
- long stackp; /* output-stack index into tab_suffixof (long because the
- array is 'huge' on MSDOS) */
- long finchar; /* first character of the string most recently expanded */
- long code, oldcode, incode; /* code being expanded, previous code, code as read */
- int offset, size; /* bit position in buf, and bit limit computed after each refill */
- unsigned char buf[Z_BITS]; /* Raw input bytes; passed to getcode */
- unsigned char PC_HUGE *tab_suffixof; /* There is a flag bit (ALLOCATED) to say whether */
- long PC_HUGE *tab_prefixof; /* these have been allocated. */
- int init; /* nonzero once decompress_more has done its one-time setup */
-
- int bufput, bufget, bufend; /* write index, read index and fill limit for buff */
- unsigned char buff[Z_MAXBUF]; /* decompressed bytes waiting to be returned by zfgetc */
- int c1, c2; /* the two bytes zfilter read while checking the magic number;
- zfgetc replays them when the stream is not compressed */
- int zeof; /* nonzero once the decompressed data is exhausted */
- } ZFILE;
-
- /*
-  * Public interface.
-  *   zfopen  - open a file by name; a leading 'r' in "how" selects reading,
-  *             with ZEXT appended to the name if the plain name cannot be
-  *             opened.  Returns NULL if no file could be opened.
-  *   zfclose - release a handle obtained from zfopen or zfilter.
-  *   zfilter - wrap an already-open stdio stream.
-  *   zfgetc  - return the next byte, or EOF.
-  *   zfeof   - nonzero once the end of the data has been reached.
-  *   zfgets  - read one line into "line" (at most "len" bytes of buffer).
-  * The first branch is for pre-ANSI compilers without prototypes.
-  */
- #ifndef __STDC__
- ZFILE *zfopen(/* char *fileptr, char *how */);
- void zfclose(/* ZFILE *z */);
- ZFILE *zfilter(/* FILE *f */);
- int zfgetc(/* ZFILE *z */);
- int zfeof(/* ZFILE *z */);
- char *zfgets(/* char *line, int len, ZFILE *zfp */);
- #else
- ZFILE *zfopen(char *fileptr, char *how);
- void zfclose(ZFILE *z);
- ZFILE *zfilter(FILE *f);
- int zfgetc(ZFILE *z);
- int zfeof(ZFILE *z);
- char *zfgets(char *line, int len, ZFILE *zfp);
- #endif /* Not __STDC__ */
- #endif
- SHAR_EOF
- cat - << \SHAR_EOF > zlib.c
- /*#define MAIN*/
- /*int debug = 1;*/
- /*#define DEBUG 1*/
-
- /* These wondrous debugging macros helped me find the nasty bug which
- only manifested itself on msdos -- stackp has to be a long on msdos
- because the array it is indexing is 'huge' ... */
- #ifdef DEBUG
- #define TRACT(lev, stmnt) \
- if (lev <= debug) fprintf(stderr, "%d: %s\n", __LINE__, #stmnt);
- #define TRACE(lev, stmnt) \
- if (lev <= debug) fprintf(stderr, "%d: %s\n", __LINE__, #stmnt); stmnt
- #define TRACA(lev, stmnt) \
- stmnt; if (lev <= debug) fprintf(stderr, "%d: %s\n", __LINE__, #stmnt);
- #define TRACL(lev, var) \
- if (lev <= debug) fprintf(stderr, "%d: %s <- %ld\n", __LINE__, #var, var);
- #else
- #define TRACT(lev, stmnt)
- #define TRACE(lev, stmnt) stmnt
- #define TRACA(lev, stmnt) stmnt
- #define TRACL(lev, var)
- #endif
- /*
- *
- * Originally:
- *
- * compress.c - File compression ala IEEE Computer, June 1984.
- *
- * Authors: Spencer W. Thomas (decvax!harpo!utah-cs!utah-gr!thomas)
- * Jim McKie (decvax!mcvax!jim)
- * Steve Davies (decvax!vax135!petsd!peora!srd)
- * Ken Turkowski (decvax!decwrl!turtlevax!ken)
- * James A. Woods (decvax!ihnp4!ames!jaw)
- * Joe Orost (decvax!vax135!petsd!joe)
- *
- * $Header: zlib.c,v 4.1 90/11/12 14:52:24 gtoal Release $
- *
- * Graham Toal, 3rd September 1988. My changes released to public domain.
- * Updated Nov 90.
- *
- * The original decompress has been restructured so that data can be
- * fetched on demand a byte at a time. This lets it be used as a filter
- * for programs which read large data files - you do not need the disk
- * space to decompress the input files first.
- *
- * (Incidentally, programs reading data off floppies will be speeded up
- * because decompression is always faster than the equivalent amount
- * of disk I/O).
- *
- * This implementation supplies 'z' versions of fopen, fgetc, fgets, feof and fclose
- * to be used as direct substitutes for the originals; it would be cleaner
- * and more transparent if the decompress filter were hidden under the
- * real stdio procedures. An extra call zfilter() is supplied to convert
- * an already-opened stream into a z-stream: see the example at the end
- * of this file.
- *
- * If a file opened by zfopen() was not compressed, the file's contents are
- * still recovered correctly at the low expense of an extra procedure call
- * per byte. This makes the routines more generally usable - they can be
- * left in production programs which can be speeded up in the field by
- * compressing selected input files(*); also, files can be compressed or
- * not selectively depending on whether the compression makes them
- * smaller or not - code accessing the files does not need to know.
- *
- * [(*) reading from a compressed file off floppy disk is faster than
- * reading from an uncompressed file. This probably isn't true of
- * hard disks though.]
- *
- * BUGS: Opening a file "r" will not do CR/LF processing on computers with
- * this file structure.
- */
-
- #include <stdio.h>
- #include <string.h>
- #ifdef __STDC__
- #include <stdlib.h>
- #else
- #define size_t int
- #endif
- #include <ctype.h>
-
- #ifdef MSDOS
- #include <malloc.h>
- #endif
-
- #ifndef min
- #define min(a,b) ((a>b) ? b : a)
- #endif
- #define HSIZE 69001L /* 95% occupancy */
-
- /*
- * the next two codes should not be changed lightly, as they must not
- * lie within the contiguous general code space.
- */
-
- #define FIRST 257L /* first free entry */
- #define CLEAR 256L /* table clear output code */
-
- #define BIT_MASK 0x1f
- #define BLOCK_MASK 0x80
- #define INIT_BITS 9 /* initial number of bits/code */
-
- #define CHECK_GAP 10000L/* ratio check interval */
-
- #include "zlib.h"
- #define NOT_COMPRESSED 1
- #define ALLOCATED 2
-
- #ifndef __STDC__
- static void decompress_more( /* register ZFILE *z */ );
- static long getcode( /* register ZFILE *z */ );
- #else
- static void decompress_more(register ZFILE *z);
- static long getcode(register ZFILE *z);
- #endif
-
- /*
-  * zfopen: open "fileptr" in the manner of fopen().
-  *
-  * When "how" starts with 'r' the file is opened for reading.  If the
-  * plain name cannot be opened, the name with ZEXT appended is tried.
-  * A file that starts with the compress magic number (0x1F 0x9D) is set
-  * up for on-the-fly decompression; any other file is reopened with the
-  * caller's mode string and passed through unchanged.
-  *
-  * For any other mode the file is simply opened with "how" and marked
-  * NOT_COMPRESSED (there is no compressed output).
-  *
-  * Returns a new handle, or NULL if the arguments are NULL, memory is
-  * exhausted, or no file could be opened.  A compressed file whose header
-  * is truncated or asks for more than Z_BITS bits is reported on stderr
-  * and terminates the program with a failure status.
-  */
- #ifndef __STDC__
- ZFILE *zfopen(fileptr, how)
- char *fileptr;
- char *how;
- #else
- ZFILE *zfopen(char *fileptr, char *how)
- #endif
- {
-     register ZFILE *z;
-     char tempfname[256];
-     char *opened;           /* the name that fopen() actually accepted */
-     int hdr;
-
-     if (fileptr == NULL || how == NULL)
-         return (NULL);
-
-     z = (ZFILE *) malloc(sizeof(ZFILE));
-     if (z == NULL)
-         return (NULL);
-
-     z->flags = 0;
-     z->maxbits = Z_BITS;    /* user settable max # bits/code */
-     z->free_ent = 0;        /* first unused entry */
-     z->block_compress = BLOCK_MASK;
-     z->clear_flg = 0;
-     z->init = 0;
-     z->tab_suffixof = NULL;
-     z->tab_prefixof = NULL;
-
-     z->zeof = (0 != 0);
-     z->c1 = EOF;
-     z->c2 = EOF;
-     z->bufput = 0;
-     z->bufget = 0;
-     z->bufend = Z_MAXBUF - 1;
-
-     if (*how != 'r') {
-         /* No compressed output yet, if ever... */
-         /* Compress the file explicitly once it has been written. */
-         /* The stream is opened exactly once: probing it for a magic */
-         /* number would mean reading from a write stream and then    */
-         /* truncating the file a second time on reopen.              */
-         z->flags |= NOT_COMPRESSED;
-         z->file = fopen(fileptr, how);
-         if (z->file == NULL) {
-             free(z);
-             return (NULL);
-         }
-         return (z);
-     }
-
-     /* Open input file: the plain name first, then name + ZEXT. */
-     opened = fileptr;
-     z->file = fopen(fileptr, "rb");
-     if (z->file == NULL
-         && strlen(fileptr) + strlen(ZEXT) < sizeof(tempfname)) {
-         /* Length checked above, so these copies cannot overflow. */
-         strcpy(tempfname, fileptr);
-         strcat(tempfname, ZEXT);
-         z->file = fopen(tempfname, "rb");
-         opened = tempfname;
-     }
-     if (z->file == NULL) {
-         free(z);
-         return (NULL);
-     }
-
-     /* Check the magic number */
-     if ((fgetc(z->file) != 0x1F) || (fgetc(z->file) != 0x9D)) {
-         /* Not compressed: start again from the beginning, this time  */
-         /* with the caller's own mode, under the name that was found. */
-         z->flags |= NOT_COMPRESSED;
-         fclose(z->file);
-         z->file = fopen(opened, how);
-         if (z->file == NULL) {
-             free(z);
-             return (NULL);
-         }
-         return (z);
-     }
-
-     hdr = fgetc(z->file);   /* set -b from file */
-     if (hdr == EOF) {
-         fprintf(stderr, "%s: truncated compressed file header\n", fileptr);
-         exit(1);
-     }
-     z->block_compress = hdr & BLOCK_MASK;
-     z->maxbits = hdr & BIT_MASK;
-     if (z->maxbits > Z_BITS) {
-         fprintf(stderr,
-                 "%s: compressed with %d bits; decompress can only handle %d bits\n",
-                 fileptr, z->maxbits, Z_BITS);
-         exit(1);            /* a fatal error must not report success */
-     }
-     return (z);
- }
-
- /*
-  * zfilter: wrap the already-open stream "f" in a ZFILE.
-  *
-  * The first two bytes are read to look for the compress magic number
-  * (0x1F 0x9D).  They cannot be pushed back, so they are kept in c1/c2
-  * and zfgetc() replays them when the stream turns out not to be
-  * compressed.
-  *
-  * Returns a new handle, or NULL if "f" is NULL or memory is exhausted.
-  * A compressed stream whose header is truncated or asks for more than
-  * Z_BITS bits is reported on stderr and terminates the program with a
-  * failure status.
-  */
- #ifndef __STDC__
- ZFILE *zfilter(f)
- FILE *f;
- #else
- ZFILE *zfilter(FILE *f)
- #endif
- {
-     register ZFILE *z;
-     int hdr;
-
-     if (f == NULL)
-         return (NULL);
-
-     z = (ZFILE *) malloc(sizeof(ZFILE));
-     if (z == NULL)
-         return (NULL);
-
-     z->flags = 0;
-     z->maxbits = Z_BITS;    /* user settable max # bits/code */
-     z->free_ent = 0;        /* first unused entry */
-     z->block_compress = BLOCK_MASK;
-     z->clear_flg = 0;
-     z->init = 0;
-     z->tab_suffixof = NULL;
-     z->tab_prefixof = NULL;
-
-     z->zeof = (0 != 0);
-     z->bufput = 0;
-     z->bufget = 0;
-     z->bufend = Z_MAXBUF - 1;
-
-     z->file = f;
-
-     /* Check the magic number */
-     z->c1 = fgetc(z->file);
-     z->c2 = fgetc(z->file);
-     if ((z->c1 != 0x1F) || (z->c2 != 0x9D)) {
-         z->flags |= NOT_COMPRESSED;
-         return (z);
-     }
-
-     hdr = fgetc(z->file);   /* set -b from file */
-     if (hdr == EOF) {
-         fprintf(stderr, "stdin: truncated compressed file header\n");
-         exit(1);
-     }
-     z->block_compress = hdr & BLOCK_MASK;
-     z->maxbits = hdr & BIT_MASK;
-     if (z->maxbits > Z_BITS) {
-         fprintf(stderr,
-                 "stdin compressed with %d bits; decompress can only handle %d bits\n",
-                 z->maxbits, Z_BITS);
-         exit(1);            /* a fatal error must not report success */
-     }
-     return (z);
- }
-
- /*
-  * zfgetc: return the next byte of the stream, or EOF.
-  *
-  * Uncompressed streams first hand back the bytes saved in c1/c2 and then
-  * read the underlying file directly.  Compressed streams are served from
-  * buff[], which is refilled through decompress_more() whenever it runs
-  * dry; once a refill yields nothing the code tables are released and EOF
-  * is returned from then on.
-  */
- #ifndef __STDC__
- int zfgetc(z)
- ZFILE *z;
- #else
- int zfgetc(ZFILE *z)
- #endif
- {
-     int ch;
-
-     if ((z->flags & NOT_COMPRESSED) != 0) {
-         ch = z->c1;
-         if (ch < 0)
-             return (fgetc(z->file));
-         /* Shift the saved look-ahead bytes down by one. */
-         z->c1 = z->c2;
-         z->c2 = EOF;
-         return (ch);
-     }
-
-     if (!z->zeof && (z->bufput == z->bufget))
-         decompress_more(z);
-
-     z->zeof = (z->bufget == z->bufput);
-     if (!z->zeof) {
-         ch = z->buff[z->bufget];
-         z->bufget += 1;
-         return (ch);
-     }
-
-     /* Nothing left: the tables are no longer needed. */
-     if ((z->flags & ALLOCATED) != 0) {
- #ifdef MSDOS
-         hfree(z->tab_suffixof);
-         hfree(z->tab_prefixof);
- #else
-         free(z->tab_suffixof);
-         free(z->tab_prefixof);
- #endif
-         z->flags &= (~ALLOCATED);
-     }
-     return (EOF);
- }
-
- /*
-  * zfeof: nonzero once the end of the stream has been reached.
-  *
-  * For a compressed stream this is the zeof flag maintained by zfgetc().
-  * For an uncompressed stream it is feof() of the underlying file, except
-  * that a byte still waiting in c1 means the end has not been reached.
-  */
- #ifndef __STDC__
- int zfeof(z)
- ZFILE *z;
- #else
- int zfeof(ZFILE *z)
- #endif
- {
-     if ((z->flags & NOT_COMPRESSED) == 0)
-         return (z->zeof);
-     if (z->c1 == EOF)
-         return (feof(z->file));
-     return (0);
- }
-
- /*
-  * zfclose: release a handle obtained from zfopen() or zfilter().
-  * A NULL handle is ignored.
-  *
-  * The code tables are freed whenever they are still allocated, so a
-  * stream that is closed before its end no longer leaks them.
-  *
-  * The stdio stream of a NOT_COMPRESSED handle is closed here, because
-  * nothing else ever closes it.  The stream of a compressed handle is
-  * closed by getcode() when it reaches end of file, and this function
-  * cannot tell whether that has already happened, so it is left alone.
-  * NOTE(review): a compressed stream closed before its end therefore
-  * still leaves its FILE open.
-  */
- #ifndef __STDC__
- void zfclose(z)
- ZFILE *z;
- #else
- void zfclose(ZFILE *z)
- #endif
- {
-     if (z == NULL)
-         return;
-
-     if ((z->flags & ALLOCATED) != 0) {
- #ifdef MSDOS
-         hfree(z->tab_suffixof);
-         hfree(z->tab_prefixof);
- #else
-         free(z->tab_suffixof);
-         free(z->tab_prefixof);
- #endif
-         z->flags &= (~ALLOCATED);
-     }
-
-     if (((z->flags & NOT_COMPRESSED) != 0) && (z->file != NULL)) {
-         fclose(z->file);
-         z->file = NULL;
-     }
-     free(z);
- }
-
- /*
-  * zfgets: read one line from "zfp" into "line", in the manner of fgets().
-  *
-  * At most len - 1 bytes are stored, followed by a terminating '\0'.
-  * Reading stops after a '\n' (which is kept in the string), when the
-  * buffer is full, or at end of data.  No byte is ever read that cannot
-  * be stored, so an over-long line is continued by the next call.
-  *
-  * Returns "line", or NULL if "line" is NULL, "len" is not positive, or
-  * the end of the data was reached before any byte was read (in which
-  * case the buffer is left untouched).  A final line without a trailing
-  * newline is returned rather than dropped.
-  */
- #ifndef __STDC__
- char *zfgets(line, len, zfp)
- char *line;
- int len;
- ZFILE *zfp;
- #else
- char *zfgets(char *line, int len, ZFILE *zfp)
- #endif
- {
-     int c = 0;
-     int pos = 0;
-
-     if (line == NULL || len <= 0)
-         return (NULL);
-
-     /* Only fetch a byte while there is room for it and the '\0'. */
-     while (pos < len - 1) {
-         c = zfgetc(zfp);
-         if (c == EOF)
-             break;
-         line[pos++] = (char) c;
-         if (c == '\n')
-             break;
-     }
-
-     if (c == EOF && pos == 0)
-         return (NULL);
-     line[pos] = '\0';
-     return (line);
- }
-
- /*
-  * decompress_more: refill z->buff with the next run of decompressed bytes.
-  *
-  * The routine is written as a resumable loop.  The first call allocates
-  * and initialises the code tables and emits the first byte; whenever the
-  * buffer fills (bufput reaches bufend) it returns from the middle of the
-  * output loop, and the next call jumps straight back to that point via
-  * the "resume" label.  Everything needed across calls therefore lives in
-  * *z and not in locals.
-  *
-  * On return bufget is 0 and bufput is the number of bytes available; an
-  * empty buffer tells zfgetc() that the data is exhausted.  If the tables
-  * cannot be allocated a message is printed, ALLOCATED is left clear and
-  * the buffer is left empty, so the stream simply reports EOF.
-  */
- #ifndef __STDC__
- static void decompress_more(z)
- register ZFILE *z;
- #else
- static void decompress_more(register ZFILE *z)
- #endif
- {
-     z->bufput = 0;
-     z->bufget = 0;
-
-     if (z->init != 0)
-         goto resume;
-     z->init = 1;
-
-     z->offset = 0;
-     z->size = 0;
- #ifdef MSDOS
-     z->tab_suffixof =
-         (unsigned char PC_HUGE *) halloc(HSIZE, sizeof(unsigned char));
-     z->tab_prefixof =
-         (long PC_HUGE *) halloc(HSIZE, sizeof(long));
- #else
-     z->tab_suffixof =
-         (unsigned char *) malloc((size_t) HSIZE * sizeof(unsigned char));
-     z->tab_prefixof = (long *) malloc((size_t) HSIZE * sizeof(long));
- #endif
-     if (z->tab_suffixof == NULL || z->tab_prefixof == NULL) {
-         /* Release whichever half did get allocated and give up. */
- #ifdef MSDOS
-         if (z->tab_suffixof != NULL)
-             hfree(z->tab_suffixof);
-         if (z->tab_prefixof != NULL)
-             hfree(z->tab_prefixof);
- #else
-         if (z->tab_suffixof != NULL)
-             free(z->tab_suffixof);
-         if (z->tab_prefixof != NULL)
-             free(z->tab_prefixof);
- #endif
-         z->tab_suffixof = NULL;
-         z->tab_prefixof = NULL;
-         fprintf(stderr, "zlib: out of memory for decompression tables\n");
-         return;
-     }
-     z->flags |= ALLOCATED;
-
-     z->n_bits = INIT_BITS;
-     z->maxcode = ((1L << (z->n_bits)) - 1L);
-     /* Codes 0..255 stand for the single bytes themselves. */
-     for (z->code = 255L; z->code >= 0L; z->code--) {
-         z->tab_prefixof[z->code] = 0L;
-         z->tab_suffixof[z->code] = (unsigned char) z->code;
-     }
-     z->free_ent = ((z->block_compress) ? FIRST : 256L);
-
-     z->finchar = z->oldcode = getcode(z);
-     if (z->oldcode == -1L)
-         return;                 /* EOF already? */
-     if (z->finchar < 0L || z->finchar >= 256L)
-         fprintf(stderr, "****\n");
-     z->buff[z->bufput] = (char) (z->finchar & 0xff);
-     z->bufput++;
-
-     /* The output stack lives in tab_suffixof above the code space. */
-     z->stackp = 1L << Z_BITS;   /* The 1L is for DOS huge arrays */
-
-     while ((z->code = getcode(z)) != EOF) {
-         if ((z->code == CLEAR) && z->block_compress) {
-             for (z->code = 255; z->code >= 0; z->code--)
-                 z->tab_prefixof[z->code] = 0;
-             z->clear_flg = 1;
-             z->free_ent = FIRST - 1;
-             if ((z->code = getcode(z)) == EOF)
-                 break;          /* O, untimely death! */
-         }                       /* if */
-         z->incode = z->code;
-         /* A code not yet in the table: it expands to the previous */
-         /* string followed by that string's first character.       */
-         if (z->code >= z->free_ent) {
-             z->tab_suffixof[z->stackp] = (unsigned char) z->finchar;
-             z->stackp += 1L;
-             z->code = z->oldcode;
-         }
-         /* Walk the prefix chain, stacking the string back to front. */
-         while (z->code >= 256L) {
-             z->tab_suffixof[z->stackp] = z->tab_suffixof[z->code];
-             z->stackp += 1L;
-             z->code = z->tab_prefixof[z->code];
-         }
-         z->finchar = z->tab_suffixof[z->code];
-         z->tab_suffixof[z->stackp] = (unsigned char) z->finchar;
-         z->stackp += 1L;
-         /* Pop the stack into the output buffer, in forward order. */
-         do {
-             long tmp;
-
-             z->stackp -= 1L;
-             tmp = z->tab_suffixof[z->stackp];
-             z->buff[z->bufput++] = (unsigned char) (tmp & 255L);
-             if (z->bufput == z->bufend) {
-                 return;         /* Logically a setjmp/longjump, but this is
-                                    more portable */
-         resume:;                /* the next call re-enters the loop here */
-             }                   /* if */
-         } while (z->stackp > (1L << Z_BITS));
-         /* ^ This is why I changed stackp from a pointer. */
-         /* Pointer comparisons can be dubious... */
-         /* Add the new string (previous string + finchar) to the table. */
-         if ((z->code = z->free_ent) < (1L << z->maxbits)) {
-             z->tab_prefixof[z->code] = z->oldcode;
-             z->tab_suffixof[z->code] = (unsigned char) z->finchar;
-             z->free_ent = z->code + 1;
-         }
-         z->oldcode = z->incode;
-     }                           /* while */
- }                               /* decompress more */
-
- /* rmask[n] keeps the low n bits of a byte. */
- static unsigned char rmask[9] =
- {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
-
- /*
-  * getcode: return the next n_bits-wide code from the input, or EOF.
-  *
-  * Input is read in groups of n_bits bytes into z->buf.  A new group is
-  * fetched when the current one is used up, when the code width has to
-  * grow (free_ent has passed maxcode) or after a CLEAR code (clear_flg).
-  *
-  * At end of input the stream is closed and z->file is set to NULL.
-  * decompress_more() can call this function again after EOF when it is
-  * resumed; the NULL marker makes those later calls return EOF at once
-  * instead of reading from, and closing again, a closed stream.
-  */
- #ifndef __STDC__
- static long getcode(z)
- register ZFILE *z;
- #else
- static long getcode(register ZFILE *z)
- #endif
- {                               /* Should be int!!! */
-     register long code;
-     register long r_off, bits;
-     register int bp;
-     int got;
-
-     if (z->file == NULL)
-         return (EOF);           /* input already finished and closed */
-
-     bp = 0;
-     if (z->clear_flg != 0 ||
-         z->offset >= z->size ||
-         z->free_ent > z->maxcode) {
-         if (z->free_ent > z->maxcode) {
-             z->n_bits++;
-             if (z->n_bits == z->maxbits) {
-                 z->maxcode = (1L << z->maxbits);    /* won't get any bigger now */
-             } else {
-                 z->maxcode = ((1L << (z->n_bits)) - 1L);
-             }
-         }
-         if (z->clear_flg != 0) {
-             z->n_bits = INIT_BITS;
-             z->maxcode = ((1L << (z->n_bits)) - 1L);
-             z->clear_flg = 0;
-         }
-         got = (int) fread(z->buf, 1, (size_t) z->n_bits, z->file);
-         if (got <= 0) {
-             z->size = 0;
-             fclose(z->file);
-             z->file = NULL;     /* remember that the stream is gone */
-             return (EOF);       /* end of file */
-         }
-         z->offset = 0;
-         /* Bit limit: only offsets with a whole code after them count. */
-         z->size = (got << 3) - (z->n_bits - 1);
-     }
-     r_off = z->offset;
-     bits = z->n_bits;
-     /* Byte holding the first bit, and the bit position inside it. */
-     bp = bp + ((int) r_off >> 3);
-     r_off = r_off & 7;
-     /* Low-order part from the first byte. */
-     code = ((long) z->buf[bp++] >> r_off);
-     bits = bits - 8 + r_off;
-     r_off = 8 - r_off;          /* now, offset into code word */
-     /* A whole middle byte, if the code spans one. */
-     if (bits >= 8) {
-         code = code | ((long) z->buf[bp++] << r_off);
-         r_off = r_off + 8;
-         bits = bits - 8;
-     }
-     /* High-order bits.  Skipped when none remain, so that buf[bp] is */
-     /* never read one element past the end of the array.              */
-     if (bits > 0) {
-         code = code
-             | ((long) ((long) (z->buf[bp]) & (long) rmask[bits]) << (long) r_off);
-     }
-     z->offset = z->offset + z->n_bits;
-     return (code);
- }
-
- #ifdef MAIN
-
- /* This part is optional... */
- #define FILE ZFILE
- #define fgetc(in) zfgetc(in)
- #define fopen(f, m) zfopen(f, m)
- #define fclose(f) zfclose(f)
-
- /*
-  * Example driver, built only when MAIN is defined.  It copies the
-  * contents of each named file, or of standard input when no file is
-  * named, to stderr.  FILE, fgetc, fopen and fclose are remapped to the
-  * z-routines by the macros above.
-  */
- #ifndef __STDC__
- int main(argc, argv)
- int argc;
- char **argv;
- #else
- int main(int argc, char **argv)
- #endif
- {
-     FILE *in;
-     int i, c;
-
-     if (argc == 1) {
-         in = zfilter(stdin);
-         while ((c = fgetc(in)) != EOF)
-             fputc(c, stderr);
-         zfclose(in);
-         return (0);
-     }
-
-     for (i = 1; i < argc; i++) {
-         in = fopen(argv[i], "r");
-         if (in == NULL) {
-             fprintf(stderr, "%s: cannot open %s\n", argv[0], argv[i]);
-             continue;
-         }
-         while ((c = fgetc(in)) != EOF)
-             fputc(c, stderr);
-         fclose(in);
-     }
-     return (0);
- }
-
- #endif
- SHAR_EOF
-
- --
- (* Posted from tharr.uucp - Public Access Unix - +44 (234) 261804 *)
-