home *** CD-ROM | disk | FTP | other *** search
- /*
- ** general purpose file packer, Copyright (C) Randy Nevin 1989, 1990.
- **
- ** you may give this software to anyone, make as many copies as you like, and
- ** post it on public computer bulletin boards and file servers. you may not
- ** sell it or charge any fee for distribution (except for media and postage),
- ** remove this comment or the copyright notice from the code, or claim that
- ** you wrote this code or anything derived from it. you may modify the code as
- ** much as you want (please document clearly with comments, and maintain the
- ** coding style), but programs which are derived from this one are subject to
- ** the conditions stated here. i am providing this code so that people can
- ** learn from it, so if you distribute it, please include source code, not
- ** just executables. contact me to report bugs or suggest enhancements; i do
- ** not guarantee support, but i will make an effort to help you, and i want to
- ** act as a central clearing house for future versions. you should contact me
- ** before undertaking a significant development effort, to avoid reinventing
- ** the wheel. if you come up with an enhancement you consider particularly
- ** useful, i would appreciate being informed so that it can be incorporated in
- ** future versions. my address is: Randy Nevin, 1731 211th PL NE, Redmond,
- ** WA 98053, USA. this code is available directly from the author; just send a
- ** 360k floppy and a self-addressed floppy mailer with sufficient postage.
- **
- ** HISTORY
- ** (name date description)
- ** ----------------------------------------------------
- ** randy nevin 7/16/89 initial version
- ** randy nevin 8/15/89 released version 1.00
- */
-
- /* BUGBUG: doesn't handle files with only one byte value */
-
- /*
- ** use huffman encoding to find the most frequently used bytes. do this by
- ** scanning the input file once, counting the number of times each byte
- ** occurs. then build a frequency-weighted binary tree. tree traversal will
- ** assign a bit sequence to each byte. for files in which a few bytes occur
- ** many times, those bytes are near the top of the tree, and thus have a short
- ** encoding scheme. then scan the input file again, writing out the binary
- ** tree and the encoded input to the output file.
- */
-
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
-
- #define MAX 32 /* use a maximum of 32 bits per byte */
-
- struct node { /* tree node for huffman encoding */
- long count;
- int byteval;
- char buf[MAX+1];
- char pad;
- struct node *left, *right, *nxtnode;
- } *head = NULL, *tail = NULL;
-
- long table[0x100]; /* frequency table */
- struct node *ptab[0x100]; /* pointers to the corresponding nodes */
-
- void main( int, char *[] );
- struct node *getmin( void );
- void walk0( struct node *, char [], int );
- void walk1( struct node *, FILE * );
- void walk2( struct node *, FILE * );
- void walk3( struct node * );
- void initbit( void );
- void flushbit( FILE * );
- void outbit( int, FILE * );
-
- void main ( argc, argv )
- int argc;
- char *argv[];
- {
- char *self, *t;
- int c, i;
- FILE *fin, *fout;
- struct node *p, *q, *r;
- char buf[MAX+1];
- long total;
-
- printf( "Copyright (C) Randy Nevin, 1989, 1990. Version 1.00\n" );
- printf( "See source code for rights granted.\n\n" );
- self = argv[0];
- /* get rid of initial part of path */
- if ((t = strrchr( self, '\\' )) || (t = strrchr( self, ':' )))
- self = ++t;
- /* get rid of extension */
- if ((t = strrchr( self, '.' )) && !stricmp( t, ".EXE" ))
- *t = 0;
- if (argc != 3) { /* need infile and outfile */
- fprintf( stderr, "usage: %s infile outfile\n", self );
- exit( -1 );
- }
- if (!(fin = fopen( argv[1], "rb" ))) {
- fprintf( stderr, "can't open %s\n", argv[1] );
- exit( -1 );
- }
- if (!(fout = fopen( argv[2], "wb" ))) {
- fprintf( stderr, "can't open %s\n", argv[2] );
- exit( -1 );
- }
- /* initialize the byte count table */
- for (i = 0; i < 0x100; i++)
- table[i] = 0;
- /* count the bytes */
- for (total = 0, c = getc( fin ); c != EOF; total++, c = getc( fin ))
- table[c] += 1L;
- putc( 1, fout ); /* encoding type (1=huffman) */
- putc( (char)total, fout );
- putc( (char)(total >> 8), fout );
- putc( (char)(total >> 16), fout );
- putc( (char)(total >> 24), fout );
- fseek( fin, 0L, SEEK_SET ); /* rewind input file for second pass */
- /* build a node for each byte value, and put them in a list */
- for (i = 0; i < 0x100; i++)
- if (table[i]) {
- if (!(p = (struct node *)malloc(
- sizeof(struct node) ))) {
- fprintf( stderr, "out of memory\n" );
- exit( -1 );
- }
- p->count = table[i];
- p->byteval = i;
- p->left = p->right = p->nxtnode = NULL;
- if (tail)
- tail->nxtnode = p;
- else
- head = p;
- tail = p;
- }
- /* collapse forest to a single tree */
- while (head && head->nxtnode) {
- p = getmin();
- q = getmin();
- if (!(r = (struct node *)malloc( sizeof(struct node) ))) {
- fprintf( stderr, "out of memory\n" );
- exit( -1 );
- }
- r->count = p->count + q->count;
- r->left = p;
- r->right = q;
- r->nxtnode = NULL;
- if (tail)
- tail->nxtnode = r;
- else
- head = r;
- tail = r;
- }
- buf[0] = 0;
- walk0( head, buf, 0 ); /* assign encoded bit strings */
- initbit();
- walk1( head, fout ); /* write out tree skeleton */
- flushbit( fout );
- walk2( head, fout ); /* write out byte values */
- walk3( head ); /* set pointers to corresponding nodes */
- initbit();
- while ((c = getc( fin )) != EOF) { /* encode input file */
- for (t = ptab[c]->buf; *t; t++)
- outbit( (*t == '0') ? 0 : 1, fout );
- }
- flushbit( fout );
- if (ferror( fout ))
- fprintf( stderr, "output error; disk might be full\n" );
- fclose( fout );
- exit( 0 );
- }
-
- struct node *getmin () { /* fetch & remove smallest-weighted element */
- struct node *p, *q;
-
- for (p = head, q = p->nxtnode; q; q = q->nxtnode)
- if (q->count < p->count) /* find smallest */
- p = q;
- if (p == head) {
- if (p == tail)
- head = tail = NULL;
- else {
- head = p->nxtnode;
- p->nxtnode = NULL;
- }
- }
- else {
- for (q = head; q && q->nxtnode != p; q = q->nxtnode)
- ;
- if (!q) {
- fprintf( stderr, "internal error\n" );
- exit( -1 );
- }
- q->nxtnode = p->nxtnode;
- p->nxtnode = NULL;
- if (p == tail)
- tail = q;
- }
- return( p );
- }
-
- void walk0 ( p, buf, level ) /* assign encoded bit strings */
- struct node *p;
- char buf[];
- int level;
- {
- if (!(p->left) && !(p->right)) {
- strcpy( p->buf, buf );
- /* printf( "str[0x%02X] = %s\n", p->byteval, buf ); */
- }
- else if (level < MAX) {
- buf[level] = '0';
- buf[level+1] = 0;
- walk0( p->left, buf, level+1 );
- buf[level] = '1';
- buf[level+1] = 0;
- walk0( p->right, buf, level+1 );
- buf[level] = 0;
- }
- else {
- fprintf( stderr, "error: skewed tree\n" );
- exit( -1 );
- }
- }
-
- void walk1 ( p, fout ) /* output skeletal tree structure */
- struct node *p;
- FILE *fout;
- {
- /* a 1 bit indicates a node has children; 0 means no children */
- if (p->left) {
- outbit( 1, fout );
- walk1( p->left, fout );
- walk1( p->right, fout );
- }
- else
- outbit( 0, fout );
- }
-
- void walk2 ( p, fout ) /* write out byte values from the tree */
- struct node *p;
- FILE *fout;
- {
- if (p->left) {
- walk2( p->left, fout );
- walk2( p->right, fout );
- }
- else
- putc( (char)(p->byteval), fout );
- }
-
- void walk3 ( p ) /* set pointers to corresponding nodes */
- struct node *p;
- {
- if (p->left) {
- walk3( p->left );
- walk3( p->right );
- }
- else
- ptab[p->byteval] = p;
- }
-
- static int shift; /* how far to shift next bit */
- static char byte; /* the byte buffer */
-
- void initbit () { /* initialize bit output buffer */
- byte = 0;
- shift = 0;
- }
-
- void flushbit ( fp ) /* flush bit output buffer */
- FILE *fp;
- {
- if (shift) /* buffer non-empty? */
- putc( byte, fp ); /* yes, output partial byte */
- }
-
- void outbit( bit, fp ) /* output a bit using byte buffering */
- int bit;
- FILE *fp;
- {
- byte |= ((char)bit << shift);
- if (shift == 7) {
- putc( byte, fp );
- byte = 0;
- shift = 0;
- }
- else
- shift++;
- }
-