home *** CD-ROM | disk | FTP | other *** search
-
- /*
- * COMPRESS.C
- *
- */
-
- #include "defs.h"
-
- #define ngetchar() oreadchar()
- #define nputchar(n) mputc(n)
-
- #ifndef min
- #define min(a,b) ((a>b) ? b : a)
- #endif
-
- #define BITS 13
-
- #ifdef _DCC
- #define HSIZE 9001
- #else
-
- #if BITS == 16
- #define HSIZE 69001 /* 95% occupancy */
- #endif
- #if BITS == 15
- #define HSIZE 35023 /* 94% occupancy */
- #endif
- #if BITS == 14
- #define HSIZE 18013 /* 91% occupancy */
- #endif
- #if BITS == 13
- #define HSIZE 9001 /* 91% occupancy */
- #endif
- #if BITS <= 12
- #define HSIZE 5003 /* 80% occupancy */
- #endif
-
- #endif
-
- typedef long code_int;
- typedef long count_int;
- typedef unsigned char char_type;
-
- #define MAXCODE(n_bits) ((1 << (n_bits)) - 1)
- #define INIT_BITS 9 /* initial number of bits/code */
-
- int n_bits; /* number of bits/code */
- int maxbits; /* user settable max # bits/code */
- code_int maxcode; /* maximum code, given n_bits */
- code_int maxmaxcode; /* should NEVER generate this code */
-
- __far count_int htab[HSIZE];
- __far uword codetab[HSIZE];
-
- #define htabof(i) htab[i]
- #define codetabof(i) codetab[i]
-
- code_int hsize = HSIZE; /* for dynamic table sizing */
-
- #define tab_prefixof(i) codetabof(i)
- #define tab_suffixof(i) ((char_type *)(htab))[i]
- #define de_stack ((char_type *)&tab_suffixof(1<<BITS))
-
- code_int free_ent; /* first unused entry */
- code_int getcode();
- void output (code_int);
- void cl_block(void);
- void cl_hash(count_int);
-
- #define CHECK_GAP 10000 /* ratio check interval */
-
- int block_compress = 1;
- int clear_flg;
- long ratio;
- count_int checkpoint;
-
- /*
- * the next two codes should not be changed lightly, as they must not
- * lie within the contiguous general code space.
- */
-
- #define FIRST 257 /* first free entry */
- #define CLEAR 256 /* table clear output code */
-
- static int offset;
- long int in_count = 1; /* length of input */
-
- /*
- * Compress a file to memory-buffers and return TRUE if the compressed
- * size is smaller than the actual size.
- */
-
- long
- CompressFile(name, fsize)
- char *name;
- long fsize;
- {
- long fcode;
- code_int i = 0;
- int c;
- code_int ent;
- int disp;
- code_int hsize_reg;
- int hshift;
-
- {
- register NODE *node;
- for (node = (NODE *)CSList.mlh_Head; node->ln_Succ; node = node->ln_Succ) {
- if (WildCmp(node->ln_Name, name)) {
- if (Verbose)
- printf(" Will not compress %s\n", name);
- return(0);
- }
- }
- }
-
- if (WildCmp("*.Z", name) || WildCmp("*.ARC", name) || WildCmp("*.ZOO", name)) {
- if (Verbose)
- printf(" Will not compress %s\n", name);
- return(0);
- }
-
- CLen = 0;
- CWrite = GetHead(&CList);
- if (CWrite == NULL)
- CWrite = NewSComp();
- CWrite->N = 0;
-
- setmem(htab, sizeof(htab), 0);
- setmem(codetab, sizeof(codetab), 0);
-
- hsize = HSIZE;
- if ( fsize < (1 << 12) )
- hsize = min ( 5003, HSIZE );
- else if ( fsize < (1 << 13) )
- hsize = min ( 9001, HSIZE );
- else if ( fsize < (1 << 14) )
- hsize = min ( 18013, HSIZE );
- else if ( fsize < (1 << 15) )
- hsize = min ( 35023, HSIZE );
- else if ( fsize < 47000 )
- hsize = min ( 50021, HSIZE );
-
- offset = clear_flg = ratio = 0;
- in_count = 1;
- checkpoint = CHECK_GAP;
- n_bits = INIT_BITS; /* number of bits/code */
- maxbits = BITS; /* user settable max # bits/code */
- maxcode = MAXCODE(INIT_BITS); /* maximum code, given n_bits */
- maxmaxcode = 1 << BITS; /* should NEVER generate this code */
- free_ent = ((block_compress) ? FIRST : 256 );
-
- ent = ngetchar();
-
- hshift = 0;
- for ( fcode = (long) hsize; fcode < 65536L; fcode *= 2L )
- hshift++;
- hshift = 8 - hshift; /* set hash code range bound */
-
- hsize_reg = hsize;
- cl_hash((count_int)hsize_reg); /* clear hash table */
-
- /*printf("hshift %d hsr %d,%d\n", hshift, hsize_reg, hsize);*/
-
- while ((c = ngetchar()) != EOF) {
- in_count++;
- fcode = (long) (((long) c << maxbits) + ent);
- i = ((c << hshift) ^ ent); /* xor hashing */
-
- /*printf("c %02x fcode %08lx i %d ent %08lx\n", c, fcode, i, ent);*/
-
- if (i >= hsize_reg) {
- /*printf("compress error A1 : %d on $%02x ent %lx\n", i, c, ent);*/
- if (i >= HSIZE) {
- puts("XXX1");
- continue;
- }
- }
-
- if (htabof (i) == fcode) {
- ent = codetabof(i);
- continue;
- } else if ((long)htabof (i) < 0) /* empty slot */
- goto nomatch;
- disp = hsize_reg - i; /* secondary hash (after G. Knott) */
- if (i == 0)
- disp = 1;
- probe:
- if ((i -= disp) < 0)
- i += hsize_reg;
-
- if (i >= hsize_reg || i < 0) {
- /*printf("compress error A2 : %d on $%02x\n", i, c);*/
- if (i >= HSIZE) {
- puts("XXX2");
- continue;
- }
- }
-
-
- if (htabof (i) == fcode) {
- ent = codetabof(i);
- continue;
- }
- if ((long)htabof (i) > 0)
- goto probe;
- nomatch:
- output ((code_int) ent);
- ent = c;
- if (free_ent < maxmaxcode) {
- codetabof(i) = free_ent++; /* code -> hashtable */
- htabof(i) = fcode;
- }
- else if ((count_int)in_count >= checkpoint && block_compress)
- cl_block ();
- }
-
- /*
- * Put out the final code.
- */
-
- output((code_int)ent);
- output((code_int)-1);
-
- return(CLen < fsize);
- }
-
- static char buf[BITS];
-
- char_type lmask[9] = {0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00};
- char_type rmask[9] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
-
- void
- output( code )
- code_int code;
- {
- register int r_off = offset, bits= n_bits;
- register char * bp = buf;
-
- if ( code >= 0 ) {
- /*
- * Get to the first byte.
- */
- bp += (r_off >> 3);
- r_off &= 7;
- /*
- * Since code is always >= 8 bits, only need to mask the first
- * hunk on the left.
- */
- *bp = (*bp & rmask[r_off]) | (code << r_off) & lmask[r_off];
- bp++;
- bits -= (8 - r_off);
- code >>= 8 - r_off;
- /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
- if ( bits >= 8 ) {
- *bp++ = code;
- code >>= 8;
- bits -= 8;
- }
- /* Last bits. */
- if(bits)
- *bp = code;
-
- offset += n_bits;
- if (offset == (n_bits << 3)) {
- bp = buf;
- bits = n_bits;
- mwrite(bp, bits);
- bp += bits;
- bits = 0;
- offset = 0;
- }
-
- /*
- * If the next entry is going to be too big for the code size,
- * then increase it, if possible.
- */
-
- if (free_ent > maxcode || (clear_flg > 0)) {
- /*
- * Write the whole buffer, because the input side won't
- * discover the size increase until after it has read it.
- */
- if (offset > 0)
- mwrite(buf, n_bits);
- offset = 0;
-
- if (clear_flg) {
- n_bits = INIT_BITS;
- maxcode = MAXCODE(INIT_BITS);
- clear_flg = 0;
- } else {
- n_bits++;
- if (n_bits == maxbits)
- maxcode = maxmaxcode;
- else
- maxcode = MAXCODE(n_bits);
- }
- }
- } else {
- /*
- * At EOF, write the rest of the buffer.
- */
- if (offset > 0)
- mwrite(buf, (offset + 7) / 8);
- offset = 0;
- }
- }
-
-
- char *
- xrindex(s, c) /* For those who don't have it in libc.a */
- register char *s, c;
- {
- char *p;
- for (p = NULL; *s; s++) {
- if (*s == c)
- p = s;
- }
- return(p);
- }
-
- void
- cl_block() /* table clear for block compress */
- {
- register long int rat;
-
- checkpoint = in_count + CHECK_GAP;
-
- if (in_count > 0x007fffff) { /* shift will overflow */
- rat = CLen >> 8;
- if (rat == 0) { /* Don't divide by zero */
- rat = 0x7fffffff;
- } else {
- rat = in_count / rat;
- }
- } else {
- rat = (in_count << 8) / CLen; /* 8 fractional bits */
- }
- if (rat > ratio) {
- ratio = rat;
- } else {
- ratio = 0;
- cl_hash ( (count_int) hsize );
- free_ent = FIRST;
- clear_flg = 1;
- output ( (code_int) CLEAR );
- }
- }
-
- void
- cl_hash(hsize) /* reset code table */
- register count_int hsize;
- {
- register count_int *htab_p = htab+hsize;
- register long i;
- register long m1 = -1;
-
- i = hsize - 16;
- do { /* might use Sys V memset(3) here */
- *(htab_p-16) = m1;
- *(htab_p-15) = m1;
- *(htab_p-14) = m1;
- *(htab_p-13) = m1;
- *(htab_p-12) = m1;
- *(htab_p-11) = m1;
- *(htab_p-10) = m1;
- *(htab_p-9) = m1;
- *(htab_p-8) = m1;
- *(htab_p-7) = m1;
- *(htab_p-6) = m1;
- *(htab_p-5) = m1;
- *(htab_p-4) = m1;
- *(htab_p-3) = m1;
- *(htab_p-2) = m1;
- *(htab_p-1) = m1;
- htab_p -= 16;
- } while ((i -= 16) >= 0);
- for ( i += 16; i > 0; i-- )
- *--htab_p = m1;
- }
-
- void
- UnCompressFile(insize)
- long insize;
- {
- register char_type *stackp;
- register int finchar;
- register code_int code, oldcode, incode;
-
- /*
- * As above, initialize the first 256 entries in the table.
- */
-
- setmem(htab, sizeof(htab), 0);
- setmem(codetab, sizeof(codetab), 0);
-
- offset = clear_flg = ratio = 0;
- in_count = 1;
- checkpoint = CHECK_GAP;
- n_bits = INIT_BITS; /* number of bits/code */
- maxbits = BITS; /* user settable max # bits/code */
- maxcode = MAXCODE(INIT_BITS); /* maximum code, given n_bits */
- maxmaxcode = 1 << BITS; /* should NEVER generate this code */
-
- for ( code = 255; code >= 0; code-- ) {
- tab_prefixof(code) = 0;
- tab_suffixof(code) = (char_type)code;
- }
- free_ent = ((block_compress) ? FIRST : 256 );
-
- finchar = oldcode = getcode();
- if (oldcode == -1) /* EOF already? */
- return; /* Get out of here */
- oputc((char)finchar); /* first code must be 8 bits = char */
- stackp = de_stack;
-
- while ((code = getcode()) > -1) {
- if ((code == CLEAR) && block_compress) {
- for (code = 255; code >= 0; code--)
- tab_prefixof(code) = 0;
- clear_flg = 1;
- free_ent = FIRST - 1;
- if ((code = getcode()) == -1) /* O, untimely death! */
- break;
- }
- incode = code;
- /*
- * Special case for KwKwK string.
- */
- if (code >= free_ent) {
- *stackp++ = finchar;
- code = oldcode;
- }
-
- /*
- * Generate output characters in reverse order
- */
- while ( code >= 256 ) {
- *stackp++ = tab_suffixof(code);
- code = tab_prefixof(code);
- }
- *stackp++ = finchar = tab_suffixof(code);
-
- /*
- * And put them out in forward order
- */
- do
- oputc (*--stackp);
- while (stackp > de_stack);
-
- /*
- * Generate the new entry.
- */
- if ((code=free_ent) < maxmaxcode) {
- tab_prefixof(code) = (unsigned short)oldcode;
- tab_suffixof(code) = finchar;
- free_ent = code+1;
- }
- /*
- * Remember previous code.
- */
- oldcode = incode;
- }
- }
-
- code_int
- getcode()
- {
- /*
- * On the VAX, it is important to have the register declarations
- * in exactly the order given, or the asm will break.
- */
-
- register code_int code;
- static int offset = 0, size = 0;
- static char_type buf[BITS];
- register int r_off, bits;
- register char_type *bp = buf;
-
- if (clear_flg > 0 || offset >= size || free_ent > maxcode) {
- /*
- * If the next entry will be too big for the current code
- * size, then we must increase the size. This implies reading
- * a new buffer full, too.
- */
- if ( free_ent > maxcode ) {
- n_bits++;
- if ( n_bits == maxbits )
- maxcode = maxmaxcode; /* won't get any bigger now */
- else
- maxcode = MAXCODE(n_bits);
- }
- if ( clear_flg > 0) {
- maxcode = MAXCODE (n_bits = INIT_BITS);
- clear_flg = 0;
- }
- size = oread(buf, n_bits);
- if (size <= 0)
- return -1; /* end of file */
- offset = 0;
- size = (size << 3) - (n_bits - 1);
- }
- r_off = offset;
- bits = n_bits;
-
- /*
- * Get to the first byte.
- */
- bp += (r_off >> 3);
- r_off &= 7;
- /* Get first part (low order bits) */
-
- code = (*bp++ >> r_off);
-
- bits -= (8 - r_off);
- r_off = 8 - r_off; /* now, offset into code word */
- /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
- if ( bits >= 8 ) {
- code |= *bp++ << r_off;
- r_off += 8;
- bits -= 8;
- }
- /* high order bits. */
- code |= (*bp & rmask[bits]) << r_off;
-
- offset += n_bits;
-
- return code;
- }
-