home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
PC World Komputer 1999 mARCH
/
PCWK3A99.iso
/
Archiwiz
/
Tar320
/
SOURCES.ZIP
/
COMPRESS.C
< prev
next >
Wrap
Text File
|
1994-12-31
|
13KB
|
459 lines
/* Compress - data compression program */
/* machine variants which require cc -Dmachine: pdp11, z8000, pcxt */
/* The following code is derived from regular 'compress' program, */
/* revision 4.0 85/07/30 by Joe Orost (decvax!vax135!petsd!joe) */
/*
* Algorithm from "A Technique for High Performance Data Compression",
* Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19.
*
* Algorithm:
* Modified Lempel-Ziv method (LZW). Basically finds common
* substrings and replaces them with a variable size code. This is
* deterministic, and can be done on the fly. Thus, the decompression
* procedure needs no input table, but tracks the way the table was built.
*/
#include "modern.h"
#include "compress.h"
#ifdef USE_COMPRESS
#include "lzwbits.h"
#ifndef USG
# ifdef i386
# define SYS_V
# endif
# ifdef M_XENIX
# define SYS_V
# endif
#endif
#ifdef SYS_V
# include <memory.h>
# ifndef MEMSET
# define MEMSET
# endif
#endif
#ifdef MSC_VER
# include <memory.h>
# ifndef MEMSET
# define MEMSET
# endif
#endif
#ifdef __TURBOC__
# include <mem.h>
# ifndef MEMSET
# define MEMSET
# endif
#endif
#ifdef XENIX_16
static code_int c_hashsize;
# define c_HSIZE c_hashsize
#else
# define c_HSIZE _HSIZE
#endif
#ifdef MSDOS
# include <stdlib.h>
# ifndef ODDBYTES
# define ODDBYTES
# endif
#else
char *malloc();
void free();
#endif
#if BITS > 15
# ifdef ODDBYTES
typedef signed char tentry[3];
# define ENMASK(x) ((x) & 0xffffffL)
# else
typedef count_int tentry;
# endif
#else
typedef count_int tentry;
#endif
#ifdef ENMASK
# define EMPTY (count_int)ENMASK(~0L)
#else
# define EMPTY (count_int)~0L
#endif
static int c_n_bits; /* number of bits/code */
static int c_maxbits = BITS; /* user settable max # bits/code */
static code_int c_maxcode; /* maximum code, given n_bits */
/* should NEVER generate this code */
static code_int c_maxmaxcode = (code_int)1 << BITS;
#define word_type unsigned short
#define WNIL (word_type)0
#define CNIL (count_int)0
#ifdef XENIX_16
static tentry *htab[MAXPAGES];
static word_type *codetab[MAXPAGES] = {WNIL};
# define codetabof(i) (codetab[(int)((i) >> PAGEXP)][(int)(i) & PAGEMASK])
# define htabof(i) (htab[(int)((i) >> PAGEXP)][(int)(i) & PAGEMASK])
#else
static tentry *htab;
static word_type *codetab = WNIL;
# define codetabof(i) codetab[i]
# define htabof(i) htab[i]
#endif
static code_int hsize = _HSIZE; /* for dynamic table sizing */
static code_int c_free_ent = 0; /* first unused entry */
/* block compression parameters -- after all codes are */
/* used up, and compression rate changes, start over. */
static int c_block_compress = BLOCK_MASK;
static int c_clear_flg = 0;
static long int ratio = 0;
static count_int c_checkpoint = CHECK_GAP;
static void cl_hash __ARGS__((count_int));
static void cl_hash(clsize) /* reset code table */
register count_int clsize;
{
#ifdef XENIX_16
register j, l;
# ifndef MEMSET
register i;
# endif
for (j=0; j<MAXPAGES && clsize>=0; j++, clsize-=PAGESIZE) {
l = clsize<PAGESIZE ? (int)clsize : PAGESIZE;
# ifdef MEMSET
(void)memset((char*)htab[j], (int)EMPTY, l*sizeof(**htab));
# else
for (i=0; i<l; i++)
# ifdef ODDBYTES
{
register signed char *p = htab[j][i];
*(unsigned short *)p = (unsigned short)EMPTY;
p[2] = (signed char)(EMPTY >> 16);
}
# else
htab[j][i] = EMPTY;
# endif
# endif
}
#else
# ifdef MEMSET
(void)memset(htab, (int)EMPTY, clsize*sizeof(*htab));
# else
register count_int i; for (i=0; i<clsize; i++) htab[i] = EMPTY;
# endif
#endif
}
static int offset;
static long int in_count = 1; /* length of input */
static long int bytes_out; /* length of compressed output */
/* interface function pointer */
static void (*putbyte) __ARGS__((int));
static void putpiece __ARGS__((char *, int));
static void putpiece(p, n)
register char *p; register n;
{
bytes_out += n; while (n-- > 0) (*putbyte)(*p++); offset = 0;
}
/* Output the given code.
* Inputs:
* code: A n_bits-bit integer. If == -1, then EOF. This assumes
* that n_bits =< (long)wordsize - 1.
* Outputs:
* Outputs code to the file.
* Assumptions:
* Chars are 8 bits long.
* Algorithm:
* Maintain a BITS character long buffer (so that 8 codes will
* fit in it exactly). Use the VAX insv instruction to insert each
* code in turn. When the buffer fills up empty it and start over.
*/
static char outbuf[BITS];
static void output __ARGS__((word_type));
static void output(code)
word_type code;
{
#ifndef vax
static char_type rmask[] = {0x00,0x01,0x03,0x07,0x0f,0x1f,0x3f,0x7f};
#endif
/* On the VAX, it is important to have the register declarations */
/* in exactly the order given, or the asm will break. */
register int r_off = offset, bits = c_n_bits;
register char *bp = outbuf;
#ifdef vax
/* VAX DEPENDENT!! Implementation on other machines is below.
*
* Translation: Insert BITS bits from the argument starting at
* offset bits from the beginning of buf.
*/
0; /* Work around for pcc -O bug with asm and if stmt */
asm( "insv 4(ap),r11,r10,(r9)" );
#else
/* byte/bit numbering on the VAX is simulated by the following code */
/* Get to the first byte. */
bp += (r_off >> 3);
r_off &= 7;
/* Since code is always >= 8 bits, only */
/* need to mask the first hunk on the left. */
*bp = (*bp & rmask[r_off]) | (code << r_off);
bp++;
code >>= (r_off = 8 - r_off);
if ((bits -= r_off) >= 8) {
/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
*bp++ = code;
code >>= 8;
bits -= 8;
}
/* Last bits. */
if (bits) *bp = code;
#endif
if ((offset += c_n_bits)==(c_n_bits << 3)) putpiece(outbuf,c_n_bits);
/* If the next entry is going to be too big for */
/* the code size, then increase it, if possible. */
if (c_free_ent > c_maxcode || c_clear_flg > 0) {
/* Write the whole buffer, because the input side won't */
/* discover the size increase until after it has read it */
if (offset > 0) putpiece(outbuf, c_n_bits);
if (c_clear_flg) {
c_maxcode = MAXCODE(c_n_bits = INIT_BITS);
c_clear_flg = 0;
} else {
c_maxcode = ++c_n_bits == c_maxbits ?
c_maxmaxcode : MAXCODE(c_n_bits);
}
}
}
static void cl_block __ARGS__((void)) /* table clear for block compress */
{
register long int rat;
c_checkpoint = in_count + CHECK_GAP;
if (in_count > 0x007fffffL) { /* shift will overflow */
if ((rat = bytes_out >> 8) == 0) {/* Don't divide by zero */
rat = 0x7fffffffL;
} else {
rat = in_count / rat;
}
} else {
rat = (in_count << 8) / bytes_out; /* 8 fractional bits */
}
if (rat > ratio) {
ratio = rat;
} else {
ratio = 0;
cl_hash((count_int)hsize);
c_free_ent = FIRST;
c_clear_flg = 1;
output(CLEAR);
}
}
int z_gettab(bits)
int bits;
{
#ifdef XENIX_16
register i, j; long l;
#endif
if (c_maxbits > bits) c_maxbits = bits;
else if (c_maxbits < INIT_BITS) c_maxbits = INIT_BITS;
#ifdef XENIX_16
if (codetab[0]) return c_maxbits;
if (c_maxbits >= 16) c_hashsize = 69001L;
else if (c_maxbits >= 15) c_hashsize = 35023L;
else if (c_maxbits >= 14) c_hashsize = 18013L;
else if (c_maxbits >= 13) c_hashsize = 9001L;
else c_hashsize = 5003L;
for (l=c_hashsize, i=0; i<MAXPAGES && l > 0; i++) {
j = l > PAGESIZE ? PAGESIZE : (int)l;
codetab[i] = (word_type *)malloc(sizeof(word_type)*j);
if (!codetab[i]) break;
htab[i] = (tentry*)malloc(sizeof(**htab) * j);
if (!htab[i]) {
free((char*)(codetab[i])); codetab[i]=WNIL; break;
}
l -= j;
}
c_hashsize -= l;
if (c_hashsize >= 69001L) { j = 16; c_hashsize = 69001L; }
else if (c_hashsize >= 35023L) { j = 15; c_hashsize = 35023L; }
else if (c_hashsize >= 18013) { j = 14; c_hashsize = 18013; }
else if (c_hashsize >= 9001) { j = 13; c_hashsize = 9001; }
else if (c_hashsize >= 5003) { j = 12; c_hashsize = 5003; }
else return -1;
if (c_maxbits > j) c_maxbits = j;
#else
if (codetab) return c_maxbits;
if ((codetab=(word_type *)malloc(sizeof(*codetab)*_HSIZE))==WNIL ||
(htab =(count_int *)malloc(sizeof(*htab) *_HSIZE))==CNIL) {
return -1;
}
#endif
c_maxmaxcode = (code_int)1 << c_maxbits;
return c_maxbits;
}
void z_reltab()
{
#ifdef XENIX_16
register i;
for (i=0; i<MAXPAGES && codetab[i]!=WNIL; i++) {
free((char*)(codetab[i])); free((char*)(htab[i]));
}
codetab[0] = WNIL;
#else
if (codetab != WNIL) {
free((char*)codetab); codetab = WNIL;
if (htab != CNIL) free((char*)htab);
}
#endif
}
/*
* Algorithm: use open addressing double hashing (no chaining) on the
* prefix code / next character combination. We do a variant of Knuth's
* algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime
* secondary probe. Here, the modular division first probe is gives way
* to a faster exclusive-or manipulation. Also do block compression with
* an adaptive reset, whereby the code table is cleared when the compression
* ratio decreases, but after the table fills. The variable-length output
* codes are re-sized at this point, and a special CLEAR code is generated
* for the decompressor. Late addition: construct the table according to
* file size for noticeable speed improvement on small files. Please direct
* questions about this implementation to ames!jaw.
*/
static int already = 0;
static int hshift;
static unsigned short ent;
int cbegin(wishbits, putb, fsize)
int wishbits;
void (*putb)();
long fsize;
{
register long fcode;
if (z_gettab(wishbits) < 0) return -1;
c_block_compress = BLOCK_MASK;
c_checkpoint = CHECK_GAP;
putbyte = putb;
hsize = _HSIZE;
/* tune hash table size for small files -- ad hoc, */
/* but the sizes match earlier #defines, which */
/* serve as upper bounds on the number of output codes. */
if (fsize < (1<<12)) hsize = 5003;
else if (fsize < (1<<13)) hsize = 9001;
else if (fsize < (1<<14)) hsize = 18013;
else if (fsize < (1<<15)) hsize = 35023L;
else if (fsize < 47000L) hsize = 50021L;
if (hsize > c_HSIZE) hsize = c_HSIZE;
offset = 0;
bytes_out = 3; /* includes 3-byte header mojo */
c_clear_flg = 0;
ratio = 0;
in_count = 1;
c_checkpoint = CHECK_GAP;
c_maxcode = MAXCODE(c_n_bits = INIT_BITS);
c_free_ent = c_block_compress ? FIRST : 256;
hshift = 0;
for (fcode = (long)hsize; fcode < 65536L; fcode *= 2L) hshift++;
hshift = 8 - hshift; /* set hash code range bound */
cl_hash((count_int)hsize); /* clear hash table */
already = 0;
return c_maxbits;
}
#define GETBYTE() (--len, char_to_byte(*(char_type *)buf++))
void cpiece(buf, len)
char *buf;
int len;
{
register long fcode;
register code_int i = 0;
register int c;
register long tmp;
register code_int disp;
if (!already) {
(*putbyte)(LZW_0TH_MAGIC); (*putbyte)(LZW_1ST_MAGIC);
(*putbyte)((char)(c_maxbits | c_block_compress));
already = 1; ent = GETBYTE();
}
while (len > 0) {
c = GETBYTE(); in_count++;
fcode = ((long)c << c_maxbits) + ent;
i = ((code_int)c << hshift) ^ ent; /* xor hashing */
disp = i ? hsize-i : 1; /* secondary hash (after G. Knott) */
while ((tmp =
#ifdef ODDBYTES
# ifdef ENMASK
ENMASK(*(long*)htabof(i))
# else
(((long)*(2+(signed char *)htabof(i))) << 16) |
*(unsigned short *)htabof(i)
# endif
#else
htabof(i)
#endif
) != EMPTY) {
if (tmp == fcode) {
ent = codetabof(i); goto next;
}
if ((i -= disp) < 0) i += hsize;
}
output(ent);
ent = c;
if (to_compare(c_free_ent) < to_compare(c_maxmaxcode)) {
#ifdef ODDBYTES
register signed char *p = htabof(i);
*(unsigned short *)p = (unsigned short)fcode;
p[2] = (signed char)(fcode >> 16);
#else
htabof(i) = fcode;
#endif
codetabof(i) = c_free_ent++; /* code -> hashtable */
}
else if ((count_int)in_count >= c_checkpoint && c_block_compress)
cl_block();
next: ;
}
}
long cflush()
{
/* Put out the final code. */
output(ent);
/* At EOF, write the rest of the buffer. */
putpiece(outbuf, (offset+7)/8);
return bytes_out;
}
#endif