home *** CD-ROM | disk | FTP | other *** search
- /*
- Posting-number: Volume 02, Issue 098
- Submitted-by: Russ Nelson <nelson@sun.soe.clarkson.edu>
- Archive-name: flzh/flzh_rn.c
- */
- /*
- Here is another text posting. It speaks for itself. Just in case
- others come up with further revisions, I have named this one
- "flzh_rn.c" after "Faster LZHuf by Russ Nelson". Note that I have
- added the copyright statement as requested by Kenji Rikitake in an
- earlier Usenet article. There seems to be a compiler dependency here,
- because it won't execute correctly if compiled with Turbo C 1.0, but
- Russ Nelson tells me it does work with later versions of Turbo C,
- 1.5 or 2, but I don't remember which. -- R.D
- */
-
- #ifdef USE_ASM
- #pragma inline
- #endif
-
- /*
- LZHUF.C (c)1989 by Haruyasu Yoshizaki, Haruhiko Okumura, and Kenji Rikitake.
- All rights reserved. Permission granted for non-commercial use.
- */
-
- /*
- * LZHUF.C English version 1.0
- * Based on Japanese version 29-NOV-1988
- * LZSS coded by Haruhiko OKUMURA
- * Adaptive Huffman Coding coded by Haruyasu YOSHIZAKI
- * Edited and translated to English by Kenji RIKITAKE
- * Assembly language added by Russell Nelson (nelson@clutx.clarkson.edu)
- * Makes it 1.56 times faster in compression,
- * and 1.53 times faster in decompression.
- * Warning! If you change anything, verify that the register use doesn't
- * change.
- * Some C optimization added by Russell Nelson.
- */
-
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <ctype.h>
-
- /* Process exit codes. The standard names are Turbo-C dependent;
- EXIT_SUCCESS, EXIT_FAILURE
- were therefore renamed by Kenji */
-
- #define EXIT_OK 0 /* returned by main() on success */
- #define EXIT_FAILED -1 /* returned by main() on bad usage, passed to exit() by Error() */
-
- FILE *infile, *outfile; /* input and output streams, opened by main() */
- unsigned long int textsize = 0, codesize = 0, printcount = 0; /* textsize: count of uncompressed bytes;
- * codesize: compressed bytes written so far (see Putcode, EncodeEnd);
- * printcount: next byte count at which a progress line is printed */
-
- void Error(char *message)
- {
- printf("\n%s\n", message);
- exit(EXIT_FAILED);
- }
-
- /* LZSS Parameters */
-
- #define N 4096 /* Size of string buffer */
- #define F 60 /* Size of look-ahead buffer */
- #define THRESHOLD 2 /* matches of this length or less are coded as literals (see Encode) */
- #define NIL N /* End of tree's node */
-
- unsigned char
- text_buf[N + F - 1]; /* ring buffer; Encode() mirrors the first F - 1 bytes
- * at index N and above so string comparisons need not wrap */
- int match_position, match_length, /* result of the most recent InsertNode() call */
- lson[N + 1], rson[N + 257], dad[N + 1]; /* left child, right child and parent of
- * each buffer position; rson[N + 1 .. N + 256] are the tree roots,
- * one per possible first byte (see InitTree, InsertNode) */
-
- void InitTree(void) /* Initializing tree */
- {
- int i;
-
- for (i = N + 1; i <= N + 256; i++)
- rson[i] = NIL; /* root */
- for (i = 0; i < N; i++)
- dad[i] = NIL; /* node */
- }
-
- void InsertNode(int r) /* Inserting node to the tree.
- * Inserts the string that starts at text_buf[r] (up to F bytes) into the
- * tree whose root slot is rson[N + 1 + text_buf[r]]. While descending it
- * records the longest match seen in match_length and its position code
- * ((r - p) & (N - 1)) - 1 in match_position; among matches of equal
- * length the smaller position code wins. If a match of length F is
- * found, r replaces the matching node p (code after the #endif) instead
- * of being added as a new leaf.
- * NOTE(review): the asm path orders strings with signed compares and a
- * flag bit, the C path with differences of unsigned chars, so the two
- * builds may shape the tree differently -- confirm this is acceptable.
- */
- {
- int i, p, cmp;
- unsigned char *key, keychar;
- unsigned c;
-
- cmp = 1; /* non-negative, so the first step follows rson[] from the root slot */
- key = &text_buf[r];
- keychar = key[1];
- p = N + 1 + key[0]; /* root slot selected by the first byte */
- rson[r] = lson[r] = NIL;
- match_length = 0;
-
- #ifdef USE_ASM
-
- /* for speed's sake, we use a bunch of hacks. If you change this code, be
- * sure to tcc -S it before you attempt to run it. If you can't figure
- * out what something's doing, look at the standard C version of it in the
- * #else clause.
- */
-
- #define SF 0x1000 /* 8086 sign flag. NOTE(review): after lahf the sign flag is
- * expected in bit 7 of AH, i.e. 0x8000 in AX; 0x1000 looks like the
- * auxiliary-carry bit -- confirm against the Intel LAHF description */
-
- /* We're going to hold p in _SI. Turbo C would ordinarily put it in a
- * register for us, but it refuses to do so if it sees any mention of
- * the register, either in a 'asm' statement or the _SI pseudovariable.
- * When we actually use _SI, we push it first.
- *
- * Similarly for r in _DI. Note that the algorithm doesn't change r.
- */
-
- _SI = p;
- #define p _SI
- _DI = r;
- #define r _DI
-
- _ES = _DS; /* we're going to use cmpsb */
- asm cld
-
- /* many times the initial characters don't match, so we spend a fair amount
- * of time in the following unstructured code.
- */
-
- for ( ; ; ) {
- if ((cmp & SF) == 0) {
- right:
- asm mov bx,si
- asm mov ax,rson[bx+si]
- if (_AX != NIL) {
- asm mov si,ax;
- asm mov al,text_buf[si+1];
- asm cmp keychar,al;
- asm jg right;
- asm jl left;
- } else {
- rson[p] = r;
- dad[r] = p;
- return;
- }
- } else {
- left:
- asm mov bx,si
- asm mov ax,lson[bx+si]
- if (_AX != NIL) {
- asm mov si,ax;
- asm mov al,text_buf[si+1];
- asm cmp keychar,al;
- asm jg right;
- asm jl left;
- } else {
- lson[p] = r;
- dad[r] = p;
- return;
- }
- }
- equal:
- asm push si
- asm push di
- _DI = (unsigned) &text_buf[p+1];
- _SI = (unsigned) &key[1];
- _CX = F - 1;
- /* The semantics of cmpsb are not well understood. Every comparison decrements
- * _CX and bumps _SI and _DI. If the values compared are equal and _CX <> 0
- * then the cmpsb repeats. Otherwise the flags are set to the result of the
- * comparison. The consequence of this is that the only way to determine
- * whether the entire string was equal is to check the flags. If the two
- * strings are identical up to the last character, _CX will be zero
- * whether or not the last characters match.
- *
- * The Microsoft Macro Assembler 5.0 Reference Booklet gets it wrong, even
- * though Intel documents it very precisely and accurately. Boo! Hiss!
- *
- * If _CX is zero before the cmpsb, the flags are unchanged. This affects
- * the interpretation of zero length strings. Are they equal or different?
- * If you wish them to be equal, you can "or cx,cx". If you wish them to
- * be different you can "or sp,sp". In a subroutine, sp is guaranteed to
- * be nonzero.
- */
- asm repe cmpsb /* 7% of runtime is spent here! */
- /* remember the sign flag to see if it was larger or smaller */
- asm lahf
- cmp = _AX;
- /* if it matched, we want _CX to be zero */
- asm je matched;
- _CX++;
- matched:
- i = F - _CX;
- asm pop di;
- asm pop si;
- if (i > THRESHOLD) {
- if (i > match_length) {
- match_position = ((r - p) & (N - 1)) - 1;
- if ((match_length = i) >= F)
- break;
- }
- if (i == match_length) {
- if (((r - p) & (N - 1)) - 1 < match_position) {
- match_position = _AX; /* relies on the compiler having left the value just compared in AX */
- }
- }
- }
- }
- #else
- for ( ; ; ) {
- if (cmp >= 0) { /* key is not smaller than node p: go right */
- if (rson[p] != NIL)
- p = rson[p];
- else {
- rson[p] = r;
- dad[r] = p;
- return;
- }
- } else { /* key is smaller than node p: go left */
- if (lson[p] != NIL)
- p = lson[p];
- else {
- lson[p] = r;
- dad[r] = p;
- return;
- }
- }
- for (i = 1; i < F; i++) /* i = number of matching bytes; cmp = first difference */
- if ((cmp = key[i] - text_buf[p + i]) != 0)
- break;
- if (i > THRESHOLD) {
- if (i > match_length) {
- match_position = ((r - p) & (N - 1)) - 1;
- if ((match_length = i) >= F)
- break; /* full-length match: replace p by r below */
- }
- if (i == match_length) { /* same length: keep the smaller position code */
- if ((c = ((r - p) & (N - 1)) - 1) < match_position) {
- match_position = c;
- }
- }
- }
- }
- #endif
- dad[r] = dad[p]; /* from here on r takes over p's links in the tree */
- lson[r] = lson[p];
- rson[r] = rson[p];
- dad[lson[p]] = r;
- dad[rson[p]] = r;
- if (rson[dad[p]] == p)
- rson[dad[p]] = r;
- else
- lson[dad[p]] = r;
- dad[p] = NIL; /* remove p */
- #undef p
- #undef r
- }
-
- void DeleteNode(int p) /* Deleting node from the tree */
- {
- int q;
-
- if (dad[p] == NIL)
- return; /* unregistered */
- if (rson[p] == NIL)
- q = lson[p];
- else
- if (lson[p] == NIL)
- q = rson[p];
- else {
- q = lson[p];
- if (rson[q] != NIL) {
- do {
- q = rson[q];
- } while (rson[q] != NIL);
- rson[dad[q]] = lson[q];
- dad[lson[q]] = dad[q];
- lson[q] = lson[p];
- dad[lson[p]] = q;
- }
- rson[q] = rson[p];
- dad[rson[p]] = q;
- }
- dad[q] = dad[p];
- if (rson[dad[p]] == p)
- rson[dad[p]] = q;
- else
- lson[dad[p]] = q;
- dad[p] = NIL;
- }
-
- /* Huffman coding parameters */
-
- #define N_CHAR (256 - THRESHOLD + F)
- /* number of character codes (= 0..N_CHAR-1): 256 literal bytes plus one code per match length (see Encode) */
- #define T (N_CHAR * 2 - 1) /* Size of table */
- #define R (T - 1) /* root position */
- #define MAX_FREQ 0x8000
- /* update() calls reconst() when the cumulative frequency */
- /* at the root, freq[R], reaches this value */
-
typedef unsigned char uchar;

/*
 * Tables for encoding/decoding the upper 6 bits of the sliding dictionary
 * pointer with a fixed prefix code of 3 to 8 bits.
 */

/* encoder table: p_len[k] is the code length in bits for upper value k */
uchar p_len[64] = {
    /* k = 0: 3 bits */
    0x03,
    /* k = 1..3: 4 bits */
    0x04, 0x04, 0x04,
    /* k = 4..11: 5 bits */
    0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
    /* k = 12..23: 6 bits */
    0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
    0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
    /* k = 24..47: 7 bits */
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    /* k = 48..63: 8 bits */
    0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
    0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08
};

/* encoder table: p_code[k] is the code for upper value k, left-aligned in a byte */
uchar p_code[64] = {
    /* 3-bit code */
    0x00,
    /* 4-bit codes */
    0x20, 0x30, 0x40,
    /* 5-bit codes */
    0x50, 0x58, 0x60, 0x68, 0x70, 0x78, 0x80, 0x88,
    /* 6-bit codes */
    0x90, 0x94, 0x98, 0x9C, 0xA0, 0xA4,
    0xA8, 0xAC, 0xB0, 0xB4, 0xB8, 0xBC,
    /* 7-bit codes */
    0xC0, 0xC2, 0xC4, 0xC6, 0xC8, 0xCA, 0xCC, 0xCE,
    0xD0, 0xD2, 0xD4, 0xD6, 0xD8, 0xDA, 0xDC, 0xDE,
    0xE0, 0xE2, 0xE4, 0xE6, 0xE8, 0xEA, 0xEC, 0xEE,
    /* 8-bit codes */
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
};

/* decoder table: d_code[b] is the upper value whose code is a prefix of byte b */
uchar d_code[256] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    /* 0x30 */ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
    /* 0x40 */ 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
    /* 0x50 */ 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
    /* 0x60 */ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    /* 0x70 */ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
    /* 0x80 */ 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B,
    /* 0x90 */ 0x0C, 0x0C, 0x0C, 0x0C, 0x0D, 0x0D, 0x0D, 0x0D, 0x0E, 0x0E, 0x0E, 0x0E, 0x0F, 0x0F, 0x0F, 0x0F,
    /* 0xA0 */ 0x10, 0x10, 0x10, 0x10, 0x11, 0x11, 0x11, 0x11, 0x12, 0x12, 0x12, 0x12, 0x13, 0x13, 0x13, 0x13,
    /* 0xB0 */ 0x14, 0x14, 0x14, 0x14, 0x15, 0x15, 0x15, 0x15, 0x16, 0x16, 0x16, 0x16, 0x17, 0x17, 0x17, 0x17,
    /* 0xC0 */ 0x18, 0x18, 0x19, 0x19, 0x1A, 0x1A, 0x1B, 0x1B, 0x1C, 0x1C, 0x1D, 0x1D, 0x1E, 0x1E, 0x1F, 0x1F,
    /* 0xD0 */ 0x20, 0x20, 0x21, 0x21, 0x22, 0x22, 0x23, 0x23, 0x24, 0x24, 0x25, 0x25, 0x26, 0x26, 0x27, 0x27,
    /* 0xE0 */ 0x28, 0x28, 0x29, 0x29, 0x2A, 0x2A, 0x2B, 0x2B, 0x2C, 0x2C, 0x2D, 0x2D, 0x2E, 0x2E, 0x2F, 0x2F,
    /* 0xF0 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F
};

/* decoder table: d_len[b] is the length in bits of the code that byte b starts with */
uchar d_len[256] = {
    /* 0x00 */ 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
    /* 0x10 */ 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
    /* 0x20 */ 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
    /* 0x30 */ 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
    /* 0x40 */ 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
    /* 0x50 */ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
    /* 0x60 */ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
    /* 0x70 */ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
    /* 0x80 */ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
    /* 0x90 */ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
    /* 0xA0 */ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
    /* 0xB0 */ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
    /* 0xC0 */ 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    /* 0xD0 */ 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    /* 0xE0 */ 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    /* 0xF0 */ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08
};
-
- unsigned freq[T + 1]; /* cumulative freq table; freq[T] is set to 0xffff by
- * StartHuff() and bounds the searches in update() */
-
- /*
- * Parent of each tree node.
- * Entries [T..(T + N_CHAR - 1)] locate the leaves: prnt[c + T] is the
- * position of the leaf for character code c.
- */
- int prnt[T + N_CHAR];
-
- /* Children of each node: an internal node i has the children son[i] and
- * son[i] + 1; a leaf for character code c holds son[i] = c + T */
- int son[T];
-
- unsigned getbuf = 0;
- uchar getlen = 0;
-
- int GetBit(void) /* get one bit */
- {
- int i;
-
- while (getlen <= 8) {
- if ((i = getc(infile)) < 0) i = 0;
- getbuf |= i << (8 - getlen);
- getlen += 8;
- }
- i = getbuf;
- getbuf <<= 1;
- getlen--;
- return (i < 0);
- }
-
- int GetByte(void) /* get a byte */
- {
- unsigned i;
-
- while (getlen <= 8) {
- if ((i = getc(infile)) < 0) i = 0;
- getbuf |= i << (8 - getlen);
- getlen += 8;
- }
- #ifdef USE_ASM
- _AX = *(((unsigned char *)&getbuf)+1);
- _BX = getbuf;
- _BH = _BL;
- _BL = 0;
- asm mov getbuf,bx;
- getlen -= 8;
- return _AX;
- #else
- i = getbuf;
- getbuf <<= 8;
- getlen -= 8;
- return i >> 8;
- #endif
- }
-
- unsigned putbuf = 0;
- uchar putlen = 0;
-
- void Putcode(int l, unsigned c) /* output c bits */
- {
- putbuf |= c >> putlen;
- if ((putlen += l) >= 8) {
- putc(putbuf >> 8, outfile);
- if ((putlen -= 8) >= 8) {
- putc(putbuf, outfile);
- codesize += 2;
- putlen -= 8;
- putbuf = c << (l - putlen);
- } else {
- putbuf <<= 8;
- codesize++;
- }
- }
- }
-
-
- /* initialize freq tree */
-
- void StartHuff()
- {
- int i, j;
-
- for (i = 0; i < N_CHAR; i++) {
- freq[i] = 1;
- son[i] = i + T;
- prnt[i + T] = i;
- }
- i = 0; j = N_CHAR;
- while (j <= R) {
- freq[j] = freq[i] + freq[i + 1];
- son[j] = i;
- prnt[i] = prnt[i + 1] = j;
- i += 2; j++;
- }
- freq[T] = 0xffff;
- prnt[R] = 0;
- }
-
-
- /* reconstruct freq tree */
-
- void reconst()
- {
- int i, j, k;
- unsigned f, l;
-
- /* halven cumulative freq for leaf nodes */
- j = 0;
- for (i = 0; i < T; i++) {
- if (son[i] >= T) {
- freq[j] = (freq[i] + 1) / 2;
- son[j] = son[i];
- j++;
- }
- }
- /* make a tree : first, connect children nodes */
- for (i = 0, j = N_CHAR; j < T; i += 2, j++) {
- k = i + 1;
- f = freq[j] = freq[i] + freq[k];
- for (k = j - 1; f < freq[k]; k--);
- k++;
- l = (j - k) * 2;
- (void)memmove(&freq[k + 1], &freq[k], l);
- freq[k] = f;
- (void)memmove(&son[k + 1], &son[k], l);
- son[k] = i;
- }
- /* connect parent nodes */
- for (i = 0; i < T; i++) {
- if ((k = son[i]) >= T) {
- prnt[k] = i;
- } else {
- prnt[k] = prnt[k + 1] = i;
- }
- }
- }
-
-
- /* update freq tree
- * Increments the frequency of the leaf for character code c and of each
- * of its ancestors up to the root. Whenever an incremented node would
- * break the ascending order of freq[], it is exchanged with the last
- * node whose frequency is smaller. reconst() is called first when the
- * root frequency freq[R] has reached MAX_FREQ.
- */
-
- void update(int c)
- {
- register int k, l;
- int i, j;
-
- if (freq[R] == MAX_FREQ) {
- reconst();
- }
- #ifdef USE_ASM
- #define k _DX /* _DX is safe to use. */
- _SI = prnt[c + T];
- #define c _SI
- do {
- more_k:
- k = ++freq[c];
- asm cmp dx,word ptr DGROUP:_freq+2[bx];
- asm ja start;
- asm mov si,word ptr DGROUP:_prnt[bx];
- asm or si,si;
- asm jne more_k;
- break;
- start:
- _BX = (unsigned)&freq[c+1];
- again:
- asm cmp dx,[bx]
- asm jbe done
- _BX += 4;
- asm cmp dx,[bx-2]
- asm ja again
- _BX -= 2;
- done:
- _BX -= (unsigned) &freq;
- l = _BX >> 1;
- #else
- c = prnt[c + T]; /* start at the leaf of this character code */
- do {
- /* keep the outer loop together so stupid compilers
- * can optimize.
- */
- do {
- k = ++freq[c];
- /* swap nodes to keep the tree freq-ordered */
- if (k > freq[c + 1]) goto start;
- } while ((c = prnt[c]) != 0);
- break;
- start:
- l = c + 1;
- /* this is the inner loop -- unroll it a few times */
- while (k > freq[++l] &&
- k > freq[++l] &&
- k > freq[++l]);
- #endif
- l--; /* l is now the last node whose frequency is below k */
- freq[c] = freq[l]; /* exchange the frequencies of nodes c and l */
- freq[l] = k;
-
- i = son[c]; /* what used to hang below c now hangs below l */
- prnt[i] = l;
- if (i < T) prnt[i + 1] = l; /* internal node: re-parent the second child too */
-
- j = son[l];
- son[l] = i;
-
- prnt[j] = c; /* and what used to hang below l now hangs below c */
- if (j < T) prnt[j + 1] = c;
- son[c] = j;
-
- c = l; /* continue upward from the node's new position */
- } while ((c = prnt[c]) != 0); /* do it until reaching the root */
- #undef k
- #undef c
- }
-
- unsigned code, len;
-
- void EncodeChar(unsigned c)
- {
- unsigned i;
- int j, k;
-
- i = 0;
- j = 0;
- k = prnt[c + T];
-
- /* search connections from leaf node to the root */
- do {
- i >>= 1;
-
- /*
- if node's address is odd, output 1
- else output 0
- */
- if (k & 1) i += 0x8000;
-
- j++;
- } while ((k = prnt[k]) != R);
- Putcode(j, i);
- code = i;
- len = j;
- update(c);
- }
-
- void EncodePosition(unsigned c)
- {
- unsigned i;
-
- /* output upper 6 bits with encoding */
- i = c >> 6;
- Putcode(p_len[i], (unsigned)p_code[i] << 8);
-
- /* output lower 6 bits directly */
- Putcode(6, (c & 0x3f) << 10);
- }
-
- void EncodeEnd()
- {
- if (putlen) {
- putc(putbuf >> 8, outfile);
- codesize++;
- }
- }
-
- int DecodeChar()
- {
- unsigned c;
- c = son[R];
-
- /*
- * start searching tree from the root to leaves.
- * choose node #(son[]) if input bit == 0
- * else choose #(son[]+1) (input bit == 1)
- */
- while (c < T) {
- if(getlen){
- getlen--;
- #ifdef USE_ASM
- getbuf<<=1;
- asm jnc zerobit;
- c++;
- zerobit:;
- #else
- if (getbuf < 0)
- c++;
- getbuf<<=1;
- #endif
- } else
- c += GetBit();
- c = son[c];
- }
- c -= T;
- update(c);
- return c;
- }
-
- int DecodePosition()
- {
- unsigned i, j, c;
-
- /* decode upper 6 bits from given table */
- i = GetByte();
- c = (unsigned)d_code[i] << 6;
- j = d_len[i];
-
- /* input lower 6 bits directly */
- j -= 2;
- while (j--) {
- i <<= 1;
- if(getlen){
- getlen--;
- #ifdef USE_ASM
- getbuf<<=1;
- asm jnc zerobit;
- i++;
- zerobit:;
- #else
- if (getbuf < 0)
- i++;
- getbuf<<=1;
- #endif
- } else
- i += GetBit();
- }
- return c | i & 0x3f;
- }
-
- /* Compression */
-
- void Encode(void) /* Encoding/Compressing */
- {
- int i, c, len, r, s, last_match_length;
-
- fseek(infile, 0L, 2);
- textsize = ftell(infile);
- if (fwrite(&textsize, sizeof textsize, 1, outfile) < 1)
- Error("Unable to write"); /* write size of original text */
- if (textsize == 0)
- return;
- rewind(infile);
- textsize = 0; /* rewind and rescan */
- StartHuff();
- InitTree();
- s = 0;
- r = N - F;
- for (i = s; i < r; i++)
- text_buf[i] = ' ';
- for (len = 0; len < F && (c = getc(infile)) != EOF; len++)
- text_buf[r + len] = c;
- textsize = len;
- for (i = 1; i <= F; i++)
- InsertNode(r - i);
- InsertNode(r);
- do {
- if (match_length > len)
- match_length = len;
- if (match_length <= THRESHOLD) {
- match_length = 1;
- EncodeChar(text_buf[r]);
- } else {
- EncodeChar(255 - THRESHOLD + match_length);
- EncodePosition(match_position);
- }
- last_match_length = match_length;
- for (i = 0; i < last_match_length &&
- (c = getc(infile)) != EOF; i++) {
- DeleteNode(s);
- text_buf[s] = c;
- if (s < F - 1)
- text_buf[s + N] = c;
- s = (s + 1) & (N - 1);
- r = (r + 1) & (N - 1);
- InsertNode(r);
- }
- if ((textsize += i) > printcount) {
- printf("%12ld\r", textsize);
- printcount += 1024;
- }
- while (i++ < last_match_length) {
- DeleteNode(s);
- s = (s + 1) & (N - 1);
- r = (r + 1) & (N - 1);
- if (--len) InsertNode(r);
- }
- } while (len > 0);
- EncodeEnd();
- printf("input: %ld bytes\n", textsize);
- printf("output: %ld bytes\n", codesize);
- printf("output/input: %.3f\n", (double)codesize / textsize);
- }
-
- void Decode(void) /* Decoding/Uncompressing */
- {
- int i, j, k, r, c;
- unsigned long int count;
-
- if (fread(&textsize, sizeof textsize, 1, infile) < 1)
- Error("Unable to read"); /* read size of original text */
- if (textsize == 0)
- return;
- StartHuff();
- for (i = 0; i < N - F; i++)
- text_buf[i] = ' ';
- r = N - F;
- for (count = 0; count < textsize; ) {
- c = DecodeChar();
- if (c < 256) {
- putc(c, outfile);
- text_buf[r++] = c;
- r &= (N - 1);
- count++;
- } else {
- i = (r - DecodePosition() - 1) & (N - 1);
- j = c - 255 + THRESHOLD;
- if (r + j < N
- && i + j < N
- && (i + j <= r || i >= r)
- #ifdef __TURBOC__
- && outfile->level < -j){
- memcpy(outfile->curp,
- memmove(&text_buf[r],&text_buf[i], j),
- j);
- outfile->curp += j;
- outfile->level += j;
- #else
- ){
- fwrite(memcpy(&text_buf[r],&text_buf[i], j),
- 1, j, outfile);
- #endif
- r += j;
- count += j;
- } else
-
- for (k = i, j += i; k < j; k++) {
- c = text_buf[k & (N - 1)];
- putc(c, outfile);
- text_buf[r++] = c;
- r &= (N - 1);
- count++;
- }
- }
- if (count > printcount) {
- printf("%12ld\r", count);
- printcount += 4096;
- }
- }
- printf("%12ld\n", count);
- }
-
- int main(int argc, char *argv[])
- {
- char *s;
-
- if (argc != 4) {
- printf("Usage:lzhuf e(compression)|d(uncompression)"
- " infile outfile\n");
- return EXIT_FAILED;
- }
- if ((s = argv[1], s[1] || strpbrk(s, "DEde") == NULL)
- || (s = argv[2], (infile = fopen(s, "rb")) == NULL)
- || (s = argv[3], (outfile = fopen(s, "wb")) == NULL)) {
- printf("Trouble with arg %s\n", s);
- return EXIT_FAILED;
- }
- setvbuf(outfile, NULL, _IOFBF, 1<<12);
- setvbuf(infile, NULL, _IOFBF, 1<<12);
- if (toupper(*argv[1]) == 'E')
- Encode();
- else
- Decode();
- fclose(infile);
- fclose(outfile);
- return EXIT_OK;
- }