home *** CD-ROM | disk | FTP | other *** search
- /*----------------------------------------------------------------------*/
- /* Copyright (c) 1987 */
- /* by CompuServe Inc., Columbus, Ohio. All Rights Reserved */
- /* EXPANDER.C can be copied and distributed freely for any */
- /* non-commercial purposes. EXPANDER.C can only be incorporated */
- /* into commercial software with the permission of CompuServe Inc. */
- /*----------------------------------------------------------------------*/
-
- /*
- * ABSTRACT:
- * The compression algorithm builds a string translation table that maps
- * substrings from the input string into fixed-length codes. These codes
- * are used by the expansion algorithm to rebuild the compressor's table
- * and reconstruct the original data stream. In it's simplest form, the
- * algorithm can be stated as:
- *
- * "if <w>k is in the table, then <w> is in the table"
- *
- * <w> is a code which represents a string in the table. When a new
- * character k is read in, the table is searched for <w>k. If this
- * combination is found, <w> is set to the code for that combination
- * and the next character is read in. Otherwise, this combination is
- * added to the table, the code <w> is written to the output stream and
- * <w> is set to k.
- *
- * The expansion algorithm builds an identical table by parsing each
- * received code into a prefix string and suffix character. The suffix
- * character is pushed onto the stack and the prefix string translated
- * again until it is a single character. This completes the expansion.
- * The expanded code is then output by popping the stack and a new entry
- * is made in the table.
- *
- * The algorithm used here has one additional feature. The output codes
- * are variable length. They start at a specified number of bits. Once
- * the number of codes exceeds the current code size, the number of bits
- * in the code is incremented. When the table is completely full, a
- * clear code is transmitted for the expander and the table is reset.
- * This program uses a maximum code size of 12 bits for a total of 4096
- * codes.
- *
- * The expander realizes that the code size is changing when it's table
- * size reaches the maximum for the current code size. At this point,
- * the code size in increased. Remember that the expander's table is
- * identical to the compressor's table at any point in the original data
- * stream.
- *
- * The compressed data stream is structured as follows:
- * first byte denoting the minimum code size
- * one or more counted byte strings. The first byte contains the
- * length of the string. A null string denotes "end of data"
- *
- * This format permits a compressed data stream to be embedded within a
- * non-compressed context.
- *
- * AUTHOR: Steve Wilhite
- *
- * REVISION HISTORY:
- *
- */
-
- #include <malloc.h>
- #include <setjmp.h>
-
- #include "expander.h"
-
- /* Function prototypes: */
-
- void init_table( short min_code_size );
-
- short read_code(void);
-
- #define null_ptr 0
- #define largest_code 4095
-
- jmp_buf recover;
-
- struct code_entry
- {
- short prefix; /* prefix code */
- char suffix; /* suffix character */
- char stack;
- };
-
- static short code_size;
- static short clear_code;
- static short eof_code;
- static short first_free;
- static short bit_offset;
- static short byte_offset, bits_left;
- static short max_code;
- static short free_code;
- static short old_code;
- static short input_code;
- static short code;
- static short suffix_char;
- static short final_char;
- static short bytes_unread;
- static unsigned char code_buffer[64];
- static struct code_entry *code_table;
- static short (*get_byte)(void);
- static short (*put_byte)(short);
-
- static short mask[12] =
- {0x001, 0x003, 0x007, 0x00F,
- 0x01F, 0x03F, 0x07F, 0x0FF,
- 0x1FF, 0x3FF, 0x7FF, 0xFFF};
-
-
- void init_table( short min_code_size )
- {
- code_size = min_code_size + 1;
- clear_code = 1 << min_code_size;
- eof_code = clear_code + 1;
- first_free = clear_code + 2;
- free_code = first_free;
- max_code = 1 << code_size;
- }
- /*$page*/
-
- short read_code(void)
- {
- short
- bytes_to_move,
- i,
- ch;
- long
- temp;
-
- byte_offset = bit_offset >> 3;
- bits_left = bit_offset & 7;
-
- if (byte_offset >= 61)
- {
- bytes_to_move = 64 - byte_offset;
-
- for (i = 0; i < bytes_to_move; i++)
- code_buffer[i] = code_buffer[byte_offset + i];
-
- while (i < 64)
- {
- if (bytes_unread == 0)
- {
- /* Get the length of the next record. A zero-length record
- * denotes "end of data".
- */
-
- bytes_unread = (*get_byte)();
-
- if (bytes_unread < 1)
- if (bytes_unread == 0) /* end of data */
- break;
- else
- longjmp(recover, bytes_unread);
- }
-
- ch = (*get_byte)();
-
- if (ch < 0)
- longjmp(recover, ch);
-
- code_buffer[i++] = (unsigned char)(ch);
- bytes_unread--;
- }
-
- bit_offset = bits_left;
- byte_offset = 0;
- }
-
- bit_offset += code_size;
- temp = (long) code_buffer[byte_offset]
- | (long) code_buffer[byte_offset + 1] << 8
- | (long) code_buffer[byte_offset + 2] << 16;
-
- if (bits_left > 0)
- temp >>= (long) bits_left;
-
- return (short)(temp) & mask[code_size - 1];
- }
- /*$page*/
-
- short Expand_Data(
- short (*get_byte_routine)(void),
- short (*put_byte_routine)(short)
- )
- /*
- * Function:
- * Decompress a LZW compressed data stream.
- *
- * Inputs:
- * get_byte_routine - address of the caller's "get_byte" routine.
- * put_byte_routine - address of the caller's "put_byte" routine.
- *
- * Returns:
- * 0 OK
- * -1 expected end-of-file
- * -2 cannot allocate memory
- * -3 bad "min_code_size"
- * < -3 error status from the get_byte or put_byte routine
- */
- {
- short sp; /* stack ptr */
- short status;
- short min_code_size;
-
- get_byte = get_byte_routine;
- put_byte = put_byte_routine;
-
- code_table = (struct code_entry *)
- malloc(sizeof(struct code_entry)*(largest_code + 1));
-
- if (code_table == null_ptr)
- return -2;
-
- status = setjmp(recover);
-
- if (status != 0)
- {
- free((char *) code_table);
- return status;
- }
-
- /* Get the minimum code size (2 to 9) */
-
- min_code_size = (*get_byte)();
-
- if (min_code_size < 0)
- longjmp(recover, min_code_size);
- else if (min_code_size < 2 || min_code_size > 9)
- longjmp(recover, -3);
-
- init_table(min_code_size);
- sp = 0;
- bit_offset = 64*8; /* force "read_code" to start a new */
- bytes_unread = 0; /* record */
-
- while ((code = read_code()) != eof_code)
- {
- if (code == clear_code)
- {
- init_table(min_code_size);
- code = read_code();
- old_code = code;
- suffix_char = code;
- final_char = code;
- if ((status = (*put_byte)(suffix_char)) != 0)
- longjmp(recover, status);
- }
- else
- {
- input_code = code;
-
- if (code >= free_code)
- {
- code = old_code;
- code_table[sp++].stack = (char) final_char;
- }
-
- while (code >= first_free)
- {
- code_table[sp++].stack = code_table[code].suffix;
- code = code_table[code].prefix;
- }
-
- final_char = code;
- suffix_char = code;
- code_table[sp++].stack = (char) final_char;
-
- while (sp > 0)
- if ((status = (*put_byte)(code_table[--sp].stack)) != 0)
- longjmp(recover, status);
-
- code_table[free_code].suffix = (char) suffix_char;
- code_table[free_code].prefix = old_code;
- free_code++;
- old_code = input_code;
-
- if (free_code >= max_code)
- if (code_size < 12)
- {
- code_size++;
- max_code <<= 1;
- }
- }
- }
-
- free((char *) code_table);
- return 0;
- }
-