home *** CD-ROM | disk | FTP | other *** search
- /*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- * Squash - data compression program
- *
- *
- * Squash.c - File compression ala IEEE Computer, June 1984.
- *
- * Authors: Spencer W. Thomas (decvax!harpo!utah-cs!utah-gr!thomas)
- * Jim McKie (decvax!mcvax!jim)
- * Steve Davies (decvax!vax135!petsd!peora!srd)
- * Ken Turkowski (decvax!decwrl!turtlevax!ken)
- * James A. Woods (decvax!ihnp4!ames!jaw)
- * Joe Orost (decvax!vax135!petsd!joe)
- * Leslie Satenstein (PCOMM/INFOCOM BBS Montreal as LESLIE)
- * (514)-682-5884
- *
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/
-
- #define MAIN
- #include <stdio.h>
- #include <ctype.h>
- #include <dos.h>
- #include <signal.h>
- #include <types.h>
- #include <stat.h>
- #include <squash.h>
- #include <io.h>
-
- /* stuff for non-repeat packing */
- #define DLE 0x90 /* repeat sequence marker */
- static unsigned char state; /* current packing state */
- /* non-repeat packing states */
- #define NOHIST 0 /* don't consider previous input*/
- #define INREP 1 /* sending a repeated value */
- #define SENTCHAR 1 /* lastchar set, no lookahead yet */
- #define SENDNEWC 2 /* run over, send new char next */
- #define SENDCNT 3 /* newchar set, send count next */
-
-
-
- char *arc=".ARC";
- int exit_stat = 0;
-
- int getcode();
-
- int do_decomp = 0;
- int debug = 0;
- int quiet = 1; /* don't tell me about compression */
- int kill=0; /* don't delete input */
-
- int force = 0;
- char ofname [70];
- int verbose = 0;
- int (*bgnd_flag)();
-
- static int crctab[] = /* CRC lookup table */
- {
- 0x0000, 0xC0C1, 0xC181, 0x0140, 0xC301, 0x03C0, 0x0280, 0xC241,
- 0xC601, 0x06C0, 0x0780, 0xC741, 0x0500, 0xC5C1, 0xC481, 0x0440,
- 0xCC01, 0x0CC0, 0x0D80, 0xCD41, 0x0F00, 0xCFC1, 0xCE81, 0x0E40,
- 0x0A00, 0xCAC1, 0xCB81, 0x0B40, 0xC901, 0x09C0, 0x0880, 0xC841,
- 0xD801, 0x18C0, 0x1980, 0xD941, 0x1B00, 0xDBC1, 0xDA81, 0x1A40,
- 0x1E00, 0xDEC1, 0xDF81, 0x1F40, 0xDD01, 0x1DC0, 0x1C80, 0xDC41,
- 0x1400, 0xD4C1, 0xD581, 0x1540, 0xD701, 0x17C0, 0x1680, 0xD641,
- 0xD201, 0x12C0, 0x1380, 0xD341, 0x1100, 0xD1C1, 0xD081, 0x1040,
- 0xF001, 0x30C0, 0x3180, 0xF141, 0x3300, 0xF3C1, 0xF281, 0x3240,
- 0x3600, 0xF6C1, 0xF781, 0x3740, 0xF501, 0x35C0, 0x3480, 0xF441,
- 0x3C00, 0xFCC1, 0xFD81, 0x3D40, 0xFF01, 0x3FC0, 0x3E80, 0xFE41,
- 0xFA01, 0x3AC0, 0x3B80, 0xFB41, 0x3900, 0xF9C1, 0xF881, 0x3840,
- 0x2800, 0xE8C1, 0xE981, 0x2940, 0xEB01, 0x2BC0, 0x2A80, 0xEA41,
- 0xEE01, 0x2EC0, 0x2F80, 0xEF41, 0x2D00, 0xEDC1, 0xEC81, 0x2C40,
- 0xE401, 0x24C0, 0x2580, 0xE541, 0x2700, 0xE7C1, 0xE681, 0x2640,
- 0x2200, 0xE2C1, 0xE381, 0x2340, 0xE101, 0x21C0, 0x2080, 0xE041,
- 0xA001, 0x60C0, 0x6180, 0xA141, 0x6300, 0xA3C1, 0xA281, 0x6240,
- 0x6600, 0xA6C1, 0xA781, 0x6740, 0xA501, 0x65C0, 0x6480, 0xA441,
- 0x6C00, 0xACC1, 0xAD81, 0x6D40, 0xAF01, 0x6FC0, 0x6E80, 0xAE41,
- 0xAA01, 0x6AC0, 0x6B80, 0xAB41, 0x6900, 0xA9C1, 0xA881, 0x6840,
- 0x7800, 0xB8C1, 0xB981, 0x7940, 0xBB01, 0x7BC0, 0x7A80, 0xBA41,
- 0xBE01, 0x7EC0, 0x7F80, 0xBF41, 0x7D00, 0xBDC1, 0xBC81, 0x7C40,
- 0xB401, 0x74C0, 0x7580, 0xB541, 0x7700, 0xB7C1, 0xB681, 0x7640,
- 0x7200, 0xB2C1, 0xB381, 0x7340, 0xB101, 0x71C0, 0x7080, 0xB041,
- 0x5000, 0x90C1, 0x9181, 0x5140, 0x9301, 0x53C0, 0x5280, 0x9241,
- 0x9601, 0x56C0, 0x5780, 0x9741, 0x5500, 0x95C1, 0x9481, 0x5440,
- 0x9C01, 0x5CC0, 0x5D80, 0x9D41, 0x5F00, 0x9FC1, 0x9E81, 0x5E40,
- 0x5A00, 0x9AC1, 0x9B81, 0x5B40, 0x9901, 0x59C0, 0x5880, 0x9841,
- 0x8801, 0x48C0, 0x4980, 0x8941, 0x4B00, 0x8BC1, 0x8A81, 0x4A40,
- 0x4E00, 0x8EC1, 0x8F81, 0x4F40, 0x8D01, 0x4DC0, 0x4C80, 0x8C41,
- 0x4400, 0x84C1, 0x8581, 0x4540, 0x8701, 0x47C0, 0x4680, 0x8641,
- 0x8201, 0x42C0, 0x4380, 0x8341, 0x4100, 0x81C1, 0x8081, 0x4040
- };
-
- /*****************************************************************
- * TAG( main )
- *
- * Algorithm from "A Technique for High Performance Data Compression",
- * Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19.
- *
- * Usage: squash [-fvcukVd] archive [filein]
- *
- * switches:
- * All flags are optional.
- * -D => debug
- * -V => print Version; debug verbose
- * -v => unquiet
- * -u => do decompression
- * -f => force overwrite of output file
- * -F => force overwrite of output file
- * -n => no header: useful to uncompress old files
- * -k => delete file after inserting into archive
- * -K => delete file after inserting into archive
- * -q => complement of -v (quiet) shhhhh
- *
- * -f: Forces output archive to be generated, even if one already
- * exists.
- * If -f is not used, the user will be prompted to determine
- * if the output file should be overwritten.
- *
- * -k: Deletes input file after output file is created.
- *
- * -v: Write compression statistics
- *
- * Outputs:
- * file.ARC: Compressed form of file with same mode,
- *
- * Assumptions:
- * When filenames are given, replaces with the compressed version
- * (.ARC suffix)
- *
- * Algorithm:
- *
- * Modified Lempel-Ziv method (LZW). Basically finds common
- * substrings and replaces them with a variable size code. This is
- * deterministic, and can be done on the fly. Thus, the decompression
- * procedure needs no input table, but tracks the way the table was built.
- */
-
- main( argc, argv )
- register int argc;
- char **argv;
- {
- int overwrite = 0; /* Do not overwrite unless given -f flag */
- char tempname[80];
- char **filelist, **fileptr;
- char *cp, *strrchr(char *,char), *malloc(int);
- struct stat statbuf;
- FILE *filein,*fileout;
- extern onintr();
-
- /* trap control break */
-
- if ( (bgnd_flag = signal ( SIGINT, SIG_IGN )) != SIG_IGN )
- {
- signal ( SIGINT, onintr );
- }
-
-
- filelist = fileptr = (char **)(malloc(argc * sizeof(*argv)));
- *filelist = NULL;
-
- /*
- * Argument Processing
- */
- for (argc--, argv++; argc > 0; argc--, argv++)
- {
- if (**argv == '-')
- { /* A flag argument */
- while (*++(*argv))
- { /* Process all flags in this arg */
- switch (**argv)
- {
- case 'D':
- debug = 1;
- break;
- case 'V':
- verbose = 1;
- version();
- break;
- case 'v':
- quiet = 0;
- break;
- case 'u':
- do_decomp = 1;
- break;
- case 'f':
- case 'F':
- overwrite = 1;
- force = 1;
- break;
- case 'k':
- case 'K':
- kill=1;
- break;
- case 'q':
- quiet = 1;
- break;
- default:
- printf( "Unknown flag: '%c'; ", **argv);
- Usage();
- exit(1);
- }
- }
- }
- else
- { /* Input file name */
- *fileptr++ = *argv; /* Build input file list */
- *fileptr = NULL;
- /* process nextarg; */
- }
- nextarg:
- continue;
- }
-
-
- if (*filelist != NULL)
- {
- for (fileptr = filelist; strupr(*fileptr); fileptr++)
- {
- exit_stat = 0;
-
- if (do_decomp != 0)
- { /* DECOMPRESSION */
- /* Check for .ARC suffix */
- if (strcmp(*fileptr + strlen(*fileptr) - 4, arc) != 0)
- {
- /* No .ARC tack one on */
- strcpy(tempname, *fileptr);
- if(!strrchr(tempname,'.')) strcat(tempname, arc);
- *fileptr = tempname;
- }
- /* Open input file */
-
- if ((filein=fopen(*fileptr, "rb")) == NULL)
- {
- perror(*fileptr);
- continue;
- }
-
- /* Generate output filename */
- if(read(fileno(filein),&ahead,sizeof(ahead))!=sizeof(ahead))
- {
- printf("IO error on input file %s\n",filein);
- exit(1);
- }
- strcpy(ofname, ahead.name);
- }
- else
- { /* COMPRESSION */
-
- if (strcmp(*fileptr + strlen(*fileptr) - 4, arc) == 0)
- {
- printf("%s: already has .ARC suffix -- no change\n",
- *fileptr);
- continue;
- }
- /* Open input file */
-
- if ((filein=fopen(*fileptr, "rb")) == NULL)
- {
- perror(*fileptr);
- continue;
- }
- /* Generate output filename */
- /* Strip off drive id and path */
- strcpy(ofname, *fileptr);
- if(!(cp=strrchr(ofname,'\\') ))/* ms/pc dos */
- if(!(cp=strrchr(ofname,'/') )) /* for unix */
- if(!(cp=strrchr(ofname,':'))) cp=ofname-1;
- cp++; /* skip past the delimiter */
- strcpy(ahead.name,cp);
-
-
- if(cp=strrchr(ofname,'.')) strcpy(cp,arc);
- else
- strcat(ofname, arc);
-
- if ((cp=strrchr(ofname,'\\')) != NULL) cp++;
- else
- cp = ofname;
- if (strlen(cp) > 12)
- {
- printf("%s: filename too long to tack on %s\n",cp,arc);
- continue;
- }
-
- }
-
- /* Check for overwrite of existing file */
-
- if (!overwrite)
- {
- if (stat(ofname, &statbuf) == 0)
- {
- char response[2];
- response[0] = 'n';
- printf( "%s already exists;", ofname);
- if (foreground())
- {
- printf( " do you wish to overwrite %s (y or n)? ",
- ofname);
- fflush(stdout);
- read(fileno(stdin), response,2 );
- while (response[1] != '\n')
- {
- if (read(fileno(stdin), response+1, 1) < 0)
- { /* Ack! */
- perror("stdin");
- break;
- }
- }
- }
- strupr(response);
- if (response[0] != 'Y')
- {
- printf( "\tnot overwritten\n");
- continue;
- }
- }
- }
- if((fileout=fopen(ofname, "wb")) == NULL)
- {
- perror(ofname);
- continue;
- }
- if(!quiet)
- printf( "%s: ", *fileptr);
-
- /* Actually do the compression/decompression */
-
- if (do_decomp == 0) docompress(fileout,filein);
- else
- dodecompress(fileout,filein,*fileptr);
-
- if((exit_stat == 1) || (!quiet))
- putc('\n', stdout);
- }
- if (do_decomp == 0) /* indicate end of file */
- {
- ahead.atype=0;
- if(2!=write(fileno(fileout),&ahead,2))writeerr();
- fclose(fileout);
- }
- }
- else
- {
- Usage();
- }
- exit(exit_stat);
- }
- /*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
- docompress -initialize compression
-
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/
-
- docompress(fileout,filein)
- register FILE *filein,*fileout;
- {
- long fpos;
- fpos=fseek(fileout,0L,1); /* save displacement into file */
- ahead.archmark=0x1a;
- ahead.length = filelength(fileno(filein));
- getstamp(filein,&ahead.date,&ahead.time);
- if(write(fileno(fileout),&ahead,sizeof(ahead)) !=sizeof(ahead))
- {
- printf( "Outfile error, disk full???\n");
- writeerr();
- }
- bytes_out = crc=0; /* excludes header */
- if ( ahead.length < (1 << 12) )
- {
- state=NOHIST;
- ahead.atype=8; /* crunch */
- hsize = 5003;
- max_bits=12;
- if(1!=write(fileno(fileout),&max_bits,1))writeerr();
- bytes_out++;
- }
- else
- {
- ahead.atype=9;
- hsize = HSIZE;
- max_bits=BITS;
- }
-
- compress(fileout,filein);
- fclose(filein);
- ahead.crc=crc;
- ahead.lzwsize=bytes_out;
- fseek(fileout,fpos,0);
- if(write( fileno(fileout),&ahead,sizeof(ahead)) !=sizeof(ahead) )writeerr();
- fseek(fileout,0L,2);
- /*
- * Print out stats on stdout
- */
-
- if(!quiet)
- {
-
- printf("%ld chars in, %ld codes (%ld bytes) out, compression factor: ",
- in_count, out_count, bytes_out );
- prratio( stdout, in_count, bytes_out );
- printf( "\tCompression as in compact: " );
- prratio( stdout, in_count-bytes_out, in_count );
- printf( "\tLargest code (of last block) was %d (%d bits)\n",
- free_ent - 1, n_bits );
-
- }
- return(exit_stat);
-
- }
-
-
- /*****************************************************************
- * TAG( output )
- *
- * Output the given code.
- * Inputs:
- * code: A n_bits-bit integer. If == -1, then EOF. This assumes
- * that n_bits =< (long)wordsize - 1.
- * Outputs:
- * Outputs code to the file.
- * Assumptions:
- * Chars are 8 bits long.
- * Algorithm:
- * Maintain a BITS character long buffer (so that 8 codes will
- * fit in it exactly). Extract a code from the buffer and use each
- * code in turn. When the buffer fills up empty it and start over.
- *****************************************************************/
-
- static char iobuf[BITS];
-
- unsigned char lmask[9] = {
- 0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00};
- unsigned char rmask[9] = {
- 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
-
- output( code,outf )
- FILE *outf;
- int code;
- {
- static int col = 0;
-
- register char * bp = iobuf;
- register int r_off = offset, bits= n_bits;
-
- if ( verbose )
- printf( "%5d%c", code,
- (col+=6) >= 74 ? (col = 0, '\n') : ' ' );
- if ( code >= 0 )
- {
-
- /*
- * Get to the first byte.
- */
-
- bp += (r_off >> 3);
- r_off &= 7;
- /*
- * Since code is always >= 8 bits, only need to mask the first
- * hunk on the left.
- */
- *bp = (*bp & rmask[r_off]) | (code << r_off) & lmask[r_off];
- bp++;
- bits -= (8 - r_off);
- code >>= 8 - r_off;
- /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
- if ( bits >= 8 )
- {
- *bp++ = code;
- code >>= 8;
- bits -= 8;
- }
- /* Last bits. */
- if(bits)
- *bp = code;
- offset += n_bits;
- if ( offset == (n_bits << 3) )
- {
- bp = iobuf;
- bits = n_bits;
- bytes_out += bits;
- write(fileno(outf),bp,bits);
- offset=bits=0;
- /*
- do
- fputc(*bp++,outf);
- while(--bits);
- offset = 0;
- */
- }
-
- /*
- * If the next entry is going to be too big for the code size,
- * then increase it, if possible.
- */
- if ( free_ent > maxcode || (clear_flg > 0))
- {
- /*
- * Write the whole buffer, because the input side won't
- * discover the size increase until after it has read it.
- */
- if ( offset > 0 )
- {
- if( write( fileno(outf),iobuf, n_bits) != n_bits)
- writeerr();
- bytes_out += n_bits;
- }
- offset = 0;
-
- if ( clear_flg )
- {
- maxcode = MAXCODE (INIT_BITS);
- n_bits = INIT_BITS;
- clear_flg = 0;
- }
- else
- {
- n_bits++;
- if ( n_bits == max_bits)
- maxcode = maxmaxcode;
- else
- maxcode = MAXCODE(n_bits);
- }
- if ( debug )
- {
- printf( "\nChange to %d bits\n", n_bits );
- col = 0;
- }
- }
- }
- else
- {
- /*
- * At EOF, write the rest of the buffer.
- */
- if ( offset > 0 )
- write( fileno(outf),iobuf, offset= (offset + 7)>>3);
- bytes_out += offset;
- offset = 0;
- fflush( outf );
-
- if ( verbose )
- printf( "\n" );
-
- if( ferror( outf ) )
- writeerr();
- }
- }
-
- /*****************************************************************
- * TAG( getcode )
- *
- * Read one code from the standard input. If EOF, return -1.
- * Inputs:
- * filein
- * Outputs:
- * code or -1 is returned.
- ****************************************************************/
-
- int getcode(inf)
- FILE *inf;
- {
- register int code;
- static int offset = 0, size = 0;
- register int r_off, bits;
- register unsigned char *bp = iobuf;
-
- if ( clear_flg > 0 || offset >= size || free_ent > maxcode )
- {
- /*
- * If the next entry will be too big for the current code
- * size, then we must increase the size. This implies reading
- * a new buffer full, too.
- */
-
- if ( free_ent > maxcode )
- {
- n_bits++;
- if ( n_bits == max_bits )
- maxcode = maxmaxcode; /* won't get any bigger now */
- else
- maxcode = MAXCODE(n_bits);
- }
- if ( clear_flg > 0)
- {
- maxcode = MAXCODE (INIT_BITS);
- n_bits = INIT_BITS;
- clear_flg = 0;
- }
-
- for(size=0; size<n_bits; size++)
- {
- if((code=getc_unp(inf))==EOF)
- break;
- else iobuf[size] = code;
- }
-
- if(size <= 0)return -1; /* end of file */
-
- offset = 0;
- /* Round size down to integral number of codes */
- size = (size << 3) - (n_bits - 1);
- }
- r_off = offset;
- bits = n_bits;
- /*
- * Get to the first byte.
- */
- bp += (r_off >> 3);
- r_off &= 7;
- /* Get first part (low order bits) */
- code = (*bp++ >> r_off);
- bits -= (8 - r_off);
- r_off = 8 - r_off; /* now, offset into code word */
- /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
- if ( bits >= 8 )
- {
- code |= *bp++ << r_off;
- r_off += 8;
- bits -= 8;
- }
- /* high order bits. */
- code |= (*bp & rmask[bits]) << r_off;
- offset += n_bits;
-
- return code;
- }
-
- writeerr()
- {
- perror ( ofname );
- unlink ( ofname );
- exit ( 1 );
- }
- /*
- * This routine returns 1 if we are running in the foreground and stdout
- * is a tty.
- */
- foreground()
- {
- if(bgnd_flag)
- { /* background? */
- return(0);
- }
- /* foreground */
- return((isatty(2)));
- }
-
- onintr ( )
- {
- unlink ( ofname );
- exit ( 1 );
- }
-
-
- cl_block () /* table clear for block compress */
- {
- register long int rat;
-
- checkpoint = in_count + CHECK_GAP;
- if ( debug )
- {
- printf ( "count: %ld, ratio: ", in_count );
- prratio ( stdout, in_count, bytes_out );
- }
-
- if(in_count > 0x007fffff)
- { /* shift will overflow */
- rat = bytes_out >> 8;
- if(rat == 0)
- { /* Don't divide by zero */
- rat = 0x7fffffff;
- }
- else
- {
- rat = in_count / rat;
- }
- }
- else
- {
- rat = (in_count << 8) / bytes_out; /* 8 fractional bits */
- }
- if ( rat > ratio )
- {
- ratio = rat;
- }
- else
- {
- ratio = 0;
- #ifdef DUMPTAB
- if(verbose)
- dump_tab(); /* dump string table */
- #endif
-
- clr_hash ();
- free_ent = FIRST;
- clear_flg = 1;
- output( (int) CLEAR );
- if(debug)
- printf ( "clear\n" );
- }
- }
-
- clr_hash() /* reset code table */
- {
-
- memset(htab,0xffff,sizeof(htab)); /* default everything */
- }
-
- prratio(stream, num, den)
- FILE *stream;
- long int num, den;
- {
- register int q; /* Doesn't need to be long */
-
- if(num > 214748L)
- { /* 2147483647/10000 */
- q = num / (den / 10000L);
- }
- else
- {
- q = 10000L * num / den; /* Long calculations, though */
- }
- if (q < 0)
- {
- putc('-', stream);
- q = -q;
- }
- fprintf(stream, "%d.%02d%%\n", q / 100, q % 100);
- }
-
- version()
- {
- printf( "Options: ");
- if(debug)
- {
- printf("DEBUG, ");
- printf( "BITS = %d\n", BITS);
- }
- }
-
- Usage()
- {
- printf("Usage: Squash [-DVfkq] filename.ext\n\t===>filename.arc\n");
- printf("Usage: Squash -u [-DVfkq] filename[.arc] \n");
- printf("\t===>filename.ext -u selects decompress \n");
- }
-
- /*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
- read input, perform crc check, and then function like fgetc
- which is to return a character at a time to the caller
-
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/
-
- unsigned myfgetc(filein)
- FILE *filein;
- {
- static long lefttoread; /* count down number of bytes */
- static fchunk=0; /* amount to take at a time */
- static unsigned char *fbuf=NULL,*fbpos=NULL;
- static short fbsize;
- if( !fbuf) /* was I here already ? */
- {
- fbsize=_memavl(); /* get free space */
- if(fbsize>32767) fbsize=32767; /* not more than 32k */
- if(fbsize>512) fbsize-=512; /* leave some for someone else */
- if(!(fbuf=malloc(fbsize)))
- {
- printf("Not enough memory\n");
- exit(1);
- }
- lefttoread=ahead.length; /* file size to process */
- }
- if(!fchunk)
- {
- if(!lefttoread) /* already read to end of file */
- {
- free(fbuf); /* return malloc'd memory */
- fbuf=NULL; /* set indicator for next usage */
- fchunk=fbsize=0;
- return 0xffff; /* return EOF */
- }
- /* read up to buffersize bytes */
- if(fbsize<lefttoread) fchunk=fbsize;
- else
- fchunk=lefttoread;
- if(read(fileno(filein),fbuf,fchunk)!=fchunk)
- {
- printf( "Read error on input file\n");
- exit(1);
- }
- crc=addcrc(crc,fbuf,fchunk); /* whiz thru the crc check */
- lefttoread-=fchunk;
-
- fbpos=fbuf; /* crc check the fbuf */
- }
-
- fchunk--;
- return *fbpos++ ;
- }
- /* CRC computation logic
-
- The logic for this method of calculating the CRC 16 bit polynomial
- is taken from an article by David Schwaderer in the April 1985
- issue of PC Tech Journal.
- */
-
-
-
- int addcrc(crc,cc,i) /* update a CRC check */
- register char *cc; /* string to check */
- register int crc, /* running CRC value */
- i; /* number of bytes to analyze */
- {
- for(cc--;i--;)
- crc=((crc>>8)&0x00ff) ^ crctab[(crc^*++cc)&0x00ff];
- return crc;
- }
-
- /*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- *
- * compress filein to fileout
- *
- * Algorithm: use open addressing double hashing (no chaining) on the
- * prefix code / next character combination. We do a variant of Knuth's
- * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime
- * secondary probe. Here, the modular division first probe is gives way
- * to a faster exclusive-or manipulation. Also do block compression with
- * an adaptive reset, whereby the code table is cleared when the compression
- * ratio decreases, but after the table fills. The variable-length output
- * codes are re-sized at this point, and a special CLEAR code is generated
- * for the decompressor. Late addition: construct the table according to
- * file size for noticeable speed improvement on small files. Please direct
- * questions about this implementation to ames!jaw.
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/
-
- compress(outf,inf)
- FILE *outf,*inf;
- {
- register int i = 0;
- int c;
- int ent;
- register int disp;
- register int hsize_reg;
- register int hshift;
- register long fcode;
- offset = 0;
- out_count = 0;
- clear_flg = 0;
- ratio = 0;
- in_count = 1;
- checkpoint = CHECK_GAP;
- maxcode = MAXCODE(INIT_BITS);
- n_bits = INIT_BITS;
- free_ent = FIRST ;
- if(EOF ==(ent= (ahead.atype==8?getc_ncr(inf):myfgetc(inf)) ))return EOF;
- hshift = 0;
- for ( fcode = (long) hsize; fcode < 65536L; fcode *= 2L )hshift++;
-
- hshift = 8 - hshift; /* set hash code range bound */
- hsize_reg = hsize;
- clr_hash(); /* clear hash table */
-
- while((c= (ahead.atype==8?getc_ncr(inf):myfgetc(inf))) !=EOF ) /* until not end of file */
- {
- c&=0xff;
- in_count++;
- fcode = (long) (((long) c << max_bits) + ent);
- i = ((c << hshift) ^ ent); /* xor hashing */
-
- if ( htab[i] == fcode )
- {
- ent = codetab[i];
- continue;
- }
- else if ( (long)htab[i] < 0 ) /* empty slot */
- goto nomatch;
- disp = hsize_reg - i; /* secondary hash (after G. Knott) */
- if ( i == 0 )
- disp = 1;
- probe:
- if ( (i -= disp) < 0 )
- i += hsize_reg;
-
- if ( htab [i] == fcode )
- {
- ent = codetab[i];
- continue;
- }
- if ( (long)htab[i] > 0 )
- goto probe;
- nomatch:
- output( (int) ent ,ouhead.l;
- out_count++;
- ent = c;
- if ( free_ent < maxmaxcode )
- {
- codetab[i] = free_ent++; /* code -> hashtable */
- htab[i] = fcode;
- }
- else if ( (long)in_count >= checkpoint )
- cl_block ();
- }
- /*
- * Put out the final code.
- */
- output( ( int)ent,ouhf);
- out_count++;
- output( ( int)-1,outf);
-
- }
-
- /*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
- Initiate a file decompression
-
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/
-
- #ifndef EOF /* include if never done before */
- #include <stdio.h>
- #include <ctype.h> /* class tests */
- #include <squash.h>
- #endif
-
- dodecompress(outf,inf,fileid)
- FILE *outf,*inf;
- char *fileid;
- {
- if(0x1a!=ahead.archmark || (ahead.atype!=8&&ahead.atype!=9))
- {
- printf( "%s: not in compressed format\n",fileid);
- return(-1);
- }
- lzwsize=ahead.lzwsize; /* amount to read */
- if(ahead.atype==8)
- {
- hsize=5003;
- if(12!=(max_bits=getc_unp(inf)))
- {
- printf("\nCannot continue. %s is non-standard archive\n",fileid);
- exit(1);
- }
- state=NOHIST;
- }
- else
- {
- max_bits=BITS;
- hsize=9001;
- }
- maxmaxcode = 1 << max_bits;
- crc=0; /* for crc check */
- decompress(outf,inf );
- if(crc!=ahead.crc)printf("CRC error detected on file %s\n",fileid);
- return (crc);
- }
- /*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
- Decompress filein to fileout. This routine adapts to the codes in the
- file building the "string" table on-the-fly; requiring no table to
- be stored in the compressed file. The tables used herein are shared
- with those of the compress() routine. See the definitions above.
-
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/
-
- decompress(outf,inf)
- FILE *outf,*inf;
- {
- register unsigned char *stackp;
- register int finchar;
- register int code, oldcode;
- int incode,j;
-
- /*
- * As above, initialize the first 256 entries in the table.
- */
-
- maxcode = MAXCODE(INIT_BITS);
- n_bits = INIT_BITS;
- for ( code = 255; code >= 0; code-- )
- {
- tab_prefixof(code) = 0;
- tab_suffixof(code) = (unsigned char)code;
- }
- free_ent = FIRST;
- incode=finchar = oldcode = getcode(inf);
- if(oldcode == -1) /* EOF already? */
- return; /* Get out of here */
- crc=addcrc(crc,&incode,1); /* calc the crc */
- write(fileno(outf),&incode,sizeof(char));/* first code must be 8 bits = char */
- if(ferror(outf)) /* Crash if can't write */
- writeerr();
- stackp = de_stack;
-
- while ( (code = getcode(inf)) > -1 )
- {
- if ( (code == CLEAR) )
- {
- for ( code = 255; code >= 0; code-- )
- tab_prefixof(code) = 0;
- clear_flg = 1;
- free_ent = FIRST - 1;
- if ( (code = getcode (inf)) == -1 ) /* O, untimely death! */
- {
- break;
- }
- }
- incode = code;
- /*
- * Special case for KwKwK string.
- */
- if ( code >= free_ent )
- {
- *stackp++ = finchar;
- code = oldcode;
- }
-
- /*
- * Generate output characters in reverse order
- * Stop if input code is in range 0..255
- */
- while ( code >= 256 )
- {
- *stackp++ = tab_suffixof(code);
- code = tab_prefixof(code);
- }
- *stackp++ = finchar = tab_suffixof(code);
- /*
- * And put them out in forward order
- */
- if(ahead.atype==9)
- {
- if(1<(j=stackp-de_stack)) /* number of bytes in the string */
- memrev(de_stack,j); /* reverse the string in place */
- crc=addcrc(crc,de_stack,j);
- if(write(fileno(outf),stackp=de_stack,j)!=j)
- {
- printf("Outfile error, disk full???\n");
- writeerr();
- }
- }
- else
- { /* ahead.atype==8 */
- do
- putc_ncr(*--stackp,outf);
- while ( stackp > de_stack );
- }
- /*
- * Generate the new entry.
- */
- if ( (code=free_ent) < maxmaxcode )
- {
- tab_prefixof(code) = (unsigned short)oldcode;
- tab_suffixof(code) = finchar;
- free_ent = code+1;
- }
- /*
- * Remember previous code.
- */
- oldcode = incode;
- }
- setstamp(outf,ahead.date,ahead.time);
- if(ferror(outf))writeerr();
- } /* end decompress */
-
- getstamp(f,date,time) /* get a file's date/time stamp */
- FILE *f; /* file to get stamp from */
- unsigned *date, *time; /* storage for the stamp */
- {
- struct WORDREGS reg;
-
- reg.ax = 0x5700; /* get date/time */
- reg.bx = fileno(f); /* file handle */
- intdos(®,®); /* DOS call */
- if(reg.cflag ) /* DOS call */
- printf("Get timestamp fail (%d)\n",reg.ax);
-
- *date = reg.dx; /* save date/time */
- *time = reg.cx;
- } /* getstamp */
- setstamp(f,date,time) /* set a file's date/time stamp */
- FILE *f; /* file to set stamp on */
- int date, time; /* desired date, time */
- {
-
- struct WORDREGS reg;
- fflush(f); /* force any pending output */
- reg.ax = 0x5701; /* set date/time */
- reg.bx = fileno(f); /* file handle */
- reg.cx = time; /* desired time */
- reg.dx = date; /* desired date */
- intdos(®,®); /* DOS call */
- if(reg.cflag) /* DOS call */
- printf("Set timestamp fail (%d)\n",reg.ax);
- } /* setstamp */
- /*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
- Memrev reverse string in memory;
-
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/
-
- memrev(str,len)
- register unsigned char *str;
- unsigned len;
- {
- register unsigned char *end;
- unsigned char i;
- end=str+len-1;
- for(len>>=1;len--;)
- {
- i=*str;
- *str++=*end;
- *end--=i;
- }
- }
- int getc_ncr(f) /* get bytes with collapsed runs */
- FILE *f; /* file to get from */
- {
- static int lastc; /* value returned on last call */
- static int repcnt; /* repetition counter */
- static int c; /* latest value seen */
-
- switch(state)
- { /* depends on our state */
- case NOHIST: /* no relevant history */
- state = SENTCHAR;
- return lastc = myfgetc(f); /* remember the value next time */
-
- case SENTCHAR: /* char was sent. look ahead */
- switch(lastc)
- { /* action depends on char */
- case DLE: /* if we sent a real DLE */
- state = NOHIST; /* then start over again */
- return 0; /* but note that the DLE was real */
-
- case EOF: /* EOF is always a special case */
- return EOF;
-
- default: /* else test for a repeat */
- for(repcnt=1; (c=myfgetc(f))==lastc && repcnt<255; repcnt++)
- ; /* find end of run */
-
- switch(repcnt)
- { /* action depends on run size */
-
- case 1: /* not a repeat */
- return lastc = c; /* but remember value next time */
-
- case 2: /* a repeat, but too short */
- state = SENDNEWC; /* send the second one next time */
- return lastc;
-
- default: /* a run - compress it */
- state = SENDCNT; /* send repeat count next time */
- return DLE; /* send repeat marker this time */
- }
- }
-
- case SENDNEWC: /* send second char of short run */
- state = SENTCHAR;
- return lastc = c;
-
- case SENDCNT: /* sent DLE, now send count */
- state = SENDNEWC;
- return repcnt;
-
- default:
- printf("Bug - bad ncr state\n");
- }
- } /* getc_ncr */
- pascal putc_unp(c,t) /* output an unpacked byte */
- char c; /* byte to output */
- FILE *t; /* file to output to */
- {
- crc = addcrc(crc,&c,1); /* update the CRC check value */
- if(fputc(c,t)==EOF) /* if to console then ignore error */
- {
- printf("Disk full???\n");
- exit(1);
- }
- }
-
- /* This routine is used to decode non-repeat compression. Bytes are
- passed one at a time in coded format, and are written out uncoded.
- The data is stored normally, except that runs of more than two
- characters are represented as:
-
- <char> <DLE> <count>
-
- With a special case that a count of zero indicates a DLE as data,
- not as a repeat marker.
- */
- putc_ncr(c,t) /* put NCR coded bytes */
- unsigned char c; /* next byte of stream */
- FILE *t; /* file to receive data */
- {
- static int lastc; /* last character seen */
-
- switch(state)
- { /* action depends on our state */
-
- case NOHIST: /* no previous history */
- if(c==DLE) /* if starting a series */
- state = INREP; /* then remember it next time */
- else putc_unp(lastc=c,t); /* else nothing unusual */
- return;
-
- case INREP: /* in a repeat */
- if(c) /* if count is nonzero */
- while(--c) /* then repeatedly ... */
- putc_unp(lastc,t); /* ... output the byte */
- else putc_unp(DLE,t); /* else output DLE as data */
- state = NOHIST; /* back to no history */
- return;
-
- default:
- printf("Incorrect NCR unpacking state (%d)",state);
- return 1;
- }
- }
- int getc_unp(f) /* get a byte from an archive */
- FILE *f; /* archive file to read */
- {
- static unsigned getc_wrk=0;
- if(!getc_wrk) /* less overhead to manage ints */
- {
- /* then indicate end of file */
- if(!lzwsize) return EOF; /* if no data left */
- else
- {
- getc_wrk=0xffff; /* the maximum int */
- if(getc_wrk>lzwsize) getc_wrk=lzwsize;
- lzwsize-=getc_wrk;
- }
- }
- getc_wrk--; /* deduct from input counter */
- return fgetc(f) ; /* and return next decoded byte */
- }