home *** CD-ROM | disk | FTP | other *** search
Text File | 1993-01-18 | 54.6 KB | 2,204 lines |
- Newsgroups: comp.sources.misc
- From: leo@ipmce.su (Leonid A. Broukhis)
- Subject: v34i126: freeze - Freeze/melt compression program vers. 2.4, Part02/03
- Message-ID: <1993Jan19.043553.29582@sparky.imd.sterling.com>
- X-Md4-Signature: 2a34590838e1849d96027d7eafecafe3
- Date: Tue, 19 Jan 1993 04:35:53 GMT
- Approved: kent@sparky.imd.sterling.com
-
- Submitted-by: leo@ipmce.su (Leonid A. Broukhis)
- Posting-number: Volume 34, Issue 126
- Archive-name: freeze/part02
- Environment: ISC, Xenix, SunOS, MS-DOS
- Supersedes: freeze: Volume 25, Issue 12-13
-
- #! /bin/sh
- # This is a shell archive. Remove anything before this line, then feed it
- # into a shell via "sh file" or similar. To overwrite existing files,
- # type "sh file -c".
- # Contents: Makefile.in config.h.in config.msc config.tur default.c
- # freeze.1 freeze.h huf.c lz.c lz.h statist.1 statist.c
- # Wrapped by kent@sparky on Mon Jan 18 22:27:48 1993
- PATH=/bin:/usr/bin:/usr/ucb:/usr/local/bin:/usr/lbin ; export PATH
- echo If this archive is complete, you will see the following message:
- echo ' "shar: End of archive 2 (of 3)."'
- if test -f 'Makefile.in' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'Makefile.in'\"
- else
- echo shar: Extracting \"'Makefile.in'\" \(3683 characters\)
- sed "s/^X//" >'Makefile.in' <<'END_OF_FILE'
- X# Shell used to run recipe lines.
- XSHELL = /bin/sh
- X
- X# @NAME@ tokens are template placeholders; presumably "configure" replaces
- X# them when it generates the real makefile from this file -- confirm.
- XCC = @CC@
- X# CFLAGS is empty by default: everything after the first '#' is a comment.
- XCFLAGS = # -O2 # for gcc 2.2.2
- X
- XINSTALL = @INSTALL@
- XINSTALL_PROGRAM = @INSTALL_PROGRAM@ -s
- XINSTALL_DATA = @INSTALL_DATA@
- X
- XLIBS = @LIBS@
- X# Object-file suffix (without the dot) and executable suffix; the msc, turbo
- X# and borland targets override them with OBJ=obj and EXE=.exe.
- XOBJ = o
- XEXE =
- X
- X# First target in the file, so a plain "make" builds prog.
- Xdefault: prog
- X
- X# Added the prefix macro, so that it was easier to change installation place.
- Xprefix = /usr/local
- XDEST = $(prefix)/bin
- XMANDEST = $(prefix)/man/man1
- X# Manual section suffix used for the installed man page names.
- XSEC = 1
- X
- X# Header list.  NOTE(review): no rule visible in this makefile refers to
- X# $(HDRS); the per-object dependencies at the end of the file are what count.
- XHDRS = bitio.h\
- X compat.h\
- X freeze.h\
- X huf.h\
- X lz.h\
- X patchlevel.h
- X
- X# define DEFFILE as a filename with freeze's default Huffman values
- X# e.g. -DDEFFILE=\"/etc/default/freeze\"
- X# !!!! NOTE !!!! default is now $(prefix)/lib/freeze.cnf !!!!!
- X
- XOPTIONS = -DDEFFILE=\"$(prefix)/lib/freeze.cnf\"
- X
- X# Flags passed to lint by the "lint" target.
- XLINTFLAGS = -DCOMPAT -DDEBUG -DGATHER_STAT -x -h
- X
- X# NOTE(review): MAKEFILE is not referenced by any rule visible here.
- XMAKEFILE = makefile
- X
- X# Objects linked into the freeze executable.
- XOBJS = bitio.$(OBJ)\
- X debug.$(OBJ)\
- X decode.$(OBJ)\
- X default.$(OBJ)\
- X encode.$(OBJ)\
- X freeze.$(OBJ)\
- X huf.$(OBJ)\
- X lz.$(OBJ)
- X
- X# Preformatted manual pages built by the "man" target.
- XCATMAN = freeze.man statist.man
- X
- X# Manual page sources.
- XMAN = freeze.1 statist.1
- X
- X# C sources of freeze itself; used by the "lint" target.
- XSRCS = bitio.c\
- X debug.c\
- X decode.c\
- X default.c\
- X encode.c\
- X freeze.c\
- X huf.c\
- X lz.c
- X
- X# Register every suffix used by the suffix rules below.  $(suffix) is not
- X# defined anywhere in this makefile, so .c and the object suffix are named
- X# explicitly; this keeps .c.$(OBJ) a real suffix rule when OBJ=obj as well.
- X.SUFFIXES: .man .1 .c .$(OBJ)
- X
- X# Suffix rule: format a manual page source (.1) with nroff into a .man file.
- X.1.man:
- X nroff -man < $< > $@
- X
- X# Suffix rule: compile one C source, adding the DEFFILE setting in $(OPTIONS).
- X.c.$(OBJ):
- X $(CC) -c $(CFLAGS) $(OPTIONS) $<
- X
- X# Build the three executables.
- Xprog: freeze$(EXE) statist$(EXE) showhuf$(EXE)
- X
- X# Build the preformatted manual pages listed in CATMAN.
- Xman: $(CATMAN)
- X
- X# Run lint over the freeze sources; the report goes to lint.out.
- Xlint: $(SRCS)
- X lint $(LINTFLAGS) $(SRCS) > lint.out
- X
- X# Link rules.  LDFLAGS is not assigned in this makefile; the x286 and msc
- X# targets pass it on the $(MAKE) command line.
- Xfreeze$(EXE): $(OBJS)
- X $(CC) $(LDFLAGS) -o $@ $(OBJS) $(LIBS)
- X
- Xstatist$(EXE): statist.$(OBJ) lz.$(OBJ)
- X $(CC) $(LDFLAGS) -o $@ statist.$(OBJ) lz.$(OBJ) $(LIBS)
- X
- Xshowhuf$(EXE): showhuf.$(OBJ)
- X $(CC) $(LDFLAGS) -o $@ showhuf.$(OBJ) $(LIBS)
- X
- X# clobber = clean, plus the executables, formatted man pages, editor backup
- X# files and the generated config.h.
- Xclobber: clean
- X rm -f freeze$(EXE) statist$(EXE) showhuf$(EXE) *.man \#* *~ config.h
- X
- X# Remove object files and other build by-products (including lint.out and
- X# patch.out, which match *.out).
- Xclean:; rm -f *.$(OBJ) *.b .,* core *.out
- X
- X# Install freeze and statist together with their manual pages.
- X# showhuf is built by "prog" but is not installed by this target.
- Xinstall: $(DEST)/freeze $(DEST)/statist $(MANDEST)/freeze.$(SEC) $(MANDEST)/statist.$(SEC)
- X
- X# Rebuild patch.out: context diffs of every file under distribution/ against
- X# the file of the same name in the current directory.  The leading '-' makes
- X# make ignore the loop's exit status.
- Xpatch:; rm -f patch.out
- X -for i in distribution/*.[ch1] Makefile.in configure ; do \
- X (diff -c $$i `basename $$i` >> patch.out); \
- X done
- X
- X# Install freeze, then hard-link it under its alternative names.
- X# Link failures are ignored (leading '-').
- X$(DEST)/freeze: freeze
- X $(INSTALL_PROGRAM) freeze $@
- X -ln -f $@ $(DEST)/melt
- X -ln -f $@ $(DEST)/unfreeze
- X -ln -f $@ $(DEST)/fcat
- X
- X$(DEST)/statist: statist
- X $(INSTALL_PROGRAM) statist $@
- X
- X# Install the freeze manual page and hard-link it under the alias names.
- X$(MANDEST)/freeze.$(SEC): freeze.1
- X $(INSTALL_DATA) freeze.1 $@
- X -ln -f $@ $(MANDEST)/melt.$(SEC)
- X -ln -f $@ $(MANDEST)/unfreeze.$(SEC)
- X -ln -f $@ $(MANDEST)/fcat.$(SEC)
- X# This is much better for places which keep preformated manpages.
- X# echo ".so man1/freeze.$(SEC)" > $(MANDEST)/melt.$(SEC)
- X# echo ".so man1/freeze.$(SEC)" > $(MANDEST)/unfreeze.$(SEC)
- X# echo ".so man1/freeze.$(SEC)" > $(MANDEST)/fcat.$(SEC)
- X
- X
- X$(MANDEST)/statist.$(SEC): statist.1
- X $(INSTALL_DATA) statist.1 $@
- X
- X# Convenience targets that re-run make with platform-specific settings.
- X
- X# Build with large-model compiler flags (presumably Xenix/286, going by the
- X# target name -- confirm).
- Xx286:
- X $(MAKE) prog "CC=cc -LARGE" CFLAGS="-Ox -Ml2" LDFLAGS="-Ml2"
- X
- X# Install with manual pages placed in /usr/man/man.C, section suffix C.
- Xx286install:
- X $(MAKE) install MANDEST=/usr/man/man.C SEC=C
- X
- X# The three targets below use "if not exist ... copy", which is DOS command
- X# syntax, so they presumably need a DOS make and command interpreter.  Each
- X# copies a ready-made config.h if none exists, then builds prog with .exe and
- X# .obj suffixes and an empty LIBS.
- Xmsc:
- X if not exist config.h copy config.msc config.h
- X $(MAKE) prog EXE=.exe OBJ=obj CC="cl" CFLAGS="-Ms -Ox" LDFLAGS="-Ml" LIBS=""
- X
- Xturbo:
- X if not exist config.h copy config.tur config.h
- X $(MAKE) prog EXE=.exe OBJ=obj CC="tcc" CFLAGS="-O -ms" LIBS=""
- X
- Xborland:
- X if not exist config.h copy config.tur config.h
- X $(MAKE) prog EXE=.exe OBJ=obj CC="bcc" CFLAGS="-O -ms" LIBS=""
- X
- X###
- X# Hand-maintained header dependencies of each object file.
- X# NOTE(review): freeze.$(OBJ) depends on patchlev.h, while HDRS near the top
- X# of this file lists patchlevel.h -- confirm which name the distribution
- X# actually ships.
- Xbitio.$(OBJ): freeze.h compat.h bitio.h
- Xdebug.$(OBJ): freeze.h compat.h huf.h bitio.h
- Xdecode.$(OBJ): freeze.h compat.h huf.h bitio.h
- Xdefault.$(OBJ): freeze.h compat.h
- Xencode.$(OBJ): freeze.h compat.h lz.h huf.h bitio.h
- Xfreeze.$(OBJ): freeze.h compat.h patchlev.h
- Xhuf.$(OBJ): freeze.h compat.h huf.h bitio.h
- Xlz.$(OBJ): freeze.h compat.h lz.h
- Xstatist.$(OBJ): freeze.h compat.h lz.h
- Xshowhuf.$(OBJ): freeze.h huf.h
- END_OF_FILE
- if test 3683 -ne `wc -c <'Makefile.in'`; then
- echo shar: \"'Makefile.in'\" unpacked with wrong size!
- fi
- # end of 'Makefile.in'
- fi
- if test -f 'config.h.in' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'config.h.in'\"
- else
- echo shar: Extracting \"'config.h.in'\" \(2522 characters\)
- sed "s/^X//" >'config.h.in' <<'END_OF_FILE'
- X/* This is a configuration file prototype, copy it to config.h and */
- X/* "#define" appropriate macros if you have problems with "configure". */
- X
- X/* define as "int" or "void"; default (undefined) means "void" */
- X#undef RETSIGTYPE
- X
- X/* define if your computer/system allows unaligned word access */
- X#undef ALLOW_MISALIGN
- X
- X/* define if sizeof(int) == 2 */
- X#undef INT_16_BITS
- X
- X/* define if your computer cannot handle data items of more than 64K */
- X#undef SEGMENTED
- X
- X/* define if filenames can be of more than 14 chars */
- X#undef HAVE_LONG_FILE_NAMES
- X
- X/* define no more than one, according to your standard #include's */
- X/* if you have <dirent.h> */
- X#undef DIRENT
- X/* if you have <sys/ndir.h> */
- X#undef SYSNDIR
- X/* if you have <sys/dir.h> */
- X#undef SYSDIR
- X
- X/* define if you have <sys/stdtypes.h> */
- X#undef HAVE_SYS_STDTYPES_H
- X
- X/* define if you have "rindex" and "setlinebuf" respectively */
- X#undef HAVE_RINDEX
- X#undef HAVE_SETLINEBUF
- X
- X/* define no more than one, according to your standard #include's */
- X/* if you have <utime.h> */
- X#undef UTIME
- X/* if you have <sys/utime.h> */
- X#undef SYSUTIME
- X/* if you have "struct timeval" in <sys/time.h> */
- X#undef SYSTIME
- X
- X/* define if you want to have freeze compatible with vers. 1.0 */
- X#undef COMPAT
- X
- X/* define if your system has multibyte NEWLINE (as in MS-DOS) and */
- X/* you want to do text conversion by default */
- X#undef TEXT_DEFAULT
- X
- X/* define if you want to build freeze in small model (64K data) */
- X/* (segmented architectures only) */
- X#undef TINY
- X
- X/* define if you want to decrease the amount of memory but without */
- X/* 64K restriction */
- X#undef SMALL
- X
- X/* define to increase the compression speed by about 10% at the cost */
- X/* of some tenths of % compression rate */
- X#undef FASTHASH
- X
- X/* default Huffman values, define if you don't like the default */
- X/* 0,0,1,2,6,19,34,0. These below are reasonably good also. */
- X/* #define HUFVALUES 0,1,1,1,4,10,27,18 */
- END_OF_FILE
- if test 2522 -ne `wc -c <'config.h.in'`; then
- echo shar: \"'config.h.in'\" unpacked with wrong size!
- fi
- # end of 'config.h.in'
- fi
- if test -f 'config.msc' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'config.msc'\"
- else
- echo shar: Extracting \"'config.msc'\" \(2490 characters\)
- sed "s/^X//" >'config.msc' <<'END_OF_FILE'
- X/* This is a configuration file prototype, copy it to config.h and */
- X/* "#define" appropriate macros if you have problems with "configure". */
- X
- X/* define as "int" or "void" */
- X#define RETSIGTYPE void
- X
- X/* define if your computer/system allows unaligned word access */
- X#define ALLOW_MISALIGN
- X
- X/* define if sizeof(int) == 2 */
- X#define INT_16_BITS
- X
- X/* define if your computer cannot handle data items of more than 64K */
- X#define SEGMENTED
- X
- X/* define if filenames can be of more than 14 chars */
- X#undef HAVE_LONG_FILE_NAMES
- X
- X/* define no more than one of these, according to your standard #include's */
- X/* if you have <dirent.h> */
- X#undef DIRENT
- X/* if you have <sys/ndir.h> */
- X#undef SYSNDIR
- X/* if you have <sys/dir.h> */
- X#undef SYSDIR
- X
- X/* define if you have <sys/stdtypes.h> */
- X#undef HAVE_SYS_STDTYPES_H
- X
- X/* define if you have "rindex" and "setlinebuf" respectively */
- X#undef HAVE_RINDEX
- X#undef HAVE_SETLINEBUF
- X
- X/* define no more than one of these, according to your standard #include's */
- X/* if you have <utime.h> */
- X#define UTIME
- X/* if you have <sys/utime.h> */
- X#undef SYSUTIME
- X/* if you have "struct timeval" in <sys/time.h> */
- X#undef SYSTIME
- X
- X/* define if you want to have freeze compatible with vers. 1.0 */
- X#undef COMPAT
- X
- X/* define if your system has multibyte NEWLINE (as in MS-DOS) and */
- X/* you want to do text conversion by default */
- X#undef TEXT_DEFAULT
- X
- X/* define if you want to build freeze in small model (64K data) */
- X/* (segmented architectures only) */
- X#define TINY
- X
- X/* define if you want to decrease the amount of memory but without */
- X/* 64K restriction */
- X#undef SMALL
- X
- X/* define to increase the compression speed by about 10% at the cost */
- X/* of some tenths of % compression rate */
- X#undef FASTHASH
- X
- X/* default Huffman values, define if you don't like the default */
- X/* 0,0,1,2,6,19,34,0. These below are reasonably good also. */
- X/* #define HUFVALUES 0,1,1,1,4,10,27,18 */
- END_OF_FILE
- if test 2490 -ne `wc -c <'config.msc'`; then
- echo shar: \"'config.msc'\" unpacked with wrong size!
- fi
- # end of 'config.msc'
- fi
- if test -f 'config.tur' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'config.tur'\"
- else
- echo shar: Extracting \"'config.tur'\" \(2584 characters\)
- sed "s/^X//" >'config.tur' <<'END_OF_FILE'
- X/* This is a configuration file prototype, copy it to config.h and */
- X/* "#define" appropriate macros if you have problems with "configure". */
- X
- X/* define as "int" or "void" */
- X#define RETSIGTYPE void
- X
- X/* define if your computer/system allows unaligned word access */
- X#define ALLOW_MISALIGN
- X
- X/* define if sizeof(int) == 2 */
- X#define INT_16_BITS
- X
- X/* define if your computer cannot handle data items of more than 64K */
- X#define SEGMENTED
- X
- X/* define if filenames can be of more than 14 chars */
- X#undef HAVE_LONG_FILE_NAMES
- X
- X/* define no more than one of these, according to your standard #include's */
- X/* if you have <dirent.h> */
- X#undef DIRENT
- X/* if you have <sys/ndir.h> */
- X#undef SYSNDIR
- X/* if you have <sys/dir.h> */
- X#undef SYSDIR
- X
- X/* define if you have <sys/stdtypes.h> */
- X#undef HAVE_SYS_STDTYPES_H
- X
- X/* define if you have "rindex" and "setlinebuf" respectively */
- X#undef HAVE_RINDEX
- X#undef HAVE_SETLINEBUF
- X
- X/* define no more than one of these, according to your standard #include's */
- X/* if you have <utime.h> */
- X#undef UTIME
- X/* if you have <sys/utime.h> */
- X#undef SYSUTIME
- X/* if you have "struct timeval" in <sys/time.h> */
- X#undef SYSTIME
- X
- X/* BORLAND C has no off_t definition in <sys/types.h> */
- X#define off_t long
- X
- X/* define if you want to have freeze compatible with vers. 1.0 */
- X#undef COMPAT
- X
- X/* define if your system has multibyte NEWLINE (as in MS-DOS) and */
- X/* you want to do text conversion by default */
- X#undef TEXT_DEFAULT
- X
- X/* define if you want to build freeze in small model (64K data) */
- X/* (segmented architectures only) */
- X#define TINY
- X
- X/* define if you want to decrease the amount of memory but without */
- X/* 64K restriction */
- X#undef SMALL
- X
- X/* define to increase the compression speed by about 10% at the cost */
- X/* of some tenths of % compression rate */
- X#undef FASTHASH
- X
- X/* default Huffman values, define if you don't like the default */
- X/* 0,0,1,2,6,19,34,0. These below are reasonably good also. */
- X/* #define HUFVALUES 0,1,1,1,4,10,27,18 */
- END_OF_FILE
- if test 2584 -ne `wc -c <'config.tur'`; then
- echo shar: \"'config.tur'\" unpacked with wrong size!
- fi
- # end of 'config.tur'
- fi
- if test -f 'default.c' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'default.c'\"
- else
- echo shar: Extracting \"'default.c'\" \(1274 characters\)
- sed "s/^X//" >'default.c' <<'END_OF_FILE'
- X#include "freeze.h"
- X#include <errno.h>
- X
- X /*-------------------------------*/
- X /* DEFAULTS FILE HANDLING */
- X /*-------------------------------*/
- X
- X#define OK 0
- X#define FAIL NULL
- X#define NOFILE ((FILE *) 0)
- X#define MAXLINE 128
- X
- Xchar *strchr();
- Xstatic FILE *defd = NOFILE; /* defaults file stream */
- X
- X/*
- X * Open, reopen or close the defaults file.
- X *
- X * fname == NULL: close the current defaults stream, if any, and return OK.
- X * otherwise:     open fname for reading.  If that fails, errno is returned
- X *                and a previously opened stream stays in use.  If it
- X *                succeeds, the previously opened stream (if any) is closed
- X *                before the new one replaces it, so that reopening does not
- X *                leak a FILE, and OK is returned.
- X */
- Xint defopen(fname)
- X        char *fname;
- X{
- X        register FILE *fd;
- X
- X        if (!fname) {
- X                if (defd)
- X                        (void) fclose(defd);
- X                defd = NOFILE;
- X                return OK;
- X        }
- X
- X        if (!(fd = fopen(fname, "r")))
- X                return errno;           /* problems opening file */
- X
- X        if (defd)
- X                (void) fclose(defd);    /* reopen: release the old stream */
- X        defd = fd;
- X        return OK;
- X}
- X
- Xstatic char defline[MAXLINE + 1];
- X
- X/*
- X * Scan the defaults file from its beginning for the first line that starts
- X * with `pattern'.
- X *
- X * Returns a pointer (into the static buffer `defline') to the text that
- X * follows the pattern on that line, with the trailing newline removed.
- X * Returns FAIL when no defaults file is open, when no line matches, or as
- X * soon as a line without a newline is read (an over-long line, or a final
- X * line that is not newline-terminated).
- X */
- Xchar *defread(pattern)
- X        register char *pattern;
- X{
- X        register int plen;
- X        register char *eol;
- X
- X        if (defd == NOFILE)
- X                return FAIL;            /* defaults file not opened */
- X
- X        rewind(defd);
- X        plen = strlen(pattern);
- X
- X        for (;;) {
- X                if (!fgets(defline, MAXLINE, defd))
- X                        break;          /* end of file: nothing matched */
- X
- X                eol = strchr(defline, '\n');
- X                if (!eol)
- X                        return FAIL;    /* line too long */
- X
- X                /* only lines at least as long as the pattern can match */
- X                if (eol - defline >= plen) {
- X                        *eol = '\0';
- X                        if (strncmp(pattern, defline, plen) == 0)
- X                                return defline + plen;  /* match found */
- X                }
- X        }
- X
- X        return FAIL;                    /* no matching lines */
- X}
- END_OF_FILE
- if test 1274 -ne `wc -c <'default.c'`; then
- echo shar: \"'default.c'\" unpacked with wrong size!
- fi
- # end of 'default.c'
- fi
- if test -f 'freeze.1' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'freeze.1'\"
- else
- echo shar: Extracting \"'freeze.1'\" \(7193 characters\)
- sed "s/^X//" >'freeze.1' <<'END_OF_FILE'
- X.PU
- X.TH FREEZE 1 local
- X.SH NAME
- Xfreeze, unfreeze, melt, fcat \- compress and uncompress files
- X.SH SYNOPSIS
- X.ll +8
- X.B freeze
- X[
- X.B \-cdfvVgx
- X] [
- X.I "filename | type \&..."
- X]
- X.ll -8
- X.br
- X.B unfreeze
- X[
- X.B \-cfvV
- X] [
- X.I "filename \&..."
- X]
- X.br
- X.B melt
- X[
- X.B \-cfvV
- X] [
- X.I "filename \&..."
- X]
- X.br
- X.B fcat
- X[
- X.I "filename \&..."
- X]
- X.SH DESCRIPTION
- XCompresses the specified files or standard input.
- XEach file is replaced by a file with the extension
- X.B "\&.F,"
- Xbut only if the file got smaller. If no files are specified,
- Xthe compression is applied to the standard input
- Xand is written to standard output regardless of the results.
- XCompressed files can be restored to their original form by specifying the
- X.B \-d
- Xoption, or by running
- X.I melt
- Xor
- X.I unfreeze
- X(both linked to
- X.IR freeze ),
- Xon the
- X.B "\&.F"
- Xfiles or the standard input.
- X.PP
- XIf the output file exists, it will not be overwritten unless the
- X.B \-f
- Xflag is given. If
- X.B \-f
- Xis not specified and
- X.I freeze
- Xis run in the foreground,
- Xthe user is prompted
- Xas to whether the file should be overwritten.
- X.PP
- XIf the
- X.B \-g
- Xflag is given, a slightly less powerful (compression
- Xrate is 1.5% less), but somewhat faster heuristic is used. This flag can be
- Xused more than once (this mode is quite useful when freezing bitmaps) for
- Xadditional speedup.
- X.PP
- XIf you want to improve compression rate at the cost of speed, use
- X.B \-x
- Xflag. Like
- X.B \-g,
- Xthis flag can be used more than once, thus increasing compression rate.
- XNote that it is usually not worth using it more than twice.
- X.PP
- XIf the
- X.B \-f
- Xflag is given, all files specified are replaced with
- X.B "\&.F"
- Xfiles \- even if the file didn't get smaller.
- X.PP
- XWhen file names are given, the ownership (if run by root), modes, accessed
- Xand modified times are maintained between the file and its
- X.B "\&.F"
- Xversion. In this respect,
- X.I freeze
- Xcan be used for archival purposes, yet can still be used with
- X.IR make "(1)"
- Xafter melting.
- X.PP
- XThe
- X.B \-c
- Xoption causes the results of the freeze/melt operation to be written
- Xto stdout; no files are changed. The
- X.I fcat
- Xprogram is the same as specifying
- X.B \-c
- Xto
- X.I melt
- X(all files are unpacked and written to stdout).
- X.PP
- XThe
- X.B \-v
- X(verbose) option causes the diagnostics (at the end of each file processing)
- Xto be printed to stderr, and the
- X.B \-vv
- Xoption causes the progress indicator to be drawn to the same place.
- X.PP
- X.I Type
- Xis a token preceded by a '+' or a '--', which defines the type
- Xof following files in the command string. An explicit definition
- Xof the file's type can give up to 2% of additional compression.
- XThe list of types is stored in file
- X.IR /usr/local/lib/freeze.cnf .
- XTypes may be abbreviated as long as they are not ambiguous. You can also specify
- Xvalues for the static Huffman table by using a list of 8 numbers
- Xseparated by commas instead of
- X.I type.
- X.PP
- X.I Freeze
- Xuses the Lempel-Ziv algorithm on the first pass and the dynamic
- XHuffman algorithm on the second one. The size of sliding window
- Xis 8K, and the maximum length of matched string is 256.
- XThe positions on the window are coded using a static Huffman table.
- X.PP
- XA two byte magic number is prepended to the file
- Xto ensure that neither melting of random text nor refreezing of
- Xalready frozen text are attempted. In addition, the characteristics
- Xof the static Huffman table being used during
- X.I freeze
- Xare written to the file so that these characteristics may be adapted
- Xto concrete conditions.
- X.PP
- X.ne 8
- XThe amount of compression obtained depends on the size of the
- Xinput file and the distribution of character substrings and their
- Xprobabilities.
- XTypically, text files, such as C programs,
- Xare reduced by 60\-75%, executable files are reduced by 50%.
- XCompression is generally much better than that achieved by
- XLZW coding (as used in
- X.IR compress ),
- Xor Huffman coding
- X.RI ( pack ),
- Xthough it takes more time to compute.
- X.PP
- XIf the
- X.B \-V
- X(version) flag is given, the program's version number and compilation
- Xoptions are printed.
- X.PP
- XThe exit status is normally 0;
- Xif the last file gets bigger after freezing, the exit status is 2;
- Xif an error occurs, the exit status is 1.
- X.SH "SEE ALSO"
- Xcompact(1), pack(1), compress(1)
- X.SH "DIAGNOSTICS"
- XUnknown flag:
- X.I "\'x\';"
- XUsage: freeze [-cdfvVg] [file|+type ...]
- X.in +8
- XInvalid options were specified on the command line.
- X.in -8
- X.IR file :
- Xnot in frozen format
- X.in +8
- XThe specified file has not been frozen.
- X.in -8
- X.IR file :
- Xalready has .F suffix -- no change
- X.in +8
- XCannot compress a file that has a ".F" suffix.
- X.IR mv "(1)"
- Xthe file to a different name and try again.
- X.in -8
- X.IR file :
- Xfilename too long to tack on .F
- X.in +8
- XThe specified file cannot be compressed because its filename is longer than
- X12 characters.
- X.IR mv "(1)"
- Xthe file to a different name and try again. This message does not occur on
- X4.XBSD systems.
- X.in -8
- X.I file
- Xalready exists; do you wish to overwrite (y or n)?
- X.in +8
- XRespond "y" if you want the output file to be replaced; "n" if you want it
- Xto be left alone.
- X.in -8
- X.IR file :
- X.IR xx %
- X.in +8
- Xor
- X.in -8
- X.IR xxx K
- X.in +8
- XThese message fragments are written during the processing of a file, if
- X.B \-vv
- Xoption was given in the command line (in percents, if the length of file
- Xbeing processed is known; in Kbytes otherwise).
- X.in -8
- XFreezing:
- X.I "xx.xx% (y.yy"
- Xbits)
- X.in +8
- XThis message fragment gives the percentage of the input file that has been
- Xsaved by freezing and the number of remaining bits per byte of original file.
- X.in -8
- X-- not a regular file: unchanged
- X.in +8
- XThis message fragment is written when the input file is not a regular file.
- XThe input file is left unchanged.
- X.in -8
- X-- has
- X.I xx
- Xother links: unchanged
- X.in +8
- XThis message fragment is written when the input file has links. The input
- Xfile is left unchanged. See
- X.IR ln "(1)"
- Xfor more information.
- X.in -8
- X-- file unchanged
- X.in +8
- XThis message fragment is written when no savings are achieved by
- Xfreezing. The input file is left unchanged.
- X.in -8
- X-- replaced with
- X.I file
- X.in +8
- XThis message fragment is written when a file has been successfully
- Xfrozen/melted.
- X.in -8
- XUsing "
- X.I type
- X" type
- X.in +8
- XThis message indicates a successful switching to
- Xposition table for mentioned file type.
- X.in -8
- X"
- X.I xxx
- X" - no such file type
- X.in +8
- Xor
- X.in -8
- X.I xxx
- X- a list of 8 numbers expected
- X.in +8
- XThis message means the given file type does not exist or
- Xthe given string contains a comma, but is not a valid list
- Xof values for static Huffman table.
- X.in -8
- Xmelt: corrupt input
- X.in +8
- XThis message fragment is written when an error in header or
- Xunexpected end of frozen file is detected. Partial
- X(or empty, if there was an error in the header) file is created.
- X.in -8
- Xalready frozen -- file unchanged
- X.in +8
- XThis message fragment is written when an input file already has
- XFreeze's magic header.
- X.in -8
- XInvalid position table
- X.in +8
- Xor
- X.in -8
- X"
- X.I type
- X" - invalid entry
- X.in +8
- XThese messages appear only if Freeze has been made with incorrect
- Xdata for static Huffman table. They never appear when freeze
- Xis called from a public access directory.
- X.in -8
- XUnknown header format
- X.in +8
- XUnknown values of flag bits were discovered in the header
- Xof frozen file.
- X.in -8
- X.SH "BUGS"
- XPlease send descriptions of bugs, incompatibilities, etc. to
- Xleo@ipmce.su.
- END_OF_FILE
- if test 7193 -ne `wc -c <'freeze.1'`; then
- echo shar: \"'freeze.1'\" unpacked with wrong size!
- fi
- # end of 'freeze.1'
- fi
- if test -f 'freeze.h' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'freeze.h'\"
- else
- echo shar: Extracting \"'freeze.h'\" \(4101 characters\)
- sed "s/^X//" >'freeze.h' <<'END_OF_FILE'
- X#include <stdio.h>
- X#include "config.h"
- X
- X#ifdef HAVE_SYS_STDTYPES_H
- X# include <sys/stdtypes.h>
- X#endif
- X
- X#ifndef getc
- X# ifdef m88k /* Green Hill C library bug. */
- X# define getc(p) (--(p)->_cnt < 0 ? __filbuf(p) : (int) *(p)->_ptr++)
- X# else
- X# define getc(p) (--(p)->_cnt < 0 ? _filbuf(p) : (int) *(p)->_ptr++)
- X# endif
- X#endif
- X#ifndef putc
- X# ifdef m88k /* Green Hill C library bug. */
- X# define putc(x, p) (--(p)->_cnt < 0 ? __flsbuf((unsigned char) (x), (p)) : (int) (*(p)->_ptr++ = (unsigned char) (x)))
- X# else
- X# define putc(x, p) (--(p)->_cnt < 0 ? _flsbuf((unsigned char) (x), (p)) : (int) (*(p)->_ptr++ = (unsigned char) (x)))
- X# endif
- X#endif
- X
- X#if !defined(MSDOS) && defined(__MSDOS__)
- X# define MSDOS
- X#endif
- X
- X#ifdef MSDOS
- X# define DOS
- X# include <fcntl.h>
- X#endif /* MSDOS */
- X
- X#ifdef TOS
- X# define DOS
- X# define O_TEXT 0x01
- X# define O_BINARY 0x02
- X#endif
- X
- X#include <ctype.h>
- X#include <signal.h>
- X
- X#ifndef RETSIGTYPE
- X# define RETSIGTYPE void
- X#endif /* RETSIGTYPE */
- X
- X#ifndef TOS
- X# include <sys/types.h>
- X# include <sys/stat.h>
- X#else
- X# include <tos.h>
- X# include <types.h>
- X#endif
- X
- X#ifdef SYSTIME
- X# include <sys/time.h>
- X# define UTIMES
- X#else
- X# ifdef UTIME
- X# include <utime.h>
- X# else
- X# ifdef SYSUTIME
- X# include <sys/utime.h>
- X# else
- X# ifdef unix
- X/* UNIX without any declaration of utimbuf .... Strange! */
- Xstruct utimbuf {
- X time_t actime;
- X time_t modtime;
- X};
- Xextern int utime();
- X# else
- X# ifndef DOS
- X# define BITS no_utimbuf_definition_on_unknown_system
- X# endif
- X# endif
- X# endif
- X# endif
- X#endif
- X
- X/* for MAXNAMLEN only !!! */
- X#ifdef DIRENT
- X# include <dirent.h>
- X#else
- X# ifdef SYSNDIR
- X# include <sys/ndir.h>
- X# else
- X# ifdef SYSDIR
- X# include <sys/dir.h>
- X# endif
- X# endif
- X#endif
- X
- X#ifndef MAXNAMLEN
- X# define MAXNAMLEN 255
- X#endif
- X
- X#if MAXNAMLEN < 255
- X# undef MAXNAMLEN
- X# define MAXNAMLEN 255
- X#endif
- X
- X#ifdef DEBUG
- X# include <assert.h>
- X#endif /* DEBUG */
- X
- X#ifdef DOS
- X# include <stdlib.h>
- X#endif /* DOS */
- X
- X#ifdef __TURBOC__
- X# ifdef MSDOS
- X# include <io.h>
- X# include <alloc.h>
- X# else /* TOS */
- X# include <ext.h>
- X# endif /* MSDOS */
- X#endif /* __TURBOC__ */
- X
- Xtypedef unsigned short us_t;
- Xtypedef unsigned char uc_t;
- Xtypedef unsigned long ul_t;
- X
- X#define LOOKAHEAD 256 /* look-ahead buffer size */
- X#define MAXDIST 7936
- X#define WINSIZE (MAXDIST + LOOKAHEAD) /* must be a power of 2 */
- X#define WINMASK (WINSIZE - 1)
- X
- X#define THRESHOLD 2
- X
- X#define N_CHAR2 (256 - THRESHOLD + LOOKAHEAD + 1) /* code: 0 .. N_CHARi - 1 */
- X#define T2 (N_CHAR2 * 2 - 1) /* size of table */
- X
- X#define ENDOF 256 /* pseudo-literal */
- X
- Xextern uc_t Table2[];
- X
- Xextern long in_count, bytes_out;
- Xextern off_t file_length;
- X
- Xextern uc_t text_buf[];
- X
- Xextern long indc_threshold, indc_count;
- X
- Xextern short do_melt, topipe, greedy, quiet, force; /* useful flags */
- X
- X#define MAGIC1 ((uc_t)'\037')
- X#define MAGIC2_1 ((uc_t)'\236') /* freeze vers. 1.X */
- X#define MAGIC2_2 ((uc_t)'\237')
- X
- Xextern int exit_stat;
- X
- X#ifdef DEBUG
- Xextern short debug;
- Xextern short verbose;
- Xextern char * pr_char();
- X#endif /* DEBUG */
- X
- X#if defined(GATHER_STAT) || defined(DEBUG)
- Xextern long refers_out, symbols_out;
- X#endif
- X
- Xextern short DecodeChar(), DecodePosition(), GetNBits();
- Xextern void melt2(), (*meltfunc)(), writeerr(), prratio(), prbits(), freeze();
- X
- X#ifdef COMPAT
- X#include "compat.h"
- X#endif
- X
- X/*
- X * Progress indicator, expanded inline by its users.
- X *
- X * Does nothing unless quiet < 0 and in_count has passed indc_count.  It then
- X * calls writeerr() if stdout is in an error state and prints to stderr
- X * either
- X *   - a percentage, ftell(stdin) * 100 / file_length, when file_length is
- X *     non-zero (printed only when the value has changed), or
- X *   - in_count / 1024 followed by 'K' when file_length is zero.
- X * Each report is followed by backspaces so that the next one overwrites it.
- X * indc_count is advanced by indc_threshold; in the unknown-length case
- X * indc_threshold itself also grows by 1024 on every report.
- X *
- X * NOTE(review): ftell(stdin) * 100 looks like it can overflow where long is
- X * 32 bits and the input exceeds roughly 21 MB -- confirm.
- X */
- X#define INDICATOR \
- Xif (quiet < 0 && (in_count > indc_count)) {\
- X if (ferror(stdout))\
- X writeerr();\
- X if (file_length) {\
- X static int percents, old_percents = -1;\
- X if ((percents = ftell(stdin) * 100 / file_length) !=\
- X old_percents) {\
- X fprintf(stderr, " %2d%%\b\b\b\b", percents);\
- X old_percents = percents;\
- X }\
- X indc_count += indc_threshold;\
- X } else {\
- X fprintf(stderr, " %5ldK\b\b\b\b\b\b\b", in_count / 1024);\
- X indc_count += indc_threshold;\
- X indc_threshold += 1024;\
- X }\
- X fflush (stderr);\
- X}
- X
- X#ifdef HAVE_RINDEX
- X#define strchr index
- X#define strrchr rindex
- X#endif
- X
- Xextern char *strchr(), *strrchr();
- X
- END_OF_FILE
- if test 4101 -ne `wc -c <'freeze.h'`; then
- echo shar: \"'freeze.h'\" unpacked with wrong size!
- fi
- # end of 'freeze.h'
- fi
- if test -f 'huf.c' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'huf.c'\"
- else
- echo shar: Extracting \"'huf.c'\" \(7717 characters\)
- sed "s/^X//" >'huf.c' <<'END_OF_FILE'
- X#include "freeze.h"
- X#include "huf.h"
- X#include "bitio.h"
- X
- X/*----------------------------------------------------------------------*/
- X/* */
- X/* HUFFMAN ENCODING */
- X/* */
- X/*----------------------------------------------------------------------*/
- X
- X/* TABLES OF ENCODE/DECODE for upper 6 bits position information */
- X
- X/* The contents of `Table2' are used for freezing only, so we use
- X * it freely when melting.
- X */
- X
- X#ifndef HUFVALUES
- X#define HUFVALUES 0,0,1,2,6,19,34,0
- X#endif
- X
- Xuc_t Table2[9] = { 0, HUFVALUES };
- X
- Xuc_t p_len[64]; /* These arrays are built according to the values */
- Xuc_t d_len[256]; /* of `Table2' above, which come from the defaults, */
- X /* the command line or the header of a frozen file */
- X
- Xuc_t code[256];
- X
- Xus_t freq[T2 + 1]; /* frequency table */
- Xshort son[T2]; /* points to son node (son[i],son[i+1]) */
- Xshort prnt[T2 + N_CHAR2]; /* points to parent node */
- X
- Xstatic short t, r, chars;
- X
- X/* notes :
- X prnt[t .. t + chars - 1] maps each symbol code to the index of
- X the leaf node that currently represents it.
- X*/
- X
- X/* Builds the initial dynamic Huffman tree with `n_char' leaves and resets
- X   the byte counters.
- X
- X   Nodes 0 .. n_char-1 start out as the leaves, each with frequency 1;
- X   son[] of a leaf holds symbol + t, and prnt[symbol + t] holds the leaf's
- X   node index.  Internal nodes n_char .. r are then created by pairing
- X   nodes (0,1), (2,3), ...; r = t - 1 is the root and freq[t] is a 0xffff
- X   sentinel.
- X*/
- X
- Xvoid StartHuff (n_char)
- X        int n_char;
- X{
- X        register short leaf, node;
- X
- X        chars = n_char;
- X        t = n_char * 2 - 1;
- X        r = t - 1;
- X
- X        /* every symbol starts with an a priori frequency of 1 */
- X        for (leaf = 0; leaf < n_char; leaf++) {
- X                freq[leaf] = 1;
- X                son[leaf] = leaf + t;
- X                prnt[leaf + t] = leaf;
- X        }
- X
- X        /* build the balanced tree: each new node adopts two neighbours */
- X        for (leaf = 0, node = n_char; node <= r; leaf += 2, node++) {
- X                freq[node] = freq[leaf] + freq[leaf + 1];
- X                son[node] = leaf;
- X                prnt[leaf] = prnt[leaf + 1] = node;
- X        }
- X
- X        freq[t] = 0xffff;       /* sentinel above the root */
- X        prnt[r] = 0;            /* the root has no parent */
- X        in_count = 1;
- X        bytes_out = 5;
- X#if defined(DEBUG) || defined (GATHER_STAT)
- X        symbols_out = refers_out = 0;
- X#endif
- X}
- X
- X/* Rebuilds the Huffman tree (`chars' leaves, `t' nodes) after halving the
- X   leaf frequencies.  Called by update() when the root frequency reaches
- X   MAX_FREQ.  Works in place on freq[], son[] and prnt[].
- X*/
- X
- Xvoid reconst ()
- X{
- X register int i, j, k;
- X register int f;
- X
- X#ifdef DEBUG
- X if (quiet < 0)
- X fprintf(stderr,
- X "Reconstructing Huffman tree: symbols: %ld, references: %ld\n",
- X symbols_out, refers_out);
- X#endif
- X
- X/* Collect the leaf nodes (son[i] >= t) at the front of the tables,
- X   keeping their order, and set each frequency to (freq + 1) / 2.
- X*/
- X j = 0;
- X for (i = 0; i < t; i++) {
- X if (son[i] >= t) {
- X freq[j] = (freq[i] + 1) / 2;
- X son[j] = son[i];
- X j++;
- X }
- X }
- X/* Build tree. Link sons first: nodes i and i+1 get a new parent whose
- X   frequency f is their sum.  The parent is inserted at position k, found
- X   by scanning down from j-1 past every larger frequency; entries k..j-1
- X   of freq[] and son[] are shifted up by one to make room.
- X*/
- X
- X for (i = 0, j = chars; j < t; i += 2, j++) {
- X k = i + 1;
- X f = freq[j] = freq[i] + freq[k];
- X for (k = j - 1; f < freq[k]; k--);
- X k++;
- X { register us_t *p, *e;
- X for (p = &freq[j], e = &freq[k]; p > e; p--)
- X p[0] = p[-1];
- X freq[k] = f;
- X }
- X { register short *p, *e;
- X for (p = &son[j], e = &son[k]; p > e; p--)
- X p[0] = p[-1];
- X son[k] = i;
- X }
- X }
- X
- X/* Link parents: a leaf entry (son >= t) records its node in the
- X   symbol-to-leaf part of prnt[]; an internal node becomes the parent
- X   of both of its sons, k and k+1.
- X*/
- X for (i = 0; i < t; i++) {
- X if ((k = son[i]) >= t) {
- X prnt[k] = i;
- X } else {
- X prnt[k] = prnt[k + 1] = i;
- X }
- X }
- X}
- X
- X
- X/* Increments the frequency of symbol `c' and of every node on its path
- X   towards the root, swapping nodes where needed to keep freq[] in
- X   non-decreasing order.  Rebuilds the tree first if the root frequency
- X   has reached MAX_FREQ.
- X*/
- X
- Xvoid update (c)
- X register int c;
- X{
- X register us_t *p;
- X register int i, j, k, l;
- X
- X if (freq[r] == MAX_FREQ) {
- X reconst();
- X }
- X c = prnt[c + t]; /* leaf node that represents the symbol */
- X do {
- X k = ++freq[c];
- X
- X /* swap nodes when become wrong frequency order. */
- X if (k > freq[l = c + 1]) {
- X /* scan forward to the first frequency >= k; `l' becomes the
- X    node just before it.  StartHuff() stores 0xffff in freq[t]
- X    as a sentinel above the root. */
- X for (p = freq+l+1; k > *p++; ) ;
- X l = p - freq - 2;
- X freq[c] = p[-2];
- X p[-2] = k;
- X
- X /* exchange the subtrees of nodes c and l and fix the parent
- X    links of their sons (an internal node owns sons i and i+1) */
- X i = son[c];
- X prnt[i] = l;
- X if (i < t) prnt[i + 1] = l;
- X
- X j = son[l];
- X son[l] = i;
- X
- X prnt[j] = c;
- X if (j < t) prnt[j + 1] = c;
- X son[c] = j;
- X
- X c = l;
- X }
- X /* StartHuff() sets prnt[r] to 0, so a parent of 0 ends the climb */
- X } while ((c = prnt[c]) != 0); /* loop until reach to root */
- X}
- X
- X/* Encodes the literal or the length information `c': collects the code
- X   bits while walking from the symbol's leaf up to the root, writes them
- X   with Putcode() and then updates the tree for `c'.
- X*/
- X
- Xvoid EncodeChar (c)
- X int c;
- X{
- X ul_t i;
- X register int j, k;
- X
- X i = 0; /* code bits, collected from bit 31 downwards */
- X j = 0; /* number of code bits collected */
- X k = prnt[c + t];
- X
- X/* trace links from leaf node to root */
- X
- X do {
- X i >>= 1;
- X
- X/* if node index is odd, trace larger of sons */
- X if (k & 1) i += 0x80000000;
- X
- X j++;
- X } while ((k = prnt[k]) != r) ;
- X
- X/* `j' never reaches the value of 32 ! */
- X
- X/* Putcode() is given a bit count and a 16-bit value; presumably it emits
- X   the top `count' bits of that value (defined elsewhere -- confirm).
- X   Codes longer than 16 bits are written in two pieces. */
- X if (j > 16) {
- X Putcode(16, (us_t)(i >> 16));
- X Putcode(j - 16, (us_t)i);
- X } else {
- X Putcode(j, (us_t)(i >> 16));
- X }
- X update(c);
- X}
- X
- X/* Encodes the position information.
- X   Everything above the low 7 bits of `c' selects an entry of the
- X   static code[]/p_len[] tables; the low 7 bits are written as they are.
- X*/
- X
- Xvoid EncodePosition (c)
- X    register int c;
- X{
- X    register us_t slot = c >> 7;
- X
- X    /* upper part: variable-length code taken from the table */
- X    Putcode((int)p_len[slot], (us_t)(code[slot]) << 8);
- X
- X    /* lower 7 bits, literally */
- X    Putcode(7, (us_t)(c & 0x7f) << 9);
- X}
- X
- X
- X/* Decodes the literal or length info and returns its value.
- X   Returns ENDOF (after calling crpt_message) when `crpt_flag' was
- X   already set on entry; otherwise `crpt_flag' is refreshed from
- X   feof(stdin) before returning the symbol.
- X*/
- X
- Xshort DecodeChar ()
- X{
- X    register int sym = son[r];
- X
- X    /* walk from the root down to a leaf: each input bit selects the
- X       smaller (son[]) or the larger (son[] + 1) son node */
- X    for (;;) {
- X        if (sym >= t)
- X            break;
- X        sym += GetBit();
- X        sym = son[sym];
- X    }
- X    sym -= t;
- X    update(sym);
- X
- X    if (!crpt_flag) {
- X        crpt_flag = feof(stdin);
- X        return sym;
- X    }
- X    crpt_message();
- X    return ENDOF;
- X}
- X
- X/* Decodes the position info and returns it.
- X   One whole byte is fetched and looked up in code[]/d_len[]; the bits
- X   of that byte not consumed by the table code, plus d_len[] - 1
- X   further input bits, form the low 7 bits of the result.
- X*/
- X
- Xshort DecodePosition ()
- X{
- X    register us_t first, extra, upper, lower;
- X
- X    /* decode upper 6 bits from the table */
- X
- X    first = GetByte();
- X    crpt_flag = feof(stdin);
- X
- X    upper = (us_t)code[first] << 7;
- X    extra = d_len[first] - 1;
- X
- X    /* get lower 7 bits literally */
- X
- X    lower = ((first << extra) | GetNBits (extra)) & 0x7f;
- X    return upper | lower;
- X}
- X
- X
- X/* Initializes static Huffman arrays.
- X   table[i] (i = 1..8) is the number of position codes that are i bits
- X   long.  p_len[] receives the length of every code; the table is
- X   rejected (message and exit(1)) unless the codes exactly fill the
- X   256 possible byte images.  Then code[] is built for encoding when
- X   `do_melt' is zero, or code[] and d_len[] for decoding otherwise.
- X*/
- X
- Xvoid init(table) uc_t * table; {
- X    short nbits, idx, cnt, val, out, ncodes;
- X
- X    ncodes = 0;
- X    idx = 0;
- X
- X    /* There are `table[nbits]' `nbits'-bits Huffman codes */
- X
- X    for (nbits = 1; nbits <= 8; nbits++) {
- X        ncodes += table[nbits] << (8 - nbits);
- X        cnt = table[nbits];
- X        while (cnt) {
- X            p_len[idx] = nbits;
- X            idx++;
- X            cnt--;
- X        }
- X    }
- X    if (ncodes != 256) {
- X        fprintf(stderr, "Invalid position table\n");
- X        exit(1);
- X    }
- X    ncodes = idx;
- X
- X    if (do_melt != 0) {
- X
- X        /* Melting: building the table for decoding */
- X
- X        out = 0;
- X        for (idx = 0; idx < ncodes; idx++) {
- X            cnt = 1 << (8 - p_len[idx]);
- X            while (cnt--)
- X                code[out++] = idx;
- X        }
- X
- X        out = 0;
- X        for (idx = 0; idx < ncodes; idx++) {
- X            cnt = 1 << (8 - p_len[idx]);
- X            while (cnt--)
- X                d_len[out++] = p_len[idx];
- X        }
- X    } else {
- X
- X        /* Freezing: building the table for encoding */
- X
- X        val = 0;
- X        idx = 0;
- X        for (;;) {
- X            code[idx] = val << (8 - p_len[idx]);
- X            val++;
- X            idx++;
- X            if (idx == ncodes)
- X                break;
- X            val <<= p_len[idx] - p_len[idx - 1];
- X        }
- X    }
- X}
- X
- X/* Writes a 3-byte header into the frozen form of file.
- X   Table2[1]..Table2[5] are packed into 15 bits (1, 2, 3, 4 and 5 bits
- X   wide, lowest field first) and sent low byte first; Table2[6] follows
- X   in the third byte.  Table2[7] and Table2[8] aren't stored, see
- X   `read_header'.
- X*/
- X
- Xvoid write_header() {
- X    us_t packed;
- X
- X    packed  =  Table2[1] & 1;
- X    packed |= (Table2[2] & 3)    << 1;
- X    packed |= (Table2[3] & 7)    << 3;
- X    packed |= (Table2[4] & 0xF)  << 6;
- X    packed |= (Table2[5] & 0x1F) << 10;
- X
- X    putchar((int)(packed & 0xFF));
- X    putchar((int)((packed >> 8)));
- X    putchar((int)(Table2[6] & 0x3F));
- X    if (ferror(stdout))
- X        writeerr();
- X}
- X
- X/* Reconstructs `Table2' from the header of the frozen file and checks
- X   its correctness. Returns 0 if OK, EOF otherwise.
- X   The first two bytes (low byte first) carry Table2[1]..Table2[5] in
- X   fields of 1, 2, 3, 4 and 5 bits; the 16th bit and the two top bits
- X   of the third byte must be zero.  Table2[7] and Table2[8] are not
- X   stored but computed from the other six values.
- X*/
- X
- Xint read_header() {
- X short i, j;
- X i = getchar() & 0xFF;
- X i |= (getchar() & 0xFF) << 8;
- X /* peel the fields off, lowest first */
- X Table2[1] = i & 1; i >>= 1;
- X Table2[2] = i & 3; i >>= 2;
- X Table2[3] = i & 7; i >>= 3;
- X Table2[4] = i & 0xF; i >>= 4;
- X Table2[5] = i & 0x1F; i >>= 5;
- X
- X /* the third byte is only read when the 16th bit is clear */
- X if (i & 1 || (i = getchar()) & 0xC0) {
- X fprintf(stderr, "Unknown header format.\n");
- X crpt_message();
- X return EOF;
- X }
- X
- X Table2[6] = i & 0x3F;
- X
- X i = Table2[1] + Table2[2] + Table2[3] + Table2[4] +
- X Table2[5] + Table2[6];
- X
- X i = 62 - i; /* free variable length codes for 7 & 8 bits */
- X
- X j = 128 * Table2[1] + 64 * Table2[2] + 32 * Table2[3] +
- X 16 * Table2[4] + 8 * Table2[5] + 4 * Table2[6];
- X
- X j = 256 - j; /* free byte images for these codes */
- X
- X/* Equation:
- X Table2[7] + Table2[8] = i
- X 2 * Table2[7] + Table2[8] = j
- X*/
- X j -= i; /* j is now Table2[7] */
- X if (j < 0 || i < j) {
- X /* no non-negative solution: the header is inconsistent */
- X crpt_message();
- X return EOF;
- X }
- X Table2[7] = j;
- X Table2[8] = i - j;
- X
- X#ifdef DEBUG
- X fprintf(stderr, "Codes: %d %d %d %d %d %d %d %d\n",
- X Table2[1], Table2[2], Table2[3], Table2[4],
- X Table2[5], Table2[6], Table2[7], Table2[8]);
- X#endif
- X return 0;
- X}
- X
- X#ifdef COMPAT
- X
- Xuc_t Table1[9] = { 0, 0, 0, 1, 3, 8, 12, 24, 16 };
- X
- X/* Position decoder for files made by the 1st version of Freeze.
- X   Works like DecodePosition, except that only 6 low-order bits are
- X   taken literally and d_len[] - 2 extra bits are read.
- X*/
- X
- Xshort DecodePOld ()
- X{
- X    register us_t first, extra, upper, lower;
- X
- X    first = GetByte();
- X    crpt_flag = feof(stdin);
- X
- X    upper = (us_t)code[first] << 6;
- X    extra = d_len[first] - 2;
- X
- X    lower = ((first << extra) | GetNBits (extra)) & 0x3f;
- X    return upper | lower;
- X}
- X#endif
- END_OF_FILE
- if test 7717 -ne `wc -c <'huf.c'`; then
- echo shar: \"'huf.c'\" unpacked with wrong size!
- fi
- # end of 'huf.c'
- fi
- if test -f 'lz.c' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'lz.c'\"
- else
- echo shar: Extracting \"'lz.c'\" \(3906 characters\)
- sed "s/^X//" >'lz.c' <<'END_OF_FILE'
- X#include "freeze.h"
- X#include "lz.h"
- X
- X/*----------------------------------------------------------------------*/
- X/* */
- X/* LZSS ENCODING */
- X/* */
- X/*----------------------------------------------------------------------*/
- X
- Xuc_t text_buf[WINSIZE + LOOKAHEAD - 1];/* cyclic buffer with an overlay */
- Xint match_position; /* current position of
- X matched pattern */
- Xint chain_length; /* search budget, derived by InitTree()
- X from CHAIN_THRESHOLD and `greedy' */
- X
- X/* hashtab[] holds, for every hash value, the most recently inserted
- X position; next[] links each window slot to the position
- X previously stored under the same hash value (see InsertNode).
- X*/
- X
- Xhash_t hashtab[array_size]; /* a VERY large array :-) */
- Xhash_t next[WINSIZE];
- X
- X#ifdef GATHER_STAT
- X/* counters incremented by get_next_match() */
- Xlong node_matches, node_compares, node_prolongations;
- X#endif /* GATHER_STAT */
- X
- X/* Initialize the data structures: clear the hash table (and the
- X   statistics counters, if gathered) and derive `chain_length' from
- X   CHAIN_THRESHOLD and `greedy'.
- X   Although there are no more trees in the LZ algorithm
- X   implementation, the routine name is kept intact :-)
- X*/
- X
- Xvoid InitTree ()
- X{
- X
- X#if BITS == 16
- X    ul_t slot;
- X#else
- X    unsigned slot;
- X#endif
- X
- X#ifdef GATHER_STAT
- X    node_matches = node_compares = node_prolongations = 0;
- X#endif /* GATHER_STAT */
- X
- X    slot = 0;
- X    while (slot < array_size) {
- X        hashtab[slot] = 0;
- X        slot++;
- X    }
- X
- X    /* a positive `greedy' shortens the search, a negative one extends it */
- X    if (greedy < 0)
- X        chain_length = ((CHAIN_THRESHOLD - 1) << -greedy) + 1;
- X    else
- X        chain_length = ((CHAIN_THRESHOLD - 1) >> greedy) + 1;
- X}
- X
- X/* Get the longest (longer than `match_length' when entering the function)
- X nearest match of the string beginning in text_buf[r]
- X to the cyclic buffer. The length is returned as the function
- X result and the position in the global variable
- X `match_position'. An unchanged `match_length' denotes failure, and
- X `match_position' then contains garbage !!
- X In order to achieve faster operation, `match_length' is shifted
- X down by LOOKAHEAD. Ideas of Andrew Cadach <kadach@isi.itfs.nsk.su>
- X have been used (lastbyte).
- X*/
- X
- Xint get_next_match (match_length, r)
- X register hash_t r; int match_length;
- X{
- X register int p = r & WINMASK;
- X register int m;
- X#ifdef ALLOW_MISALIGN
- X register us_t lastbyte;
- X#else
- X register uc_t lastbyte;
- X#endif
- X register uc_t *key FIX_SI, *pattern FIX_DI;
- X int chain_count = chain_length;
- X
- X#ifdef GATHER_STAT
- X node_matches++;
- X#endif
- X /* `key' points LOOKAHEAD bytes past the start of the string, so
- X    the string is addressed as key[-LOOKAHEAD] .. key[-1] */
- X key = text_buf + (r & WINMASK) + LOOKAHEAD;
- X r -= MAXDIST; /* `r' is now a "barrier value" */
- X
- X for(;;) {
- X /* the byte a candidate must have to beat the current length */
- X lastbyte = FETCH(key, match_length);
- X do {
- X if(chain_count <= 0)
- X /* chain length exceeded, simple return */
- X return match_length;
- X
- X /* biased so that FETCH(pattern, p) reads the candidate at the
- X    same offset `lastbyte' was taken from */
- X pattern = text_buf + match_length + LOOKAHEAD;
- X
- X /* follow the chain; a link below the barrier ends the search */
- X do {
- X if ((p = next[p]) < r)
- X return match_length;
- X } while (FETCH(pattern, p &= WINMASK) != lastbyte);
- X
- X chain_count--; /* successful lastbyte match, cost = 1 */
- X pattern = text_buf + p + LOOKAHEAD;
- X
- X#ifdef GATHER_STAT
- X node_compares++;
- X#endif
- X
- X /* compare from the start; m ends at the (negative) offset of the
- X    first mismatch, or at 0 when all LOOKAHEAD bytes agree */
- X#ifdef ALLOW_MISALIGN
- X for (m = -LOOKAHEAD;
- X *(unsigned*)&key[m] == *(unsigned*)&pattern[m] &&
- X (m += sizeof(unsigned)) < 0;);
- X#ifndef INT_16_BITS
- X if (m < 0 && *(us_t*)&key[m] == *(us_t*)&pattern[m])
- X m += sizeof(us_t);
- X#endif
- X if (m < 0 && key[m] == pattern[m])
- X ++m;
- X#else
- X for (m = -LOOKAHEAD; key[m] == pattern[m] && ++m < 0;);
- X#endif
- X } while (m < match_length);
- X
- X match_position = p; /* remember new results */
- X if (m == 0)
- X return 0; /* full-length match, nothing better is possible */
- X match_length = m;
- X
- X#ifdef GATHER_STAT
- X node_prolongations++;
- X#endif
- X chain_count -= 2; /* yet another match found, cost = 2 */
- X }
- X}
- X
- X/* Advances `r' by WINSIZE and shifts every non-zero entry of next[]
- X   and hashtab[] by the same amount; an entry that ends up above the
- X   new `r' is reset to zero.  Returns the new `r'.
- X*/
- Xhash_t
- Xrehash(r)
- Xhash_t r;
- X{
- X#if BITS == 16
- X    ul_t idx;
- X#else
- X    unsigned idx;
- X#endif
- X    r += WINSIZE;
- X
- X    idx = 0;
- X    while (idx < WINSIZE) {
- X        /* zero must remain zero */
- X        if (next[idx] != 0) {
- X            next[idx] += WINSIZE;
- X            if (next[idx] > r)
- X                next[idx] = 0;
- X        }
- X        idx++;
- X    }
- X
- X    idx = 0;
- X    while (idx < array_size) {
- X        /* zero must remain zero */
- X        if (hashtab[idx] != 0) {
- X            hashtab[idx] += WINSIZE;
- X            if (hashtab[idx] > r)
- X                hashtab[idx] = 0;
- X        }
- X        idx++;
- X    }
- X    return r;
- X}
- END_OF_FILE
- if test 3906 -ne `wc -c <'lz.c'`; then
- echo shar: \"'lz.c'\" unpacked with wrong size!
- fi
- # end of 'lz.c'
- fi
- if test -f 'lz.h' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'lz.h'\"
- else
- echo shar: Extracting \"'lz.h'\" \(2916 characters\)
- sed "s/^X//" >'lz.h' <<'END_OF_FILE'
- Xextern void InitTree();
- X
- X/* MAXBITS: 16 unless SEGMENTED is defined; with SEGMENTED it is
- X   15 (INT_16_BITS) or 14, and 13 when TINY is defined as well.
- X*/
- X#ifndef SEGMENTED
- X# define MAXBITS 16
- X#else
- X# ifdef INT_16_BITS
- X# define MAXBITS 15
- X# else
- X# define MAXBITS 14
- X# endif
- X#endif
- X
- X#ifdef SEGMENTED
- X# ifdef TINY
- X# undef MAXBITS
- X# define MAXBITS 13
- X# endif
- X#endif
- X
- X/* BITS (the hash width) defaults to MAXBITS ... */
- X#ifndef BITS
- X# define BITS MAXBITS
- X#endif
- X
- X/* ... and is clamped to the range 13..16 */
- X#if BITS < 13
- X# undef BITS
- X# define BITS 13 /* 1:1 hash */
- X#endif
- X
- X#if BITS > 16
- X# undef BITS
- X# define BITS 16
- X#endif
- X
- X/* The following hash-function isn't optimal but it is very fast:
- X
- X HASH = (first + (second << LEN0) +
- X (third << LEN1)) & ((1L << BITS) - 1);
- X
- X The two shift steps (LEN0 and LEN1 - LEN0) differ by no more
- X than one bit.
- X*/
- X
- X#define LEN0 ((BITS-8)/2)
- X#define LEN1 (BITS-8)
- X
- X/* `array_size' is the size of array `hashtab', which contains
- X the heads of the linked lists; the references to the
- X next members of these lists are kept in array `next'.
- X*/
- X
- X#define array_size (1L << BITS)
- X
- X/* If native size of integer is 16 bits, don't use longs */
- X
- X#if defined(INT_16_BITS) || defined(SMALL) || defined(TINY)
- Xtypedef us_t hash_t;
- X#else
- Xtypedef ul_t hash_t;
- X#endif /* INT_16_BITS */
- X
- Xextern int match_position, chain_length;
- X
- Xextern hash_t hashtab[], next[];
- X
- X/* Some defines to eliminate function-call overhead */
- X
- X/* Hash function (no more than 16 bits, so we don't need longs).
- X   `p' points to at least three bytes; it is evaluated several times.
- X*/
- X
- X#define hash(p)\
- X ((unsigned)(p)[0] + ((unsigned)(p)[1] << LEN0) +\
- X ((unsigned)(p)[2] << LEN1))
- X
- X/* hashof(p) is hash(p) reduced to BITS bits.  With FASTHASH, when the
- X   first two bytes are equal the hash of the three bytes starting at
- X   (p) + 3 is added in before the reduction.
- X*/
- X#ifdef FASTHASH
- X#define hashof(p)\
- X (((p)[0] != (p)[1] ? hash(p) : hash(p) + hash((p) + 3)) &\
- X ((1L << BITS) - 1))
- X#else
- X#define hashof(p)\
- X (hash(p) & ((1L << BITS) - 1))
- X#endif
- X
- X/* Inserting of a node `r' into hashed linked list: `r' becomes
- X the head of list.
- X The macro uses `r' and `text_buf' from the invoking scope.  The hash
- X is computed first; then, if `r' has dropped below MAXDIST, it is
- X replaced by rehash(r) before being stored.
- X*/
- X
- X#define InsertNode()\
- X{\
- X register uc_t *key = &text_buf[r & WINMASK];\
- X register unsigned p = hashof(key);\
- X if (r < MAXDIST) /* wraparound occurred */\
- X r = rehash(r);\
- X next[r & WINMASK] = hashtab[p];\
- X hashtab[p] = r;\
- X}
- X
- X/* This routine inputs the char from stdin and does some other
- X actions depending on this char's presence.
- X N is the window size and F the lookahead size; both are
- X parenthesized in the expansion, so expressions may be passed.
- X The macro uses `c', `s', `len', `r', `in_count' and `text_buf' from
- X the invoking scope:
- X   - on a character: store it at text_buf[s] (and mirror it N bytes
- X     further when s < F - 1), advance `s' modulo N, count it;
- X   - on EOF: decrement `len';
- X   - in both cases advance `r' and insert the new node.
- X*/
- X
- X#define Next_Char(N,F)\
- X{\
- X if ((c = getchar()) != EOF) {\
- X text_buf[s] = c;\
- X if (s < (F) - 1)\
- X text_buf[s + (N)] = c;\
- X s = (s + 1) & ((N) - 1);\
- X in_count++;\
- X } else\
- X len--;\
- X r++;\
- X InsertNode();\
- X}
- X
- X/* FIX_SI / FIX_DI pin `key' and `pattern' of get_next_match() to the
- X   si/di registers when compiling with GNU C for the i386 (the
- X   optimizer of v1.39 cannot allocate these registers correctly).
- X   For every other compiler or processor they expand to nothing.
- X   GNU-style register allocations for other processors are welcome!
- X*/
- X#if defined(__GNUC__) && defined(__i386__)
- X#define FIX_SI asm("si")
- X#define FIX_DI asm("di")
- X#else
- X#define FIX_SI
- X#define FIX_DI
- X#endif
- X
- X/* some heuristic to avoid necessity of "-ggg...": the larger the
- X   hash (BITS), the smaller the basic search budget */
- X#define CHAIN_THRESHOLD (LOOKAHEAD >> (BITS - 12))
- X
- Xextern int get_next_match();
- Xextern hash_t rehash();
- X
- X#ifdef GATHER_STAT
- Xextern long node_matches, node_compares, node_prolongations;
- X#endif
- X
- X/* FETCH reads the byte array[index]; with ALLOW_MISALIGN it reads a
- X   us_t starting one byte earlier instead (a possibly unaligned
- X   access).  The arguments are not parenthesized in the expansion.
- X*/
- X#ifdef ALLOW_MISALIGN
- X#define FETCH(array,index) *(us_t*)(&array[index]-1)
- X#else
- X#define FETCH(array,index) array[index]
- X#endif
- END_OF_FILE
- if test 2916 -ne `wc -c <'lz.h'`; then
- echo shar: \"'lz.h'\" unpacked with wrong size!
- fi
- # end of 'lz.h'
- fi
- if test -f 'statist.1' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'statist.1'\"
- else
- echo shar: Extracting \"'statist.1'\" \(3300 characters\)
- sed "s/^X//" >'statist.1' <<'END_OF_FILE'
- X.PU
- X.TH STATIST 1 local
- X.SH NAME
- Xstatist \- calculate Huffman distribution for
- X.IR freeze "(1)"
- X.SH SYNOPSIS
- X.ll +8
- X.B statist
- X[
- X.B \-gx...
- X]
- X.ll -8
- X.br
- X.SH DESCRIPTION
- XThe default table is tuned for both C texts and executable files (as in
- XLHARC). If you will freeze any other files (natural language texts,
- Xdatabases, images, fonts, etc.) you can calculate the matching
- Xpositions distribution using the
- X.B "`statist'"
- Xprogram, which calculates and displays the mentioned
- Xdistribution for the given file. It is useful for large (100K or more)
- Xfiles.
- X
- XThough the built-in position table is polyvalent, the tuning can increase
- Xthe compression rate up to one additional percent. (Observed mainly on
- Xtext files.)
- X.SH USAGE
- X.br
- X.B statist [\-g...] < sample_file
- X.in +8
- Xor
- X.in -8
- X.B gensample | statist [\-g...]
- X.br
- Xwhere
- X.B "`gensample'"
- Xis a program generating some sample stream of
- Xbytes similar to files to be frozen.
- X.PP
- XThe
- X.B \-g
- Xand
- X.B \-x
- Xswitches have the same meaning as for
- X.IR freeze "(1)"
- Xand may be repeated.
- X.PP
- XYou can also see the intermediate values
- Xand watch their changes by pressing INTR key when you wish.
- X.PP
- XNote: If you use
- X.BR "gensample | statist" ,
- Xremember that INTR affects BOTH
- Xprocesses!
- X.br
- XThe results have the following format:
- X.br
- X.I "n1 n2 n3 n4 n5 n6 n7 n8"
- X(uncertainty =
- X.I x)
- X.br
- XAverage match length:
- X.I xx.yy
- X.br
- XPercentile 99.9:
- X.I p999
- X.br
- XPercentile 99.5:
- X.I p995
- X.br
- XPercentile 99.0:
- X.I p990
- X.br
- XPercentile 97.0:
- X.I p970
- X.br
- XPercentile 95.0:
- X.I p950
- X.br
- XPercentile 90.0:
- X.I p900
- X.br
- XPercentile 80.0:
- X.I p800
- X.br
- XPercentile 70.0:
- X.I p700
- X.br
- XPercentile 50.0:
- X.I p500
- X.br
- XSigma:
- X.I xx.yy
- X.br
- X.PP
- XHere
- X.I n1 \- n8
- Xare values of the calculated position table elements,
- Xuncertainty is a number which denotes validity of given results
- X(non-zero values of uncertainty indicate that the
- Xresults may be unusable). Other values (average match length,
- Xpercentiles and sigma) are FYI only.
- X.PP
- XYou may create the
- X.IR /etc/default/freeze
- Xfile (if you don't like
- X.IR /etc/default/
- Xdirectory, choose another - in MS-DOS it is FREEZE.CNF in
- Xthe directory of FREEZE.EXE), which has the following format:
- X.in +8
- X.I name
- X=
- X.I "n1 n2 n3 n4 n5 n6 n7 n8"
- X.in -8
- X.I (name
- Xmust start in column 1). For example:
- X.ll +8
- X.br
- X---------- cut here -----------
- X.br
- X# This is freeze's defaults file
- X.br
- Xrussian=0 0 1 2 6 20 31 2 # The sample was mailx.lp (Russian)
- X.br
- Xenglish=0 0 1 2 7 16 36 0 # The sample was gcc.lp (English)
- X.br
- X# End of file
- X.br
- X---------- cut here -----------
- X.ll -8
- X.PP
- XIf you find values, which are better THAN DEFAULT both for text (C
- Xprograms) and binary (executable) files, please send them to me.
- X
- XImportant note: statist.c is NOT a part of the freeze package; it is an
- Xadditional feature.
- X
- X.SH "SEE ALSO"
- Xfreeze(1), melt(1), fcat(1)
- X.SH "DIAGNOSTICS"
- XHuffman tree has more than 8 levels, reducing...
- X.in +8
- XSelf-explanatory, but sometimes the reduction falls into an infinite loop.
- X.in -8
- X.IR xxx K
- X.in +8
- XProgress indicator is written after each 4K of a file processed.
- X.in -8
- X.SH "BUGS"
- XSometimes using the results with uncertainty = 1 (on one file)
- Xgives a compression rate worse than the default, while using the results
- Xwith uncertainty = 13 (on another file) works quite well.
- X.PP
- XPlease send bug reports, descriptions of incompatibilities, etc. to
- Xleo@s514.ipmce.su.
- X
- END_OF_FILE
- if test 3300 -ne `wc -c <'statist.1'`; then
- echo shar: \"'statist.1'\" unpacked with wrong size!
- fi
- # end of 'statist.1'
- fi
- if test -f 'statist.c' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'statist.c'\"
- else
- echo shar: Extracting \"'statist.c'\" \(6376 characters\)
- sed "s/^X//" >'statist.c' <<'END_OF_FILE'
- X#include "freeze.h"
- X#include "lz.h"
- X
- X/* This program calculates the distribution of the matched strings'
- Xpositions and lengths using nearly the same code as `freeze'.
- X*/
- X
- X#define N_POS 62 /* number of position symbols (leaves) */
- X#define T (N_POS * 2 - 1) /* leaves plus internal nodes */
- X#define R (T - 1) /* index of the last node */
- X
- X/* Unlike in freeze, `update' here merely counts occurrences */
- X#define update(c) (/* fprintf(stderr, "%d\n", c), */ freq[c]++)
- X
- Xlong in_count, refers = 0; /* bytes read; references counted */
- X
- Xlong indc_count; /* next in_count at which progress is shown */
- Xshort reduceflag = 0, greedy = 0;
- X
- Xint lens[LOOKAHEAD+1]; /* lens[n]: number of matches of length n */
- X
- Xus_t bits[9]; /* bits[n]: number of n-bit codes, n = 1..8 */
- X
- Xshort prnt[T]; /* parent index of every node */
- Xul_t freq[T]; /* frequency of every node */
- Xshort used[T]; /* non-zero once findmin() has picked the node */
- X
- Xvoid freeze(), StartHuff();
- X
- XRETSIGTYPE giveres();
- X
- Xint main(argc, argv)
- X    int argc; char ** argv;
- X{
- X    /* Option parsing: each leading "-..." argument may contain only the
- X       letters `g' (greedy += 1) and `x' (greedy -= 1).  Anything left
- X       over, or an unknown letter, is a usage error, which is reported
- X       with a non-zero exit status.
- X    */
- X    argv++;
- X    while (argc > 1) {
- X        if (**argv == '-') {
- X            while (*++(*argv) == 'g' || **argv == 'x')
- X                greedy += ((**argv == 'g') << 1) - 1;
- X            if (**argv)
- X                goto usage;     /* unknown option letter; argc is still > 1 */
- X            argc--; argv++;
- X        } else
- X            break;
- X    }
- X  usage:
- X    if(argc != 1) {
- X        fprintf(stderr, "Usage: statist [-gx...] < sample_file\n");
- X        fprintf(stderr, "Press INTR to display current values\n");
- X        exit(1);
- X    }
- X    /* INTR prints the intermediate results */
- X    signal(SIGINT, giveres);
- X
- X#ifdef DOS
- X    setmode(fileno(stdin), O_BINARY); /* Oh this MS-DOS ... */
- X#endif /* DOS */
- X
- X    freeze();
- X    giveres();
- X    return 0;
- X}
- X
- X/* Integer square root: returns the largest `result' such that
- X   result * result <= val.  Works digit by digit in base 2, consuming
- X   two bits of `val' (most significant first) per result bit.
- X*/
- Xul_t isqrt(val)
- Xul_t val;
- X{
- X    ul_t result = 0;
- X    ul_t side = 0;              /* always 2 * result */
- X    ul_t left = 0;              /* running remainder */
- X    unsigned i;
- X
- X    for (i = 0; i < sizeof(ul_t) * 4; i++)
- X    {
- X        /* bring down the next two bits of val */
- X        left = (left << 2) + (val >> (sizeof(ul_t) * 8 - 2));
- X        val <<= 2;
- X        if (left >= side*2 + 1)
- X        {
- X            /* the next result bit is 1 */
- X            left -= side*2+1;
- X            side = (side+1)*2;
- X            result <<= 1;
- X            result |= 1;
- X        }
- X        else
- X        {
- X            /* the next result bit is 0 */
- X            side *= 2;
- X            result <<= 1;
- X        }
- X    }
- X    return result;
- X}
- X
- X
- X/* Prints the (current) values of tunable parameters. Uncertainty is
- Xthe number of mis-sequencings (the algorithm assumes the probabilities
- Xof references decrease monotonically as distance increases). Ideally
- Xit should be 0, but a value of about 5 or less indicates that the given
- X8 values could improve the compression rate when used.
- X*/
- X
- XRETSIGTYPE giveres() {
- X us_t c;
- X register int i, j, k, pr, f, average, sum;
- X ul_t cumul, sigma2;
- X short r, percent;
- X /* re-install the handler: this routine is also called on SIGINT */
- X signal(SIGINT, giveres);
- X newtry:
- X /* keep the leaf counts, clear internal nodes and the work arrays */
- X StartHuff(N_POS);
- X pr = f = 0;
- X i = N_POS;
- X r = N_POS * 2 - 2;
- X /* build the tree: join the two least frequent unused nodes */
- X while (i <= r) {
- X j = findmin(i);
- X k = findmin(i);
- X freq[i] = freq[j] + freq[k];
- X prnt[j] = prnt[k] = i++;
- X }
- X
- X for (c = 1; c <= 6; c++) bits[c] = 0;
- X
- X printf("Non-monotonities are in: ");
- X
- X /* j = code length of position c (number of links up to the root);
- X    f accumulates how far lengths fall below the longest seen so far */
- X for(c = 0; c < N_POS; c++) {
- X j = 0;
- X k = c;
- X do j++; while ((k = prnt[k]) != r);
- X if (j <= 6)
- X bits[j]++;
- X if (j < pr) {
- X f += pr - j;
- X printf("%d, ", c);
- X
- X } else
- X pr = j;
- X }
- X if(f == 0)
- X printf("\b\b\b\babsent.\n");
- X else
- X printf("\b\b.\n");
- X
- X k = bits[1] + bits[2] + bits[3] + bits[4] +
- X bits[5] + bits[6];
- X
- X k = N_POS - k; /* free variable length codes for 7 & 8 bits */
- X
- X j = 128 * bits[1] + 64 * bits[2] + 32 * bits[3] +
- X 16 * bits[4] + 8 * bits[5] + 4 * bits[6];
- X
- X j = 256 - j; /* free byte images for these codes */
- X
- X/* Equation:
- X bits[7] + bits[8] = k
- X 2 * bits[7] + bits[8] = j
- X*/
- X j -= k; /* j is now bits[7] */
- X if (j < 0 || k < j) {
- X /* no solution with codes of at most 8 bits: give every position a
- X    non-zero count (halving the counts on later rounds) and retry */
- X printf("Huffman tree has more than 8 levels, reducing...\n");
- X for (i = 0; i < N_POS; i++)
- X if (!freq[i])
- X freq[i] = 1;
- X else if (reduceflag)
- X freq[i] = (freq[i] + 1) / 2;
- X reduceflag = 1;
- X goto newtry;
- X } else {
- X bits[7] = j;
- X bits[8] = k - j;
- X printf("%d,%d,%d,%d,%d,%d,%d,%d (uncertainty = %d)\n",
- X bits[1], bits[2], bits[3], bits[4],
- X bits[5], bits[6], bits[7], bits[8], f);
- X }
- X /* statistics of the match lengths */
- X sum = 0; cumul = 0;
- X for(i = 3; i <= LOOKAHEAD; i++) {
- X cumul += (ul_t) i * lens[i];
- X sum += lens[i];
- X }
- X sum || sum++; /* avoid division by zero */
- X printf("Average match length: %d.%02d\n",
- X average = cumul / sum, i = cumul * 100 / sum % 100);
- X if (i >= 50) average++; /* round to nearest for use in sigma */
- X j = sum;
- X percent = 0;
- X /* walk the lengths downwards; j = number of matches shorter than i */
- X for (i = LOOKAHEAD; i >= 3; i--) {
- X static pcs[] = { 999, 995, 990, 970, 950, 900, 800, 700, 500 };
- X j -= lens[i];
- X newpcs:
- X if (j <= sum * pcs[percent] / 1000) {
- X printf("Percentile %d.%d: %d\n",
- X pcs[percent] / 10, pcs[percent] % 10, i);
- X if (percent == sizeof(pcs)/sizeof(int) - 1)
- X break;
- X else {
- X percent++;
- X goto newpcs;
- X }
- X }
- X }
- X for (sigma2 = 0, i = 3; i <= LOOKAHEAD; i++)
- X sigma2 += (ul_t)(i - average)*(i - average)*lens[i];
- X /* scaled by 100 so that the square root has one decimal digit */
- X sigma2 = sigma2 * 100 / sum;
- X j = (int)isqrt(sigma2);
- X printf("Sigma: %d.%1d\n", j / 10, j % 10);
- X printf("References: %ld\n", refers);
- X fflush(stdout);
- X}
- X
- X
- X/* Runs the LZ matching over stdin without producing any output:
- X   for every reference found, the match length is counted in lens[]
- X   and the upper part of its distance (distance - 1, shifted right by
- X   7) in freq[] via `update'.  A progress indicator is written to stderr.
- X*/
- Xvoid freeze ()
- X{
- X    register int i, len, s, c;
- X    register hash_t r;
- X    int match_length;
- X
- X    StartHuff(0);
- X    InitTree();
- X    r = MAXDIST + 1;
- X    s = (r + LOOKAHEAD) & WINMASK;
- X
- X    /* fill the lookahead area */
- X    for (len = 0; len < LOOKAHEAD && (c = getchar()) != EOF; len++)
- X        text_buf[r + len] = c;
- X
- X    in_count = len;
- X
- X    /* blanks in front of the data, then hash the initial positions */
- X    for (i = r - LOOKAHEAD; i < MAXDIST; i++)
- X        text_buf[i] = ' ';
- X    for (i = r - LOOKAHEAD; i <= r; i++) {
- X        register uc_t *key = &text_buf[i];
- X        register unsigned p = hashof(key);
- X        next[i] = hashtab[p];
- X        hashtab[p] = i;
- X    }
- X
- X    while (len != 0) {
- X        match_length = LOOKAHEAD + get_next_match(THRESHOLD - LOOKAHEAD, r);
- X        if (match_length > len)
- X            match_length = len;
- X        if (match_length <= THRESHOLD) {
- X            /* too short: treated as a single literal */
- X            match_length = 1;
- X        } else if (match_length >= chain_length) {
- X            /* long enough: taken without looking one byte ahead */
- X            lens[match_length] ++;
- X            update((((r - match_position) & WINMASK) - 1) >> 7);
- X            refers ++;
- X        } else {
- X            /* try the match starting at the next byte and keep
- X               whichever of the two is longer */
- X            register us_t orig_length, orig_position;
- X            orig_length = match_length;
- X            orig_position = match_position;
- X            Next_Char(WINSIZE, LOOKAHEAD);
- X            match_length = LOOKAHEAD + get_next_match(match_length - LOOKAHEAD, r);
- X            if (match_length > len)
- X                match_length = len;
- X            if (orig_length >= match_length) {
- X                lens[orig_length] ++;
- X                update((((r - 1 - orig_position) & WINMASK) - 1) >> 7);
- X                /* one byte of it has been consumed by Next_Char */
- X                match_length = orig_length - 1;
- X            } else {
- X                lens[match_length] ++;
- X                update((((r - match_position) & WINMASK) - 1) >> 7);
- X            }
- X            refers ++;
- X        }
- X
- X        /* step over the matched bytes, reading replacements */
- X        for (i = 0; i < match_length &&
- X                (c = getchar()) != EOF; i++) {
- X            text_buf[s] = c;
- X            if (s < LOOKAHEAD - 1)
- X                text_buf[s + WINSIZE] = c;
- X            s = (s + 1) & WINMASK;
- X            r++;
- X            InsertNode();
- X        }
- X        in_count += i;
- X        if ((in_count > indc_count)) {
- X            /* in_count is a long, hence %ld */
- X            fprintf(stderr, "%5ldK\b\b\b\b\b\b", in_count / 1024);
- X            fflush (stderr);
- X            indc_count += 4096;
- X        }
- X
- X        /* input exhausted: keep advancing while the lookahead drains */
- X        while (i++ < match_length) {
- X            len--;
- X            r++;
- X            InsertNode();
- X        }
- X    }
- X}
- X
- X/* Clears freq[] from index `beg' up to the last node, and clears
- X   used[] and prnt[] for all nodes.
- X*/
- Xvoid StartHuff(beg) {
- X    int limit = N_POS * 2 - 1;
- X    int node;
- X
- X    node = beg;
- X    while (node < limit) {
- X        freq[node] = 0;
- X        node++;
- X    }
- X
- X    node = 0;
- X    while (node < limit) {
- X        prnt[node] = 0;
- X        used[node] = 0;
- X        node++;
- X    }
- X}
- X
- X/* Returns the index of the least frequent node among the not yet used
- X   nodes 0 .. range-1 (the first one on ties) and marks it as used.
- X   Returns -1, marking nothing, when every node in the range is used.
- X   No sentinel value is involved, so arbitrarily large frequencies are
- X   handled and nothing depends on the width of `int'.
- X*/
- Xint findmin(range)
- X    int range;
- X{
- X    ul_t min = 0;
- X    long argmin = -1, i;
- X
- X    for (i = 0; i < range; i++) {
- X        /* the first unused node is always accepted */
- X        if (!used[i] && (argmin < 0 || freq[i] < min))
- X            min = freq[argmin = i];
- X    }
- X    if (argmin >= 0)
- X        used[argmin] = 1;
- X    return (int)argmin;
- X}
- END_OF_FILE
- if test 6376 -ne `wc -c <'statist.c'`; then
- echo shar: \"'statist.c'\" unpacked with wrong size!
- fi
- # end of 'statist.c'
- fi
- # Epilogue of part 2: leave a marker file, then report which of the
- # three parts have not been unpacked in this directory yet.
- echo shar: End of archive 2 \(of 3\).
- cp /dev/null ark2isdone
- MISSING=""
- for I in 1 2 3 ; do
- if test ! -f ark${I}isdone ; then
- MISSING="${MISSING} ${I}"
- fi
- done
- if test "${MISSING}" = "" ; then
- echo You have unpacked all 3 archives.
- # all parts are present, the marker files are no longer needed
- rm -f ark[1-9]isdone
- else
- echo You still must unpack the following archives:
- echo " " ${MISSING}
- fi
- exit 0
- exit 0 # Just in case...
-