home *** CD-ROM | disk | FTP | other *** search
- Program Vitter;
-
-
- {$R-}
-
- uses CRT,DOS;
-
-
- {
- This is a implementation of the dynamic Huffman coding algorithm presented
- by Jeffrey Scott Vitter in ACM Transactions on Mathematical Software,
- Vol, 15, June 1989, p 158, and described in Journal of the ACM, Vol. 34,
- October 1987, p 825.
-
- The algorithm is implemented in the context of a stand-alone file
- compression utility, which can be used to compress/decompress files
- one at a time. This program serves only to illustrate the use of
- the algorithm.
-
-
-
- This compression algorithm has several good features:
- 1) It's single pass, so its suitable for pipelining or I/O.
- 2) As the Huffman tree is built dynamically, there is no overhead
- involved in storing/sending the tree itself to the decoder, the
- decoder builds the tree as it goes just like the encoder.
- 3) Compression is theoretically optimal among one-pass huffman type
- encoders.
-
- Disadvantages:
- 1) This implementation is fairly slow. (This implementation
- was written with clarity, rather than speed in mind, and there are
- several optimizations possible.)
- 2) Memory requirements are fairly large as compared to some other huffman
- type compression routines. (Low as compared to many LZ-based
- techniques.)
- 3) Compression may not be as good as a regular huffman tree. It seems to
- vary between about 33% and 60% on text files, based on very limited
- and informal testing.
-
-
- Programmer : Douglas P. Webb
- Last Updated: 14 Sept. 1992
- }
-
-
- CONST
- CharBufSize = 2048; { I/O Buffer. }
- WordBufSize = 1024; { I/O Buffer. }
- N = 256; { Alphabet size. 256 chars in ASCII }
-
-
- TYPE
- Vitter_Header_Type = RECORD { 17 bytes in size. }
- Name : String[12];
- FSize : LongInt;
- END;
-
- CharBuffer = Array[0..PRED(CharBufSize)] OF Char;
- WordBuffer = Array[0..PRED(WordBufSize)] OF WORD;
-
-
- CONST
- Bytes_Left : BOOLEAN = TRUE;
-
- { Used by Transmit. }
- OBufPosn : Word = 0;
- WriteWord : Word = 0;
- WShifts : WORD = 15;
-
- { Used by Receive. }
- BufRead : Integer = 0;
- BufPosn : Integer = 0;
- Shifts : WORD = 0;
- ReadWord: WORD = 0;
-
-
- VAR
- Header : Vitter_Header_Type;
-
- Alpha : Array[0..N] OF WORD;
- Rep : Array[0..N] OF Integer;
- Block : Array[1..2*N-1] OF Integer;
- Weight : Array[1..2*N-1] OF LongInt;
- Parent : Array[1..2*N-1] OF Integer;
- Parity : Array[1..2*N-1] OF Integer;
- RtChild : Array[1..2*N-1] OF Integer;
- First : Array[1..2*N-1] OF Integer;
- Last : Array[1..2*N-1] OF Integer;
- PrevBlock : Array[1..2*N-1] OF Integer;
- NextBlock : Array[1..2*N-1] OF Integer;
- Stack : Array[1..2*N-1] OF Integer;
- AvailBlock : Integer;
-
- M,E,R,Z : Integer;
-
-
-
- CInBuf,COutBuf : ^CharBuffer;
- WInBuf,WOutBuf : ^WordBuffer;
-
- VitFile,InFile,OutFile : File;
- FileName : String[12];
- Dir: DirStr;
- Name: NameStr;
- Ext: ExtStr;
- FoundFile : SearchRec;
- Ch : Char;
-
-
-
-
- Procedure Initialize;
- {
- This procedure form an initial Huffman tree consisting of a single leaf
- 0-node. The global variable Z is always equal to 2n-1.
- }
- VAR
- I : Integer;
- BEGIN
- Bytes_Left := TRUE;
-
- OBufPosn := 0; { Variables used by 'Transmit' }
- WriteWord := 0;
- WShifts := 15;
-
- BufRead := 0; { Variables used by 'Receive' }
- BufPosn := 0;
- Shifts := 0;
- ReadWord:= 0;
-
- M := 0;
- E := 0;
- R := -1;
- Z := 2*N -1;
-
- Alpha[0] := 0;
- Rep[0] := 0;
- FOR I := 1 TO N DO
- BEGIN
- INC(M);
- INC(R);
- IF R*2 = M THEN
- BEGIN
- INC(E);
- R := 0;
- END;
- Alpha[I] := I;
- Rep[I] := I;
- END;
-
- { Initialize node N as the 0-node }
- Block[N] := 1;
- PrevBlock[1] := 1;
- NextBlock[1] := 1;
- Weight[1] := 0;
- First[1] := N;
- Last[1] := N;
- Parity[1] := 0;
- Parent[1] := 0;
-
- { Initialize available block list }
-
- AvailBlock := 2;
- FOR I := AvailBlock to Z-1 DO
- NextBlock[I] := I+1;
- NextBlock[Z] := 0;
-
- END;
-
-
-
-
-
-
- Function FindChild(J,Parity: Integer):Integer;
-
- {
- This function returns the node number of either the left or right child
- of node j, depending on whether the parity parameter is set to 0 or 1.
- }
-
- VAR
- Delta, Right, Gap : Integer;
-
- BEGIN
- Delta := 2*(First[Block[J]] - J) + 1 - parity;
- Right := rtChild[Block[J]];
- Gap := Right - Last[Block[Right]];
- IF Delta <= Gap THEN
- FindChild := Right - Delta
- ELSE
- BEGIN
- DEC(Delta,SUCC(Gap));
- Right := First[PrevBlock[Block[Right]]];
- Gap := Right - Last[Block[Right]];
- IF Delta <= Gap THEN
- FindChild := Right - Delta
- ELSE FindChild := First[PrevBlock[Block[Right]]] - Delta + Gap + 1;
- END;
- END;
-
-
-
-
- Procedure InterchangeLeaves(E1,E2 : Integer);
- VAR
- Temp : Integer;
- BEGIN
- Rep[Alpha[E1]] := E2;
- Rep[Alpha[E2]] := E1;
- Temp := Alpha[E1];
- Alpha[E1] := Alpha[E2];
- Alpha[E2] := Temp;
- END;
-
-
-
-
-
- Procedure Update(K : Char);
-
- {
- This procedure is the main component of the algorithm. It is called by
- both 'EncodeAndTransmit' and 'ReceiveAndDecode' in order to modify the
- dynamic Huffman tree to account for the letter just processed.
- }
-
- VAR
- Q,LeafToIncrement,Bq,B,OldParent,OldParity,Nbq,Par,Bpar : Integer;
- Slide : Boolean;
-
-
-
- Procedure FindNode;
- {
- This procedure sets Q to point to the leaf to process. If that leaf is the
- 0-node, which corresponds to the transmission of a letter that has not
- been transmitted earlier in the message, the 0-node is split to form an
- extra leaf if there is still an untransmitted letter left in the alphabet.
- Otherwise, Q is interchanged with the leader of its block.
- }
- BEGIN
- Q := Rep[Byte(K)];
- LeafToIncrement := 0;
- IF q <=M THEN { A zero weight becomes positive. }
- BEGIN
- InterchangeLeaves(Q,M);
- IF R = 0 THEN
- BEGIN
- R := M DIV 2;
- IF R > 0 THEN
- E := E - 1;
- END;
- M := M-1;
- R := R-1;
- Q := SUCC(M);
- Bq := Block[Q];
- IF M > 0 THEN
- BEGIN
- { Split the 0-node into an internal node with two children.
- The new 0-node is node M; the old 0-node is node M+1; the
- new parent nodes M and M+1 is node M+N. }
- Block[M] := Bq;
- Last[Bq] := M;
- OldParent := Parent[Bq];
- Parent[Bq] := M+N;
- Parity[Bq] := 1;
-
- { Create a new internal block of zero weight for node M + N }
- B := AvailBlock;
- AvailBlock := NextBlock[AvailBlock];
- PrevBlock[B] := Bq;
- NextBlock[B] := NextBlock[Bq];
- PrevBlock[NextBlock[Bq]] := B;
- NextBlock[Bq] := B;
- Parent[B] := OldParent;
- Parity[B] := 0;
- RtChild[B] := Q;
- Block[M+N] := B;
- Weight[B] := 0;
- First[B] := M + N;
- Last[B] := M + N;
- LeafToIncrement := Q;
- Q := M + N;
- END;
- END
- ELSE { Interchange Q with the first node in Q's block }
- BEGIN
- InterchangeLeaves(Q,First[Block[Q]]);
- Q := First[Block[Q]];
- IF (Q= SUCC(M)) AND (M>0) THEN
- BEGIN
- LeafToIncrement := Q;
- Q := Parent[Block[Q]];
- END;
- END;
- END;
-
-
-
- Procedure SlideAndIncrement;
- {
- This procedure incrments the weight of node Q by 1 and adjusts the tree
- pointers to reflect the new implicit numbering. Finally, Q is set to
- point to the node one level higher in the tree that needs incrementing
- next.
- }
- BEGIN { Q is currently the first node in its block. }
- Bq := Block[Q];
- Nbq := nextBlock[Bq];
- Par := Parent[Bq];
- OldParent := Par;
- OldParity := Parity[Bq];
- IF ((Q<=N) AND (First[Nbq] > N) AND (Weight[Nbq] = Weight[Bq])) OR
- ((Q>N) AND (First[Nbq] <= N) AND (Weight[Nbq] = SUCC(Weight[Bq]))) THEN
- BEGIN { Slide Q over the next Block }
- Slide := TRUE;
- OldParent := Parent[Nbq];
- OldParity := Parity[Nbq];
-
- { Adjust child pointers for next higher level in tree. }
- IF Par > 0 THEN
- BEGIN
- Bpar := Block[Par];
- IF RtChild[BPar] = Q THEN
- RtChild[BPar] := Last[Nbq]
- ELSE IF RtChild[BPar] = First[Nbq] THEN
- RtChild[Bpar] := Q
- ELSE RtChild[Bpar] := SUCC(RtChild[Bpar]);
- IF Par <> Z THEN
- IF Block[SUCC(Par)] <> Bpar THEN
- IF RtChild[Block[SUCC(Par)]] = First[Nbq] THEN
- RtChild[Block[SUCC(Par)]] := Q
- ELSE IF Block[RtChild[Block[SUCC(Par)]]] = Nbq THEN
- RtChild[Block[SUCC(Par)]] := SUCC(RtChild[Block[SUCC(Par)]]);
- END;
-
- { Adjust parent pointers for block Nbq }
- Parent[Nbq] := Parent[Nbq] -1 + Parity[Nbq];
- Parity[Nbq] := 1 - Parity[Nbq];
- Nbq := NextBlock[Nbq];
- END
- ELSE Slide := FALSE;
-
- IF (((Q <= N) AND (First[Nbq] <= N)) OR ((Q>N) AND (First[Nbq] > N))) AND
- (Weight[Nbq] = SUCC(Weight[Bq])) THEN
- BEGIN { Merge Q into the block of weight one higher }
- Block[Q] := Nbq;
- Last[Nbq] := Q;
- IF Last[Bq] = Q THEN { Q's old block disappears }
- BEGIN
- NextBlock[PrevBlock[Bq]] := NextBlock[Bq];
- PrevBlock[NextBlock[Bq]] := PrevBlock[Bq];
- NextBlock[Bq] := AvailBlock;
- AvailBlock := Bq;
- END
- ELSE
- BEGIN
- IF Q > N THEN
- RtChild[Bq] := FindChild(PRED(Q),1);
- IF Parity[Bq] = 0 THEN
- DEC(Parent[Bq]);
- Parity[Bq] := 1 - Parity[Bq];
- First[Bq] := PRED(Q);
- END;
- END
- ELSE IF Last[Bq] = Q THEN
- BEGIN
- IF Slide THEN { Q's block is slid forward in the block list }
- BEGIN
- PrevBlock[NextBlock[Bq]] := PrevBlock[Bq];
- NextBlock[PrevBlock[Bq]] := NextBlock[Bq];
- PrevBlock[Bq] := PrevBlock[Nbq];
- NextBlock[Bq] := Nbq;
- PrevBlock[Nbq] := Bq;
- NextBlock[PrevBlock[Bq]] := Bq;
- Parent[Bq] := OldParent;
- Parity[Bq] := OldParity;
- END;
- INC(Weight[Bq]);
- END
- ELSE { A new Block is created for Q. }
- BEGIN
- B := AvailBlock;
- AvailBlock := nextBlock[AvailBlock];
- Block[Q] := B;
- First[B] := Q;
- last[B] := Q;
- IF Q > N THEN
- BEGIN
- RtChild[B] := RtChild[Bq];
- RtChild[Bq] := FindChild(Pred(Q),1);
- IF RtChild[B] = PRED(Q) THEN
- Parent[Bq] := Q
- ELSE IF Parity[Bq] = 0 THEN
- DEC(Parent[Bq]);
- END
- ELSE IF Parity[Bq] = 0 THEN
- DEC(Parent[Bq]);
- First[Bq] := PRED(Q);
- Parity[Bq] := 1 - Parity[Bq];
-
- { Insert Q's Block in its proper place in the block list. }
- PrevBlock[B] := PrevBlock[Nbq];
- NextBlock[B] := Nbq;
- PrevBlock[Nbq] := B;
- NextBlock[PrevBlock[B]] := B;
- Weight[B] := SUCC(Weight[Bq]);
- Parent[B] := OldParent;
- Parity[B] := OldParity;
- END;
- { Move Q one level higher in the tree. }
- IF Q <= N THEN
- Q := OldParent
- ELSE Q := Par;
- END;
-
-
-
-
- BEGIN
- { Set Q to the node whose weight should increase }
- FindNode;
- WHILE Q > 0 DO
- { At this point , Q is the first node in its block. Increment Q's
- weight by 1 and slide Q if necesary over the next block to maintain
- the invariant. Then set Q to the node one level higher that needs
- incrementing text. }
- SlideAndIncrement;
-
- { Finish up some special cases involving the 0-node }
- IF LeaftoIncrement <> 0 THEN
- BEGIN
- Q := LeafToIncrement;
- SlideAndIncrement;
- END;
- END;
-
-
-
-
- Procedure Transmit(I : Integer);
-
- CONST
- One = 32768;
-
- BEGIN
- IF I = 1 THEN
- INC(WriteWord,One);
- WriteWord := WriteWord SHR 1;
- DEC(WShifts);
- IF WSHifts = 0 THEN
- BEGIN
- WOutBuf^[OBufPosn] := WriteWord;
- IF OBufPosn = PRED(WordBufSize) THEN
- BEGIN
- BlockWrite(OutFile,WOutBuf^,2*WordBufSize,OBufPosn);
- Write('-');
- OBufPosn := 0;
- END
- ELSE INC(OBufPosn);
- WShifts := 15;
- END;
- END;
-
-
-
-
- Procedure EncodeAndTransmit(J: Char);
- {
- This procedure determines the encoding of letter Aj on the basis of the
- path from the root of the Huffman tree to Aj's leaf, using the convention
- that 0 means "go to the left child" and 1 means "go to the right child".
- If Aj has not appeared previously in the message, extra bits are sent to
- specify which one of the zero-weight letters has been encountered. These
- extra bits are computed by the following minimum prefix code:
- IF 1 <= J <= 2*R THE Aj is specified by the (E+1)-bit binary
- representation of j-1; otherwise, Aj is specified by the E-bit binary
- representation of J-R-1.
-
- The system precedure 'Transmit' is called for each bit in the encoding to
- send it to the reciever.
- }
- VAR
- I,II,Q,T,Root : Integer;
-
- BEGIN
- Q := Rep[ORD(J)];
- I := 0;
- IF Q <= M THEN { Encode letter of zero weight }
- BEGIN
- DEC(Q);
- IF Q < 2*R THEN
- T := SUCC(E)
- ELSE
- BEGIN
- DEC(Q,R);
- T := E;
- END;
- FOR II := 1 to T DO
- BEGIN
- INC(I);
- Stack[I] := Q MOD 2;
- Q := Q DIV 2;
- END;
- Q := M;
- END;
- IF M = N THEN
- Root := N
- ELSE Root := Z;
-
- While Q <> Root DO { Traverse up the tree. }
- BEGIN
- INC(I);
- Stack[I] := (First[Block[Q]]-Q+Parity[BLock[Q]]) MOD 2;
- Q := Parent[Block[Q]]-(First[Block[Q]]-Q+1-Parity[Block[Q]]) DIV 2;
- END;
- FOR II := I DOWNTO 1 DO
- Transmit(Stack[II]);
- END;
-
-
-
-
- Function Receive: WORD;
-
-
- BEGIN
- IF (BufPosn = BufRead) AND (Shifts = 0) THEN
- BEGIN
- BlockRead(InFile,WInBuf^,2*WordBufSize,BufRead);
- BufRead := BufRead DIV 2;
- Write('+');
- If BufRead = 0 THEN Bytes_Left := FALSE;
- BufPosn := 0;
- END;
- IF Shifts = 0 THEN
- BEGIN
- ReadWord := WInBuf^[BufPosn];
- INC(BufPosn);
- Shifts := 15;
- END;
- IF BOOLEAN(ReadWord AND 1) THEN
- Receive := 1
- ELSE Receive := 0;
-
- DEC(Shifts);
- ReadWord := ReadWord SHR 1;
- END;
-
-
-
- Function ReceiveAndDecode: Word;
-
- {
- This Function repeatedly calls a system function 'Recieve' to
- read one more bit of input until the inputed sequence of 0's and
- 1's has specified a path to a leaf node in the huffman tree.
-
- Extra bits are read when K < N-1 and a 0-node is reached in order
- to determine which zero-weigth letter is being transmitted.
- }
-
- VAR
- I,Q : Integer;
-
- BEGIN
- IF M = N THEN
- Q:= N
- ELSE Q := Z; { Set Q to the root node. }
- WHILE Q > N DO { Transverse down the tree. }
- Q := FindChild(Q,Receive);
- IF Q = M THEN { Decode 0-node }
- BEGIN
- Q := 0;
- FOR I := 1 to E DO
- Q := Q*2+Receive;
- IF Q < R THEN
- Q := Q*2 + Receive
- ELSE INC(Q,R);
- INC(Q);
- END;
- ReceiveAndDecode := Alpha[Q];
- END;
-
-
-
-
- Procedure Encode;
- CONST
- BufRead : Word = 0;
- BufPosn : Word = 0;
- VAR
- X : Word;
-
- BEGIN
- Initialize;
- BlockRead(InFile,CInBuf^,CharBufSize,BufRead);
- If BufRead = 0 THEN Bytes_Left := FALSE;
- BufPosn := 0;
- WHILE Bytes_Left DO { Continue until all characters encoded. }
- BEGIN
- { Let Aj be the next letter to encode }
- EncodeAndTransmit(CInBuf^[BufPosn]);
- Update(CInBuf^[BufPosn]);
- INC(BufPosn);
- IF BufPosn = BufRead THEN
- BEGIN
- BlockRead(InFile,CInBuf^,CharBufSize,BufRead);
- If BufRead = 0 THEN Bytes_Left := FALSE;
- BufPosn := 0;
- END;
- END;
-
- FOR X := WShifts DownTO 1 DO
- WriteWord := WriteWord SHR 1;
-
- WOutBuf^[OBufPosn] := WriteWord;
- INC(OBufPosn);
- BlockWrite(OutFile,WOutBuf^,2*OBufPosn,OBufPosn);
- END;
-
-
-
-
- Procedure Decode(FSize: LongInt);
- Var
- BufPosn : Word;
- X : LongInt;
-
- BEGIN
- Initialize;
- BufPosn := 0;
- FOR X := PRED(FSize) DOWNTO 0 DO
- BEGIN
- COutBuf^[BufPosn] := Char(ReceiveAndDecode);
- Update(CoutBuf^[BufPosn]);
- IF BufPosn = PRED(CharBufSize) THEN
- BEGIN
- BlockWrite(OutFile,COutBuf^,SUCC(BufPosn),BufPosn);
- BufPosn := 0;
- END
- ELSE INC(BufPosn);
- END;
- BlockWrite(OutFile,COutBuf^,BufPosn,BufPosn);
- END;
-
-
-
- Procedure DumpSyntax;
- BEGIN
- CLRSCR;
- GotoXY(5,3); Writeln('Vitterpack 1.01');
- GotoXY(5,5); Writeln('The correct syntax for this program is:');
- GotoXY(8,7); Writeln('Vitter <Filename>');
- GotoXY(5,9); Writeln('If the file specified is not a VitterPack file it will be compressed.');
- GotoXY(5,10); Writeln('If it is a VitterPack file it will be decompressed.');
- END;
-
-
-
-
- BEGIN
- IF Paramcount < 1 THEN
- BEGIN
- DumpSyntax;
- HALT;
- END;
- Filename := ParamStr(1);
- FSplit(Filename,Dir,Name,Ext);
- FOR Z := 1 TO 4 DO
- Ext[Z] := Upcase(Ext[Z]);
- IF (Ext <> '.VIT') AND (Ext <> '.') AND(Ext <> '') THEN { Compress. }
- BEGIN
- New(CInBuf);
- New(WOutBuf);
- Header.Name := Name + Ext;
- Assign(Infile,Filename);
- Assign(OutFile,Name + '.Vit');
- RESET(InFile,1); { used for compression }
- REwrite(OutFile,1);
- Header.FSize := FIleSize(InFile);
- BlockWrite(OutFile,Header,SizeOf(Header),Z); { Save space for the header. }
-
- Encode;
-
- Close(Infile);
- Close(outfile);
- Dispose(CInBuf);
- Dispose(WOutBuf);
- END
- ELSE { Decompress. }
- BEGIN
- New(WInBuf);
- New(COutBuf);
- Assign(Infile,Name + '.VIT');
- Reset(InFile,1);
- Blockread(InFile,Header,SizeOf(Header),Z);
- FindFirst(Header.Name,$27,Foundfile); { See if the file to be decompressed }
- If DOSError = 0 THEN { already exists. }
- BEGIN
- Writeln(Header.Name,' already exists, decompress anyway ? (Y/N)');
- Ch := Readkey;
- IF NOT (Ch IN ['y','Y']) THEN HALT;
- END;
- Assign(OutFile,Header.Name);
- ReWrite(OutFile,1); { used for decompression }
-
- Decode(Header.FSize);
-
- Close(Outfile);
- Close(Infile);
- Dispose(WInBuf);
- Dispose(COutBuf);
- END;
- END.