home *** CD-ROM | disk | FTP | other *** search
- 'Experimental LZW Compressor for QuickBASIC 4.5
- 'By Rich Geldreich 1992
- 'This program is in the public domain: use as you wish!
- '(QB4.5 users: Use search & replace to change all of the "SSEG" strings
- 'to "VARSEG" strings in this program.)
- 'Please see QBLZW.BAS for more information on LZW compression in QB.
-
- 'If you have and questions or problems, write/call:
- '
- 'Rich Geldreich
- '410 Market St.
- 'Gloucester City, NJ 08030
- '(609)-742-8752
- '
- 'Benchmarks: ORIGINAL HUFFMAN2.BAS C1.BAS ZIP
- 'BCL71ENR.LIB 263245 216495 191799 159324
- 'BIG_1_3.PCX 7401 3926 2735 2374
- 'MESSAGES.TXT 226989 151750 113077 84044
- 'TIME.MOD 155394 102447 87460 75101
- '
- '
- '
- ' Do not press ctrl+break while this program is compressing! The string
- ' pointers may change, which may result in an error!
-
- DEFINT A-Z
- DECLARE SUB PutByte (A)
- DECLARE SUB PutCode (A)
- DECLARE SUB Rebuild.Table (New.Entries)
- DECLARE FUNCTION GetByte ()
- DECLARE SUB Hash (Prefix, Suffix, Index, Found)
-
- CONST True = -1, False = 0
-
- DIM SHARED Prefix(6576), Suffix(6576), Code(6576)
- DIM SHARED Used(4096)
-
- DIM SHARED InBuffer$, IAddress, IEndAddress, Iseg
- DIM SHARED OutBuffer$, OStartAddress, OAddress, OEndAddress, Oseg
-
- DIM SHARED CodeSize, CurrentBit, Char&
- DIM SHARED Shift(12) AS LONG
-
-
- FOR A = 0 TO 12: READ Shift(A): NEXT
- DATA 1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192
-
-
- LOCATE , , 1
- IF POS(0) <> 1 THEN PRINT
-
-
- InBuffer$ = STRING$(4000, 0) 'input buffer
- OutBuffer$ = STRING$(4000, 0) 'output buffer
-
-
- A& = SADD(OutBuffer$)
- A& = A& - 65536 * (A& < 0)
- Oseg = SSEG(OutBuffer$) + (A& \ 16) 'Segment of buffer
- OAddress = (A& MOD 16) 'Current address in disk buffer
- OEndAddress = OAddress + 4000 'End address of buffer
- OStartAddress = OAddress 'Start of buffer
-
- 'Open input file
- File$ = COMMAND$
- IF File$ = "" THEN LINE INPUT "File to compress? "; File$: File$ = LTRIM$(RTRIM$(File$))
- IF File$ = "" THEN END
- OPEN File$ FOR BINARY AS #1
- FileLength& = LOF(1)
- 'Is it there?
- IF FileLength& = 0 THEN
- CLOSE #1
- KILL COMMAND$
- PRINT COMMAND$; " not found"
- END
- END IF
- 'Open output file
- OPEN "output.lzw" FOR BINARY AS #2
- 'Is it already there?
- IF LOF(2) <> 0 THEN
- 'Kill output file and reopen it
- CLOSE #2
- KILL "output.lzw"
- OPEN "output.lzw" FOR BINARY AS #2
- END IF
- 'CurrentLoc& - position in input file
- CurrentLoc& = 2
-
- 'Compression codes:
- 'Code 256 = end of file
- 'Code 257 = increase code size
- 'Code 258 = rebuild table
- 'Code 259 - 4095 = available for strings
- StartCode = 259 'First LZW code that is available
- NextCode = 259
- 'The maximum code that can be represented in 9 bits
- MaxCode = 512
- 'Start with 9 bit code size
- CodeSize = 9
- 'Current bit position in Char& - use for PutCode
- CurrentBit = 0
- 'Char& is a temporary buffer; accumulates codes from main program and
- 'puts them in the output file once complete bytes have been
- 'built
- Char& = 0
-
- GOSUB ClearTable
- 'Get first byte from file(it's a special case)
- Prefix = GetByte
-
- PRINT "LZW Compressor For QuickBASIC 4.5"
- PRINT "By Richard Geldreich June 2nd, 1992"
- PRINT "Compressing "; File$
- PRINT : PRINT : PRINT
- 'First line to start updating statistics
- Y = CSRLIN - 3
- 'Main compression loop
- DO
- DO
-
- IF CurrentLoc& > FileLength& THEN
- PutCode Prefix
- PutCode 256
- PutCode 0: PutCode 0
- OutBuffer$ = LEFT$(OutBuffer$, OAddress - OStartAddress)
- LOCATE Y, 1
- PRINT "Bytes In:"; CurrentLoc&; (100& * CurrentLoc&) \ FileLength&; "%"
- BytesOut& = LOF(2) + (OAddress - OStartAddress)
- PRINT "Bytes Out:"; BytesOut&
- PRINT "Total Compression:"; 100 - ((100& * BytesOut&) \ CurrentLoc&); "% ";
- PUT #2, , OutBuffer$
- CLOSE
- END
- ELSE
-
- Suffix = GetByte
- CurrentLoc& = CurrentLoc& + 1
- 'We now have a Prefix:Suffix to search for.
- 'If the search fails, put the Prefix in the output file
- 'and set the Prefix equal to the character which caused
- 'the failure.
-
- Hash Prefix, Suffix, Index, Found
- IF Found = True THEN
- Prefix = Code(Index)
- 'update how many times this string was used
- Used(Prefix) = Used(Prefix) + 1
- END IF
- END IF
- LOOP WHILE Found = True
-
- 'only increase the code size when required
- DO WHILE Prefix >= MaxCode AND CodeSize < 12
- PutCode 257
- MaxCode = MaxCode * 2
- CodeSize = CodeSize + 1
- LOOP
-
- PutCode Prefix
-
- 'Put the new string into the hash table.
- Prefix(Index) = Prefix
- Suffix(Index) = Suffix
- Code(Index) = NextCode 'remember this string's code
-
- 'Prefix is now equal to the character that caused the failure now.
- Prefix = Suffix
-
- NextCode = NextCode + 1
- 'if there are too many strings then rebuild the encoding table
- IF NextCode > 4096 THEN
-
- PutCode 258 'send rebuild table code to decompressor
-
- Rebuild.Table New.Entries
- NextCode = New.Entries + StartCode
-
- IF NextCode > 4096 THEN
- GOSUB ClearTable
- NextCode = StartCode 'reset NextCode to top of tree
- END IF
-
- CodeSize = 9
- MaxCode = 512
-
-
- END IF
-
- 'let the impatient user know we haven't hung up (yet!)
- PrintCounter = PrintCounter + 1 'see if time to update the
- IF PrintCounter = 512 THEN 'screen
- LOCATE Y, 1
- PRINT "Bytes In:"; CurrentLoc&; (100& * CurrentLoc&) \ FileLength&; "%"
- BytesOut& = LOF(2) + (OAddress - OStartAddress)
- PRINT "Bytes Out:"; BytesOut&
- PRINT "Compression:"; 100 - ((100& * BytesOut&) \ CurrentLoc&); "% "; "CodeSize:"; CodeSize; "NextCode:"; NextCode; " ";
- PrintCounter = 0
- END IF
- LOOP
- 'clears the hash table
- ClearTable:
- FOR A = 0 TO 6576
- Prefix(A) = -1
- Suffix(A) = -1
- Code(A) = -1
- NEXT
- RETURN
-
- 'Reads one byte from the input buffer, and fills the buffer if it's emty.
- FUNCTION GetByte STATIC
- IF IAddress = IEndAddress THEN
- GET #1, , InBuffer$
- A& = SADD(InBuffer$)
- A& = A& - 65536 * (A& < 0)
- Iseg = SSEG(InBuffer$) + (A& \ 16)
- IAddress = (A& MOD 16)
- IEndAddress = IAddress + 4000
- END IF
- DEF SEG = Iseg
- GetByte = PEEK(IAddress)
- IAddress = IAddress + 1
- END FUNCTION
-
- 'Attempts to finds a prefix:suffix string.
- SUB Hash (Prefix, Suffix, Index, Found)
-
- Index = (Prefix * 256& XOR Suffix) MOD 6577 'XOR hashing
- IF Index = 0 THEN 'is Index lucky enough to be 0?
- Offset = 1 'Set offset to 1, because 6577-0=6577
- ELSE
- Offset = 6577 - Index
- END IF
- DO 'until we find a match or don't
- IF Code(Index) = -1 THEN 'is there nothing here?
- Found = False 'yup, not found
- EXIT SUB
- 'is this entry what we're looking for?
- ELSEIF Prefix(Index) = Prefix AND Suffix(Index) = Suffix THEN
- Found = True 'yup, found
- EXIT SUB
- ELSE 'retry until we find what were looking for or we find a blank
- 'entry
- Index = Index - Offset
- IF Index < 0 THEN 'is index too far down?
- Index = Index + 6577 'yup, bring it up then
- END IF
- END IF
- LOOP
- END SUB
-
- 'Throws a byte into the output buffer and writes the buffer if it's full.
- SUB PutByte (A) STATIC
- IF OAddress = OEndAddress THEN
- PUT #2, , OutBuffer$
- OAddress = OStartAddress
- END IF
- DEF SEG = Oseg
- POKE OAddress, A
- OAddress = OAddress + 1
- END SUB
-
- 'Throws one multi-bit code to the output file.
- SUB PutCode (A) STATIC
- SHARED MaxCode
- IF A >= MaxCode THEN STOP
-
- Char& = Char& + A * Shift(CurrentBit)
- CurrentBit = CurrentBit + CodeSize
- DO WHILE CurrentBit > 7
- PutByte Char& AND 255
- Char& = Char& \ 256
- CurrentBit = CurrentBit - 8
- LOOP
- END SUB
-
- 'This is the "experimental" part of the program. This procedure eliminates
- 'any strings which are not used in the encoding table: the usual result of
- 'doing this is greater compression.
- 'It isn't documented well yet... I'm still working on it.
- SUB Rebuild.Table (New.Entries)
- DIM P(4096), S(4096), U(4096) AS LONG, Pn(4096), C(4096)
- DIM Location(4096)
-
- SHARED StartCode, MaxCode, Prefix
- Num.Entries = 0
-
- FOR A = 0 TO 6576
- C = Code(A)
- IF C <> -1 THEN 'valid code?
- IF Used(C) > 0 THEN 'was it used at all?
- Used(C) = 0
- P = Prefix(A): S = Suffix(A)
- P(Num.Entries) = P 'put it into a temporary table
- S(Num.Entries) = S
- U(Num.Entries) = P * 4096& + S
- C(C) = Num.Entries
- Num.Entries = Num.Entries + 1
- END IF
- END IF
- NEXT
-
-
- Num.Entries = Num.Entries - 1
- FOR A = 0 TO Num.Entries
- Pn(A) = A
- NEXT
- 'sort the table according to it's prefix:suffix
- Mid = Num.Entries \ 2
- DO
- FOR A = 0 TO Num.Entries - Mid
- IF U(Pn(A)) > U(Pn(A + Mid)) THEN
- SWAP Pn(A), Pn(A + Mid)
- Swap.Flag = True
- CompareLow = A - Mid
- CompareHigh = A
- DO WHILE CompareLow >= 0
- IF U(Pn(CompareLow)) > U(Pn(CompareHigh)) THEN
- SWAP Pn(CompareLow), Pn(CompareHigh)
- CompareHigh = CompareLow
- CompareLow = CompareLow - Mid
- ELSE
- EXIT DO
- END IF
- LOOP
-
- END IF
- NEXT
-
- Mid = Mid \ 2
- LOOP WHILE Mid > 0
-
-
- FOR A = 0 TO Num.Entries
- Location(Pn(A)) = A
- NEXT
- 'clear the old hash table
- FOR A = 0 TO 6576
- Prefix(A) = -1
- Suffix(A) = -1
- Code(A) = -1
- NEXT
-
- 'put each prefix:suffix into the hash table
- FOR A1 = 0 TO Num.Entries
- A = Pn(A1)
-
- P = P(A)
- S = S(A)
- IF P >= StartCode THEN 'is it pointing twards a string?
- P = StartCode + Location(C(P)) 'yup; update the pointer
- END IF
- IF S >= StartCode THEN
- S = StartCode + Location(C(S))
- END IF
- 'where does this prefix:suffix go?
- Hash P, S, Index, 0
- 'put it there
- Prefix(Index) = P
- Suffix(Index) = S
- Code(Index) = A1 + StartCode
-
- NEXT
- '# of entries in the hash table now
- New.Entries = Num.Entries + 1
- END SUB
-
-