home *** CD-ROM | disk | FTP | other *** search
- {$D+,L+} {debug info}
- {$M 8192,0,655360} {stack size, heapmin, heapmax}
-
- PROGRAM RunLengthEncode;
- {
- Author: Ken Murphy, CIS 74025,731
- Date Written: January 1993
-
- This program implements a Run-Length-Encoding algorithm to compress files.
- I wrote this to test the decompression speed and compression effect on files
- containing printer graphics. These files tend to be large with long series
- repetitive characters. RLE is a bad compression choice if the file to be
- compressed does not have runs of repetitive bytes. The algorithm gains
- it's compression from reducing a series of the same byte (up to 127 of them)
- to 2 bytes.
-
- Tests have proven that this RLE decompression is very fast, and it's OK to
- include this technique within other applications that employ files that can
- benefit. Tests on my dot matrix printer graphic data files show compressions
- of about 65% of original size. LZHuf compresses better, but this is faster
- and simpler to develop for my needs.
-
- The program has 3 command-line inputs in the order given:
- 1. Mode: C for compress or D for Decompress
- 2. Input file name
- 3. Output file name.
-
- The encoded data file is a series of strings. Each string begins with a
- "prefix" byte. The leftmost bit of the byte distinguishes between the two
- types of strings. The remaining 7 bits in the prefix byte are a length value.
- The prefix byte is followed by one or more data bytes. The two types of
- compressed strings are "RUN" and "LEN". The RUN variety allows for sequences
- of identical bytes. Generally, the maximum compression effect results from
- these. The LEN variety accomodates sequences of data bytes that are not the
- same.
-
- Here's what it looks like in a compressed file:
- If the leftmost bit of the prefix byte is 1, the string is a "RUN" string.
- The remaining 7 bits of the prefix byte are the replication count value
- for the following data byte. Therefore, a RUN string is always 2 bytes.
-
- If the leftmost bit of the prefix byte is 0, the string is a "LENGTH"
- string. The remaining 7 bits of the prefix byte are the number of
- data bytes following the prefix byte.
-
- Since the length value in the prefix byte is limited to 7 bits, a RUN or a
- LENGTH can be no larger that 127 ($01 - $FF). I don't permit a length value
- of 0. PASCAL will handle this type of byte as a SHORTINT. Remember that the
- actual size of a RUN segment is always 2 bytes. A LEN segment never gets
- larger than 128 bytes, i.e. 1 prefix byte and 127 (max) data bytes.
-
- There's performance instrumentation kept and displayed so I can measure the
- algorithm and how I've coded it.
-
- The program was developed in Borland Pascal 7.0 for a REAL target. It uses
- TurboPower's Object Professional, but there's no "Object Oriented"
- programming here. There's not much in the way of "fancy" so it should be
- easy to modify to vanilla Pascal.
-
- Also, consider this a blatant advertisement and recommendation for TurboPower's
- Object Professional library although I'm in no way affiliated with them.}
-
- USES DOS, OPCrt, OPString;
-
- CONST
- MaxChars = 127;
- EndOfFile = 100;
- BufSize = 255;
-
- TYPE
- RLE = RECORD
- RepeatCount : shortint;
- Data : array[1..MaxChars] OF char;
- END;
-
- RLE_Rec_String= array[0..MaxChars] OF char;
-
- SegmentTypes = (RUN, LEN);
-
- TimeRecord = RECORD
- Hour : Word;
- Minute : Word;
- Second : Word;
- Sec100 : Word;
- END;
-
- CONST {typed CONST}
- ReadResult : word = 0;
- WriteCount : word = 0;
- Segments : longint = 0;
- TotalBytesIn : longint = 0;
- TotalBytesOut : longint = 0;
- EndInput : boolean = false;
- DisplayPrefix : string[2] = ' ';
- InputFileSize : real = 0;
- LastIBuffer : boolean = false;
- LastOBuffer : boolean = false;
- OBufferIndex : integer = BufSize+1;
- OBuffer : string[BufSize] = '';
- IBuffer : string[BufSize] = '';
- IBufferIndex : integer = Bufsize+1;
-
- VAR
- SegmentType : SegmentTypes;
- RLE_Record : RLE;
- RLE_String : RLE_Rec_String absolute RLE_Record;
- CurrentChar : char;
- LastChar : char;
- InputFile : FILE;
- OutputFile : FILE;
- CompressMode : boolean;
- InputFileName : string;
- OutputFileName: string;
- OldPct : string[7];
- Started : TimeRecord;
- Stopped : TimeRecord;
-
- {*************************************************************}
- PROCEDURE ShowProgress;
- VAR
- Percent : string[7];
- {show the user what percentage of the input we've done - eye candy}
- BEGIN
- Str((TotalBytesIn * 100) / InputFileSize : 6 : 1, Percent);
- IF OldPct <> Percent THEN {this speeds things a bit}
- BEGIN
- OldPct:=Percent;
- FastText('Percentage Completed = ' + Percent, 6, 2);
- END;
- END; {ShowProgress}
- {**************************************************************}
- PROCEDURE ReadInput (CONST ByteCount : shortint;
- VAR ReadString : RLE_Rec_String);
- {feed BYTECOUNT bytes from the input file. A string buffer is used to hold
- the file's data to minimize the I/O time spent in the file. The output is a
- 0-index-based char array.}
- VAR
- BCount : shortint;
- BEGIN
- IF IBufferIndex+ByteCount-1<=ReadResult THEN {more data in buffer?}
- BEGIN
- IF ByteCount=1 THEN
- ReadString[0]:=IBuffer[IBufferIndex]
- ELSE
- Move (IBuffer[IBufferIndex],ReadString[0],ByteCount);
- IBufferIndex := IBufferIndex + ByteCount;
- END
- ELSE
- BEGIN {no, get more from file}
- IF LastIBuffer THEN
- EndInput:=true {last buffer was delivered}
- ELSE
- BEGIN {read another block}
- BCount:=ByteCount;
- {if there's anything in the buffer, output the remainder as the 1st
- chunk of the data requested}
- IF IBufferIndex<=BufSize THEN {was the entire buffer used}
- BEGIN {no, there's more in there to give}
- IF IBufferIndex=BufSize THEN
- ReadString[0]:=IBuffer[BufSize]
- ELSE
- Move(IBuffer[IBufferIndex],ReadString[0],Length(IBuffer)-IBufferIndex+1);
-
- BCount:=BCount-(BufSize-IBufferIndex+1);
- END; {buffer index <= buffer size}
-
- {$I-} BlockRead (InputFile, IBuffer[1], BufSize, ReadResult); {$I+}
- IF (IOResult = EndOfFile) OR (ReadResult<BufSize) THEN
- LastIBuffer:=true; {last buffer has been input}
-
- IF ReadResult>0 THEN
- BEGIN
- IBuffer[0]:=Chr(ReadResult); {set length of string}
-
- IF BCount=1 THEN {Asking for only 1 byte}
- ReadString[ByteCount-BCount]:=IBuffer[1] {yes, faster than 1 byte MOVE}
- ELSE
- Move(IBuffer[1],ReadString[ByteCount-BCount],BCount);
-
- TotalBytesIn:=TotalBytesIn + ReadResult;
- IBufferIndex := BCount+1;
- END; {result>0}
- END; {input another block}
- END; {else}
- END; {ReadRaw}
- {*************************************************************}
- PROCEDURE Compress;
- {Compress the input file by reading it one byte at a time and comparing
- that byte to the one previously read.}
-
- {================================================}
- PROCEDURE NewSegment (CONST StringType : SegmentTypes);
- {set the appropriate prefix type; write the string segment and adjust
- the statistics counters. Lastly insert the latest byte and reset the
- prefix count to 1}
- BEGIN
- CASE StringType OF
- LEN :
- BEGIN {prefix byte remains positive < 128}
- BlockWrite (OutputFile, RLE_Record, RLE_Record.RepeatCount + 1);
- TotalBytesOut:=TotalBytesOut + RLE_Record.RepeatCount + 1;
- END; {LEN}
- RUN :
- BEGIN {flip the sign of the prefix byte}
- RLE_Record.RepeatCount:=RLE_Record.RepeatCount * -1;
- BlockWrite (OutputFile, RLE_Record, 2);
- TotalBytesOut:=TotalBytesOut + 2;
- END; {RUN}
- END; {CASE}
-
- Segments:=Segments+1;
- RLE_Record.Data[1]:=CurrentChar; {set new byte to compare}
- RLE_Record.RepeatCount:=1; {only one of them so far}
- END; {NewSegment}
- {================================================}
- FUNCTION ReadRaw : char;
- {feed one byte from the input file. A string buffer is used to hold
- the file's data to minimize the I/O time spent in the file.}
- BEGIN
- IF IBufferIndex<=ReadResult THEN {more data in buffer?}
- BEGIN
- ReadRaw:=IBuffer[IBufferIndex]; {yes, feed a byte}
- IBufferIndex := IBufferIndex + 1;
- END
- ELSE
- BEGIN {no, get more from file}
- IF LastIBuffer THEN
- EndInput:=true {last buffer was delivered}
- ELSE
- BEGIN {read another block}
- {$I-} BlockRead (InputFile, IBuffer[1], BufSize, ReadResult); {$I+}
- IF (IOResult = EndOfFile) OR (ReadResult<BufSize) THEN
- LastIBuffer:=true; {last buffer has been input}
- IF ReadResult>0 THEN
- BEGIN
- IBuffer[0]:=Chr(ReadResult); {set length of string JIC}
- ReadRaw := IBuffer[1];
- TotalBytesIn:=TotalBytesIn + ReadResult;
- IBufferIndex := 2;
- END; {result>0}
- END; {input another block}
- END; {else}
- END; {ReadRaw}
- {================================================}
- BEGIN {Compress}
- WITH RLE_Record DO
- BEGIN
- Data[1]:=ReadRaw; {prime the pump}
- RepeatCount:=1;
-
- REPEAT
- CurrentChar:=ReadRaw; {read 1 byte from input}
- IF NOT EndInput THEN {EOF?}
- CASE RLE_Record.RepeatCount OF {no}
- 0 : BEGIN
- RepeatCount:=1;
- Data[1]:=CurrentChar;
- END; {repeat count = 0}
- 1 : BEGIN {2nd byte from input - determine segment type}
- ShowProgress; {recalc only every new seg to save time}
- IF CurrentChar=Data[1] THEN {determine seg type}
- SegmentType:=RUN {equal consequtive data}
- ELSE
- BEGIN
- SegmentType:=LEN; {nonequal consequtive data}
- Data[2]:=CurrentChar; {save the new data byte}
- END;
- RepeatCount:=2; {in any case, up the counter}
- END; {repeat count = 1}
- ELSE {we're into the 3rd or greater input byte}
- CASE SegmentType OF
- RUN : BEGIN {looking for a RUN of identical bytes}
- IF CurrentChar=Data[1] THEN
- BEGIN {RUN continues}
- Inc(RepeatCount); {bump up counter}
- IF RepeatCount=MaxChars THEN {max?}
- BEGIN {yes}
- NewSegment(RUN); {write full segment}
- RepeatCount:=0; {no current char}
- END;
- END {RUN continues}
- ELSE {RUN has ended}
- NewSegment(RUN);
- END; {case RUN}
- LEN : BEGIN
- IF CurrentChar<>Data[RepeatCount] THEN
- BEGIN
- Inc(RepeatCount); {LEN continues}
- Data[RepeatCount]:=CurrentChar;
- IF RepeatCount=MaxChars THEN
- BEGIN
- NewSegment(LEN); {write output}
- RepeatCount:=0;
- END;
- END
- ELSE {LEN has ended}
- NewSegment(LEN); {write output}
- END; {case LEN}
- END; {case segmentmode}
- END; {case byte counter}
- UNTIL EndInput;
-
- IF RLE_Record.RepeatCount<>0 THEN
- CASE SegmentType OF
- RUN : NewSegment (RUN);
- LEN : NewSegment (LEN);
- END;
- END; {WITH}
- END; {Compress}
- {*************************************************************}
- PROCEDURE DeCompress;
- {the input is a series of RLE_Records both RUN and LEN type. The output is
- buffered which saves about 25% execution time.}
- VAR
- I,J : integer;
- Work : string[MaxChars+1];
-
- {=============================================}
- PROCEDURE WriteOBuffer;
- {write the output buffer string without the length byte}
- BEGIN
- BlockWrite (OutputFile, OBuffer[1], Length(OBuffer));
- TotalBytesOut:=TotalBytesOut + Length(OBuffer);
- END; {WriteBuffer}
- {=============================================}
- PROCEDURE WriteRaw (CONST Data : string);
- {Use the buffer to hold and write the input DATA strings. The entire
- string must fit or it waits for the next buffer}
- BEGIN
- IF Length(OBuffer)+Length(Data)<BufSize THEN {will this string fit?}
- OBuffer:=ConCat(OBuffer, Data) {yessir}
- ELSE
- BEGIN
- WriteOBuffer; {nosiree, output buffer}
- OBuffer:=Data; {now save it}
- END;
- END; {WriteRaw}
- {=============================================}
- BEGIN {DeCompress}
- WITH RLE_Record DO
- BEGIN
- REPEAT
- ReadInput(2, RLE_String); {read the prefix byte and 1 data byte}
- IF NOT EndInput THEN
- BEGIN
- ShowProgress;
- Segments:=Segments+1;
- IF RepeatCount < 0 THEN {is this a RUN or a LEN?}
- BEGIN {it's a RUN}
- SegmentType:=RUN;
- RepeatCount:=RepeatCount * -1;
- END
- ELSE
- SegmentType:=LEN; {it's a LEN}
-
- CASE SegmentType OF
- RUN : BEGIN
- Work:=''; {JIC}
- WriteRaw(PadCh(Work,Data[1],RepeatCount));
- END; {case RUN}
- LEN : BEGIN
- Work:=Data[1]; {save 1st byte}
- I:=RepeatCount - 1; {save original LEN value}
- IF I>0 THEN
- BEGIN
- ReadInput(I, RLE_String); {read remaining bytes in segment}
- {first byte of remaining segment is in the prefix field}
- Work:=ConCat(Work,Chr(RepeatCount));
- IF I>1 THEN
- CASE I>2 OF {append the remaining byte(s)}
- true :
- BEGIN
- Move (Data[1], Work[3], I-1);
- Work[0]:=Chr(Ord(Work[0]) + I - 1); {adjust string length}
- END;
- false : Work:=Work+Data[1];
- END; {case}
- END; {I>0}
- WriteRaw (Work);
- END; {case LEN}
- END; {case}
- END; {not EndInput}
- UNTIL EndInput;
-
- WriteOBuffer; {output final buffer - always something in there}
- END; {WITH}
- END; {Decompress}
- {*************************************************************}
- FUNCTION FormTime (Time : TimeRecord) : real;
- {Convert a system time record to time in seconds and hundredths}
- BEGIN
- FormTime:=((Time.Hour * 3600) + (Time.Minute * 60) + Time.Second)
- + (Time.Sec100 / 100);
- END;
- {*************************************************************}
- FUNCTION RunParams : boolean;
- {validate the run parameters supplied}
- VAR
- P1 : string[1];
- BEGIN
- RunParams := true;
-
- P1:=ParamStr(1); {used temporarily}
- CASE P1[1] OF
- 'c', 'C' : CompressMode := true;
- 'd', 'D' : BEGIN
- CompressMode := false;
- DisplayPrefix:='DE';
- END
- ELSE
- RunParams := false;
- END; {case}
-
- InputFileName :=StUpCase(CleanPathName(ParamStr(2)));
- OutputFileName:=StUpCase(CleanPathName(ParamStr(3)));
- END; {RunParams}
- {*************************************************************}
- BEGIN {main program}
- HiddenCursor;
- TextBackGround(Cyan); {nicer than black screens}
- TextColor(Red);
- ClrScr;
-
- IF ParamCount <> 3 THEN
- BEGIN
- WriteLn ('3 parameters are required - ',ParamCount,' were supplied.');
- WriteLn ('Param syntax is "Mode InputFile OutputFile".');
- WriteLn ('Mode is C for Compress or D for Decompress.');
- NormalCursor;
- Halt(1);
- END
- ELSE
- IF NOT RunParams THEN
- BEGIN
- WriteLn ('Run mode parameter error - /C or /D only.');
- NormalCursor;
- Halt(1);
- END;
-
- {Pascal will shut me down if the path\file is incorrect}
- Assign (InputFile, InputFileName);
- Assign (OutputFile, OutputFileName);
- ReWrite (OutputFile, 1);
- Reset (InputFile, 1);
-
- InputFileSize:=FileSize(InputFile); {input bytes to be processed}
-
- WriteLn (DisplayPrefix, 'COMPRESSING ''',InputFileName,'''');
- WriteLn (' TO ''',OutputFileName,''' ...');
- WriteLn;
-
- {here's where it all happens}
- GetTime (Started.Hour, Started.Minute, Started.Second, Started.Sec100);
- IF CompressMode THEN
- Compress
- ELSE
- Decompress;
- GetTime (Stopped.Hour, Stopped.Minute, Stopped.Second, Stopped.Sec100);
-
- {Audit what we did}
- WriteLn;
- WriteLn;
- WriteLn ('Total string segments = ',TrimLead(Form('###,###,###.',Segments)));
- WriteLn ('Total bytes input = ',TrimLead(Form('###,###,###.',TotalBytesIn)));
- WriteLn ('Total bytes output = ',TrimLead(Form('###,###,###.',TotalBytesOut)));
- WriteLn ('Total seconds to run = ',FormTime(Stopped)-FormTime(Started) : 5 : 1);
- IF CompressMode THEN
- WriteLn ('Compression ratio = ',
- (100 - ((TotalBytesOut / TotalBytesIn) * 100)) : 6 : 2,' %');
-
- Close (InputFile);
- Close (OutputFile);
-
- WriteLn;
- WriteLn ('RLE Processing Completed.');
- NormalCursor;
- END.
-
-