home *** CD-ROM | disk | FTP | other *** search
/ PsL Monthly 1993 December / PSL Monthly Shareware CD-ROM (December 1993).iso / prgmming / dos / pascal / rle.pas < prev    next >
Encoding:
Pascal/Delphi Source File  |  1993-02-03  |  16.4 KB  |  479 lines

  1. {$D+,L+}            {debug info}
  2. {$M 8192,0,655360}  {stack size, heapmin, heapmax}
  3.  
  4. PROGRAM RunLengthEncode;
  5. {
  6.  Author:                 Ken Murphy, CIS 74025,731
  7.  Date Written:    January 1993
  8.  
  9. This program implements a Run-Length-Encoding algorithm to compress files.
  10. I wrote this to test the decompression speed and compression effect on files
  11. containing printer graphics.  These files tend to be large with long series
  12. repetitive characters.  RLE is a bad compression choice if the file to be
  13. compressed does not have runs of repetitive bytes.  The algorithm gains
  14. it's compression from reducing a series of the same byte (up to 127 of them)
  15. to 2 bytes.
  16.  
  17. Tests have proven that this RLE decompression is very fast, and it's OK to
  18. include this technique within other applications that employ files that can
  19. benefit.  Tests on my dot matrix printer graphic data files show compressions
  20. of about 65% of original size.  LZHuf compresses better, but this is faster
  21. and simpler to develop for my needs.
  22.  
  23.     The program has 3 command-line inputs in the order given:
  24.     1. Mode:  C for compress or D for Decompress
  25.     2. Input file name
  26.     3. Output file name.
  27.  
  28. The encoded data file is a series of strings.  Each string begins with a
  29. "prefix" byte.  The leftmost bit of the byte distinguishes between the two
  30. types of strings.  The remaining 7 bits in the prefix byte are a length value.
  31. The prefix byte is followed by one or more data bytes.  The two types of
  32. compressed strings are "RUN" and "LEN".  The RUN variety allows for sequences
  33. of identical bytes.  Generally, the maximum compression effect results from
  34. these.  The LEN variety accomodates sequences of data bytes that are not the
  35. same.
  36.  
  37. Here's what it looks like in a compressed file:
  38.     If the leftmost bit of the prefix byte is 1, the string is a "RUN" string.
  39.     The remaining 7 bits of the prefix byte are the replication count value
  40.     for the following data byte.  Therefore, a RUN string is always 2 bytes.
  41.  
  42.     If the leftmost bit of the prefix byte is 0, the string is a "LENGTH"
  43.     string.  The remaining 7 bits of the prefix byte are the number of
  44.     data bytes following the prefix byte.
  45.  
  46. Since the length value in the prefix byte is limited to 7 bits,  a RUN or a
  47. LENGTH can be no larger that 127 ($01 - $FF). I don't permit a length value
  48. of 0.  PASCAL will handle this type of byte as a SHORTINT.  Remember that the
  49. actual size of a RUN segment is always 2 bytes.  A LEN segment never gets
  50. larger than 128 bytes, i.e. 1 prefix byte and 127 (max) data bytes.
  51.  
  52. There's performance instrumentation kept and displayed so I can measure the
  53. algorithm and how I've coded it.
  54.  
  55. The program was developed in Borland Pascal 7.0 for a REAL target. It uses
  56. TurboPower's Object Professional, but there's no "Object Oriented"
  57. programming here.  There's not much in the way of "fancy" so it should be
  58. easy to modify to vanilla Pascal.
  59.  
  60. Also, consider this a blatant advertisement and recommendation for TurboPower's
  61. Object Professional library although I'm in no way affiliated with them.}
  62.  
  63. USES DOS, OPCrt, OPString;
  64.  
  65. CONST
  66.     MaxChars    = 127;
  67.     EndOfFile    = 100;
  68.     BufSize        =    255;
  69.  
  70. TYPE
  71.     RLE = RECORD
  72.         RepeatCount : shortint;
  73.         Data                : array[1..MaxChars] OF char;
  74.     END;
  75.  
  76.     RLE_Rec_String= array[0..MaxChars] OF char;
  77.  
  78.     SegmentTypes = (RUN, LEN);
  79.  
  80.     TimeRecord    = RECORD
  81.         Hour        :    Word;
  82.         Minute    :    Word;
  83.         Second    :    Word;
  84.         Sec100    : Word;
  85.     END;
  86.  
  87. CONST             {typed CONST}
  88.     ReadResult        : word = 0;
  89.     WriteCount        : word = 0;
  90.     Segments            : longint = 0;
  91.     TotalBytesIn    : longint = 0;
  92.     TotalBytesOut    : longint = 0;
  93.     EndInput            : boolean = false;
  94.     DisplayPrefix    : string[2] = ' ';
  95.     InputFileSize    : real = 0;
  96.     LastIBuffer        : boolean = false;
  97.     LastOBuffer        : boolean = false;
  98.     OBufferIndex    : integer = BufSize+1;
  99.     OBuffer              : string[BufSize] = '';
  100.     IBuffer                : string[BufSize] = '';
  101.     IBufferIndex    : integer = Bufsize+1;
  102.  
  103. VAR
  104.     SegmentType        : SegmentTypes;
  105.     RLE_Record        : RLE;
  106.     RLE_String        : RLE_Rec_String absolute RLE_Record;
  107.     CurrentChar     : char;
  108.     LastChar            : char;
  109.     InputFile          : FILE;
  110.     OutputFile        : FILE;
  111.     CompressMode    : boolean;
  112.     InputFileName : string;
  113.     OutputFileName: string;
  114.     OldPct                : string[7];
  115.     Started                : TimeRecord;
  116.     Stopped                : TimeRecord;
  117.  
  118. {*************************************************************}
  119. PROCEDURE ShowProgress;
  120. VAR
  121.     Percent : string[7];
  122. {show the user what percentage of the input we've done - eye candy}
  123. BEGIN
  124.     Str((TotalBytesIn * 100) / InputFileSize : 6 : 1, Percent);
  125.     IF OldPct <> Percent THEN  {this speeds things a bit}
  126.         BEGIN
  127.             OldPct:=Percent;
  128.             FastText('Percentage Completed = ' + Percent, 6, 2);
  129.         END;
  130. END;  {ShowProgress}
  131. {**************************************************************}
  132. PROCEDURE ReadInput (CONST ByteCount : shortint;
  133.                                          VAR ReadString : RLE_Rec_String);
  134. {feed BYTECOUNT bytes from the input file.  A string buffer is used to hold
  135. the file's data to minimize the I/O time spent in the file. The output is a
  136. 0-index-based char array.}
  137. VAR
  138.     BCount    : shortint;
  139. BEGIN
  140.     IF IBufferIndex+ByteCount-1<=ReadResult THEN                {more data in buffer?}
  141.         BEGIN
  142.             IF ByteCount=1 THEN
  143.                 ReadString[0]:=IBuffer[IBufferIndex]
  144.             ELSE
  145.                 Move (IBuffer[IBufferIndex],ReadString[0],ByteCount);
  146.             IBufferIndex := IBufferIndex + ByteCount;
  147.         END
  148.     ELSE
  149.         BEGIN                                                    {no, get more from file}
  150.             IF LastIBuffer THEN
  151.                 EndInput:=true            {last buffer was delivered}
  152.             ELSE
  153.                 BEGIN                     {read another block}
  154.                     BCount:=ByteCount;
  155.                     {if there's anything in the buffer, output the remainder as the 1st
  156.                     chunk of the data requested}
  157.                     IF IBufferIndex<=BufSize THEN      {was the entire buffer used}
  158.                         BEGIN                            {no, there's more in there to give}
  159.                             IF IBufferIndex=BufSize THEN
  160.                                 ReadString[0]:=IBuffer[BufSize]
  161.                             ELSE
  162.                                 Move(IBuffer[IBufferIndex],ReadString[0],Length(IBuffer)-IBufferIndex+1);
  163.  
  164.                             BCount:=BCount-(BufSize-IBufferIndex+1);
  165.                         END; {buffer index <= buffer size}
  166.  
  167.                     {$I-} BlockRead (InputFile, IBuffer[1], BufSize, ReadResult); {$I+}
  168.                     IF (IOResult = EndOfFile) OR (ReadResult<BufSize) THEN
  169.                         LastIBuffer:=true;     {last buffer has been input}
  170.  
  171.                     IF ReadResult>0 THEN
  172.                         BEGIN
  173.                             IBuffer[0]:=Chr(ReadResult);   {set length of string}
  174.  
  175.                             IF BCount=1 THEN               {Asking for only 1 byte}
  176.                                 ReadString[ByteCount-BCount]:=IBuffer[1]    {yes, faster than 1 byte MOVE}
  177.                             ELSE
  178.                                 Move(IBuffer[1],ReadString[ByteCount-BCount],BCount);
  179.  
  180.                             TotalBytesIn:=TotalBytesIn + ReadResult;
  181.                             IBufferIndex := BCount+1;
  182.                         END;  {result>0}
  183.                 END;  {input another block}
  184.         END;  {else}
  185. END;  {ReadRaw}
  186. {*************************************************************}
  187. PROCEDURE Compress;
  188. {Compress the input file by reading it one byte at a time and comparing
  189. that byte to the one previously read.}
  190.  
  191.     {================================================}
  192.     PROCEDURE NewSegment (CONST StringType : SegmentTypes);
  193.     {set the appropriate prefix type; write the string segment and adjust
  194.     the statistics counters.  Lastly insert the latest byte and reset the
  195.     prefix count to 1}
  196.     BEGIN
  197.         CASE StringType OF
  198.             LEN    :
  199.                     BEGIN {prefix byte remains positive < 128}
  200.                         BlockWrite (OutputFile, RLE_Record, RLE_Record.RepeatCount + 1);
  201.                         TotalBytesOut:=TotalBytesOut + RLE_Record.RepeatCount + 1;
  202.                     END;  {LEN}
  203.             RUN    :
  204.                     BEGIN {flip the sign of the prefix byte}
  205.                         RLE_Record.RepeatCount:=RLE_Record.RepeatCount * -1;
  206.                         BlockWrite (OutputFile, RLE_Record, 2);
  207.                         TotalBytesOut:=TotalBytesOut + 2;
  208.                     END;  {RUN}
  209.         END;  {CASE}
  210.  
  211.         Segments:=Segments+1;
  212.         RLE_Record.Data[1]:=CurrentChar;  {set new byte to compare}
  213.         RLE_Record.RepeatCount:=1;        {only one of them so far}
  214.     END;  {NewSegment}
  215.     {================================================}
  216.     FUNCTION ReadRaw : char;
  217.     {feed one byte from the input file.  A string buffer is used to hold
  218.     the file's data to minimize the I/O time spent in the file.}
  219.     BEGIN
  220.         IF IBufferIndex<=ReadResult THEN            {more data in buffer?}
  221.             BEGIN
  222.                 ReadRaw:=IBuffer[IBufferIndex];   {yes, feed a byte}
  223.                 IBufferIndex := IBufferIndex + 1;
  224.             END
  225.         ELSE
  226.             BEGIN                                                    {no, get more from file}
  227.                 IF LastIBuffer THEN
  228.                     EndInput:=true            {last buffer was delivered}
  229.                 ELSE
  230.                     BEGIN                     {read another block}
  231.                         {$I-} BlockRead (InputFile, IBuffer[1], BufSize, ReadResult); {$I+}
  232.                         IF (IOResult = EndOfFile) OR (ReadResult<BufSize) THEN
  233.                             LastIBuffer:=true;     {last buffer has been input}
  234.                         IF ReadResult>0 THEN
  235.                             BEGIN
  236.                                 IBuffer[0]:=Chr(ReadResult);   {set length of string JIC}
  237.                                 ReadRaw := IBuffer[1];
  238.                                 TotalBytesIn:=TotalBytesIn + ReadResult;
  239.                                 IBufferIndex := 2;
  240.                             END;  {result>0}
  241.                     END;  {input another block}
  242.             END;  {else}
  243.     END;  {ReadRaw}
  244.     {================================================}
  245. BEGIN {Compress}
  246.     WITH RLE_Record DO
  247.         BEGIN
  248.             Data[1]:=ReadRaw;  {prime the pump}
  249.             RepeatCount:=1;
  250.  
  251.         REPEAT
  252.             CurrentChar:=ReadRaw;   {read 1 byte from input}
  253.             IF NOT EndInput THEN      {EOF?}
  254.                 CASE RLE_Record.RepeatCount OF    {no}
  255.                     0 : BEGIN
  256.                                 RepeatCount:=1;
  257.                                 Data[1]:=CurrentChar;
  258.                             END;  {repeat count = 0}
  259.                     1 : BEGIN    {2nd byte from input - determine segment type}
  260.                                 ShowProgress; {recalc only every new seg to save time}
  261.                                 IF CurrentChar=Data[1] THEN  {determine seg type}
  262.                                     SegmentType:=RUN           {equal consequtive data}
  263.                                 ELSE
  264.                                     BEGIN
  265.                                         SegmentType:=LEN;     {nonequal consequtive data}
  266.                                         Data[2]:=CurrentChar; {save the new data byte}
  267.                                     END;
  268.                                 RepeatCount:=2;        {in any case, up the counter}
  269.                             END;  {repeat count = 1}
  270.                     ELSE     {we're into the 3rd or greater input byte}
  271.                         CASE SegmentType OF
  272.                             RUN :    BEGIN  {looking for a RUN of identical bytes}
  273.                                             IF CurrentChar=Data[1] THEN
  274.                                                 BEGIN  {RUN continues}
  275.                                                     Inc(RepeatCount);             {bump up counter}
  276.                                                     IF RepeatCount=MaxChars THEN     {max?}
  277.                                                         BEGIN                           {yes}
  278.                                                             NewSegment(RUN);    {write full segment}
  279.                                                             RepeatCount:=0;         {no current char}
  280.                                                         END;
  281.                                                 END  {RUN continues}
  282.                                         ELSE  {RUN has ended}
  283.                                             NewSegment(RUN);
  284.                                     END;  {case RUN}
  285.                             LEN : BEGIN
  286.                                             IF CurrentChar<>Data[RepeatCount] THEN
  287.                                                 BEGIN
  288.                                                     Inc(RepeatCount);    {LEN continues}
  289.                                                     Data[RepeatCount]:=CurrentChar;
  290.                                                     IF RepeatCount=MaxChars THEN
  291.                                                         BEGIN
  292.                                                             NewSegment(LEN);    {write output}
  293.                                                             RepeatCount:=0;
  294.                                                         END;
  295.                                                 END
  296.                                             ELSE  {LEN has ended}
  297.                                                 NewSegment(LEN);           {write output}
  298.                                         END;  {case LEN}
  299.                         END;  {case segmentmode}
  300.                     END;  {case byte counter}
  301.         UNTIL EndInput;
  302.  
  303.         IF RLE_Record.RepeatCount<>0 THEN
  304.             CASE SegmentType OF
  305.                 RUN    :    NewSegment (RUN);
  306.                 LEN    :    NewSegment (LEN);
  307.             END;
  308.     END; {WITH}
  309. END;  {Compress}
  310. {*************************************************************}
  311. PROCEDURE DeCompress;
  312. {the input is a series of RLE_Records both RUN and LEN type. The output is
  313. buffered which saves about 25% execution time.}
  314. VAR
  315.     I,J     : integer;
  316.     Work : string[MaxChars+1];
  317.  
  318.     {=============================================}
  319.     PROCEDURE WriteOBuffer;
  320.     {write the output buffer string without the length byte}
  321.     BEGIN
  322.         BlockWrite (OutputFile, OBuffer[1], Length(OBuffer));
  323.         TotalBytesOut:=TotalBytesOut + Length(OBuffer);
  324.     END;  {WriteBuffer}
  325.     {=============================================}
  326.     PROCEDURE WriteRaw (CONST Data : string);
  327.     {Use the buffer to hold and write the input DATA strings. The entire
  328.      string must fit or it waits for the next buffer}
  329.     BEGIN
  330.         IF Length(OBuffer)+Length(Data)<BufSize THEN    {will this string fit?}
  331.             OBuffer:=ConCat(OBuffer, Data)                            {yessir}
  332.         ELSE
  333.             BEGIN
  334.                 WriteOBuffer;                                                            {nosiree, output buffer}
  335.                 OBuffer:=Data;                            {now save it}
  336.             END;
  337.     END;  {WriteRaw}
  338.     {=============================================}
  339. BEGIN  {DeCompress}
  340.     WITH RLE_Record DO
  341.         BEGIN
  342.             REPEAT
  343.                 ReadInput(2, RLE_String);    {read the prefix byte and 1 data byte}
  344.                 IF NOT EndInput THEN
  345.                     BEGIN
  346.                         ShowProgress;
  347.                         Segments:=Segments+1;
  348.                         IF RepeatCount < 0 THEN     {is this a RUN or a LEN?}
  349.                             BEGIN                     {it's a RUN}
  350.                                 SegmentType:=RUN;
  351.                                 RepeatCount:=RepeatCount * -1;
  352.                             END
  353.                         ELSE
  354.                             SegmentType:=LEN;         {it's a LEN}
  355.  
  356.                         CASE SegmentType OF
  357.                             RUN : BEGIN
  358.                                             Work:='';                   {JIC}
  359.                                             WriteRaw(PadCh(Work,Data[1],RepeatCount));
  360.                                         END;  {case RUN}
  361.                             LEN : BEGIN
  362.                                             Work:=Data[1];            {save 1st byte}
  363.                                             I:=RepeatCount - 1;    {save original LEN value}
  364.                                             IF I>0 THEN
  365.                                                 BEGIN
  366.                                                     ReadInput(I, RLE_String);         {read remaining bytes in segment}
  367.                                                     {first byte of remaining segment is in the prefix field}
  368.                                                     Work:=ConCat(Work,Chr(RepeatCount));
  369.                                                     IF I>1 THEN
  370.                                                         CASE I>2 OF    {append the remaining byte(s)}
  371.                                                             true    :
  372.                                                                         BEGIN
  373.                                                                             Move (Data[1], Work[3], I-1);
  374.                                                                             Work[0]:=Chr(Ord(Work[0]) + I - 1);  {adjust string length}
  375.                                                                         END;
  376.                                                             false    :    Work:=Work+Data[1];
  377.                                                         END; {case}
  378.                                                 END; {I>0}
  379.                                             WriteRaw (Work);
  380.                                         END;  {case LEN}
  381.                         END;  {case}
  382.                     END;  {not EndInput}
  383.             UNTIL EndInput;
  384.  
  385.             WriteOBuffer;  {output final buffer - always something in there}
  386.     END; {WITH}
  387. END;  {Decompress}
  388. {*************************************************************}
  389. FUNCTION FormTime (Time : TimeRecord) : real;
  390. {Convert a system time record to time in seconds and hundredths}
  391. BEGIN
  392.     FormTime:=((Time.Hour * 3600) + (Time.Minute * 60) + Time.Second)
  393.                         + (Time.Sec100 / 100);
  394. END;
  395. {*************************************************************}
  396. FUNCTION RunParams : boolean;
  397. {validate the run parameters supplied}
  398. VAR
  399.     P1 : string[1];
  400. BEGIN
  401.     RunParams := true;
  402.  
  403.     P1:=ParamStr(1);   {used temporarily}
  404.     CASE P1[1] OF
  405.         'c', 'C' :    CompressMode := true;
  406.         'd', 'D' :    BEGIN
  407.                                     CompressMode := false;
  408.                                     DisplayPrefix:='DE';
  409.                                 END
  410.         ELSE
  411.             RunParams := false;
  412.     END; {case}
  413.  
  414.     InputFileName :=StUpCase(CleanPathName(ParamStr(2)));
  415.     OutputFileName:=StUpCase(CleanPathName(ParamStr(3)));
  416. END;  {RunParams}
  417. {*************************************************************}
  418. BEGIN  {main program}
  419.     HiddenCursor;
  420.     TextBackGround(Cyan); {nicer than black screens}
  421.     TextColor(Red);
  422.     ClrScr;
  423.  
  424.     IF ParamCount <> 3 THEN
  425.         BEGIN
  426.             WriteLn ('3 parameters are required - ',ParamCount,' were supplied.');
  427.             WriteLn ('Param syntax is "Mode InputFile OutputFile".');
  428.             WriteLn ('Mode is C for Compress or D for Decompress.');
  429.             NormalCursor;
  430.             Halt(1);
  431.         END
  432.     ELSE
  433.         IF NOT RunParams THEN
  434.             BEGIN
  435.                 WriteLn ('Run mode parameter error - /C or /D only.');
  436.                 NormalCursor;
  437.                 Halt(1);
  438.             END;
  439.  
  440.     {Pascal will shut me down if the path\file is incorrect}
  441.     Assign    (InputFile, InputFileName);
  442.     Assign    (OutputFile, OutputFileName);
  443.     ReWrite (OutputFile, 1);
  444.     Reset        (InputFile, 1);
  445.  
  446.     InputFileSize:=FileSize(InputFile); {input bytes to be processed}
  447.  
  448.     WriteLn (DisplayPrefix, 'COMPRESSING ''',InputFileName,'''');
  449.     WriteLn    (' TO ''',OutputFileName,''' ...');
  450.     WriteLn;
  451.  
  452.     {here's where it all happens}
  453.     GetTime (Started.Hour, Started.Minute, Started.Second, Started.Sec100);
  454.     IF CompressMode THEN
  455.         Compress
  456.     ELSE
  457.         Decompress;
  458.     GetTime (Stopped.Hour, Stopped.Minute, Stopped.Second, Stopped.Sec100);
  459.  
  460.     {Audit what we did}
  461.     WriteLn;
  462.     WriteLn;
  463.     WriteLn ('Total string segments = ',TrimLead(Form('###,###,###.',Segments)));
  464.     WriteLn ('Total bytes input     = ',TrimLead(Form('###,###,###.',TotalBytesIn)));
  465.     WriteLn ('Total bytes output    = ',TrimLead(Form('###,###,###.',TotalBytesOut)));
  466.     WriteLn ('Total seconds to run  = ',FormTime(Stopped)-FormTime(Started) : 5 : 1);
  467.     IF CompressMode THEN
  468.         WriteLn ('Compression ratio     = ',
  469.                             (100 - ((TotalBytesOut / TotalBytesIn) * 100)) : 6 : 2,' %');
  470.  
  471.     Close (InputFile);
  472.     Close (OutputFile);
  473.  
  474.     WriteLn;
  475.     WriteLn ('RLE Processing Completed.');
  476.     NormalCursor;
  477. END.
  478.  
  479.