home *** CD-ROM | disk | FTP | other *** search
- {$C-}
- {$G512}
- {$P512}
-
- {*************************************************************************}
- {* Copyright (c) Kim Kokkonen, TurboPower Software, 1985 *}
- {* Released to the public domain for personal, non-commercial use only. *}
- {* Telephone: 408-378-3672, Compuserve 72457,2131 *}
- {* *}
- { sort as large a text file as fits in memory, up to 16000 lines. }
- { only the keys must fit in memory, so use a short key to sort long files.}
- { designed as an MSDOS filter, requires Turbo Pascal 3.0 to compile. }
- { written 7/85, modified 1/86 to use indexed textfiles for larger sorts. }
- { see options in WRITEHELP, call BIGSORT with no arguments to list options}
- { compile with maximum heap size A000. }
- { requires at least 256K free RAM to run as currently configured. }
- { reduce MaxLines to run in smaller space. }
- {*************************************************************************}
-
- PROGRAM bigsort(Input,Output);
-
- CONST
- {maxlines*maxlength gives the maximum filesize, here about 4 megabytes}
- MaxLines=16000;{limited by 4*maxlines<=65000}
- MaxLength=255;{max length of a given line, limited to 255}
-
- BufSize=4096;{number of bytes per blockread}
- StackParas=512;{paragraphs to reserve on stack for quicksort}
- convert_high=16777216.0;{used to convert reals to 3 byte small reals}
- convert_med=65536.0;
- convert_low=256.0;
- optiondelim='-';{char used to introduce command line options}
-
- TYPE
- lineBuf=STRING[255];
- linePtr=^Byte;
- smallReal=STRING[2];
- lineArray=ARRAY[1..MaxLines] OF linePtr;
- lineArrayPtr=^lineArray;
- positionArray=ARRAY[1..MaxLines] OF Integer;
- TextString=STRING[MaxLength];
- PathName=STRING[64];
- FilePointer=RECORD
- SeekTo:smallReal;
- LenToRead:Byte;
- END;
- FileIndexArray=ARRAY[1..MaxLines] OF FilePointer;
- FileIndexPtr=^FileIndexArray;
- TextBuffer=ARRAY[1..BufSize] OF Char;
- TextBufferPtr=^TextBuffer;
-
- {following record carries all information about the indexed text file}
- {requires 97 bytes in the segment where its var is located}
- {requires 4*maxlines+bufsize on the heap}
- IndexedFile=
- RECORD
- fil:FILE;{untyped file is critical for this application}
- EndOfFile:Boolean;{true when all of file read}
- LineNum:Integer;{last line read in}
- FilePosition:Real;{current byte position in file during readin}
- Buffer:TextBufferPtr;{pointer to buffer for this file}
- BufPos:Integer;{position in current buffer}
- BytesRead:Integer;{number read in last blockread}
- index:FileIndexPtr;{pointer to file index}
- END;
-
- VAR
- F:IndexedFile;
- Success:Boolean;
- lines:lineArrayPtr;{pointers to each text line stored here}
- Pos:positionArray;{position of each line after sort}
- nlines:Integer;{number of lines}
- showStats,partial,upper,reverse:Boolean;{option flags}
- numToCopy,beginCol,endCol:Integer;{option values}
- reg:RECORD
- CASE Integer OF
- 1:(ax,bx,cx,dx,bp,si,di,ds,es,flags:Integer);
- 2:(al,ah,bl,bh,cl,ch,dl,dh:Byte);
- END;
- tstart:Real;
- i,j:Integer;{global variables for recursive sort procedure}
- part:lineBuf;
-
- FUNCTION Time:Real;
- {-return time of day in seconds since midnight}
- BEGIN
- reg.ah:=$2C;
- MsDos(reg);
- Time:=1.0*(reg.dh+60.0*(reg.cl+60.0*reg.ch)+reg.dl/100.0);
- END;{time}
-
- PROCEDURE CheckKeys;
- {-capture ^C, ^S, ^Q}
- VAR
- c:Char;
- BEGIN
- WHILE KeyPressed DO BEGIN
- Read(Kbd,c);
- IF c=^S THEN
- REPEAT
- Read(Kbd,c);
- IF c=^C THEN Halt;
- UNTIL c=^Q
- ELSE IF c=^C THEN Halt;
- END;
- END;{checkkeys}
-
- FUNCTION IOstat(bit:Integer):Boolean;
- {-check status of the standard I/O}
- {bit=0 for input, 1 for output}
- {returns true if I/O is through console}
- VAR
- temp0,temp1:Boolean;
- BEGIN
- reg.ax:=$4400;
- reg.bx:=bit;{standard input or output}
- MsDos(reg);
- temp0:=reg.dx AND 128<>0;
- temp1:=reg.dx AND (1 SHL bit)<>0;
- IOstat:=temp0 AND temp1;
- END;{iostat}
-
- PROCEDURE WriteHelp;
- {-display a help screen}
- BEGIN
- WriteLn(Con);
- WriteLn(Con,'Usage: BIGSORT [Options] <InputPathname [>OutputPathName]');
- LowVideo;
- WriteLn(Con);
- WriteLn(Con,'Sorts text files by line.');
- WriteLn(Con,'Sort limited in size only by available RAM.');
- WriteLn(Con,'Only keys are stored in RAM, specify shorter key for bigger sort.');
- WriteLn(Con,'Each text line limited to 255 characters and must be terminated by a <CR><LF>.');
- WriteLn(Con,'Maximum of 16000 text lines.');
- WriteLn(Con);
- NormVideo;
- WriteLn(Con,'Options:');
- LowVideo;
- WriteLn(Con,' -I Ignore case while sorting');
- WriteLn(Con,' -R sort in Reverse order');
- WriteLn(Con,' -Bn Begin sort key with column n of each line (default 1)');
- WriteLn(Con,' -En End sort key with column n of each line (default end of line)');
- NormVideo;
- END;{writehelp}
-
- PROCEDURE SetOptions;
- {-analyze input line}
- VAR
- i,code:Integer;
- Num:STRING[6];
- arg:STRING[64];
- BEGIN
- {set defaults}
- upper:=False;reverse:=False;
- beginCol:=1;endCol:=255;partial:=False;
-
- WriteLn(Con);
-
- {scan through argument list}
- i:=1;
- WHILE i<=ParamCount DO BEGIN
- arg:=ParamStr(i);
- IF (arg[1]=optiondelim) AND (Length(arg)>1) THEN BEGIN
- CASE UpCase(arg[2]) OF
- 'I':upper:=True;
- 'R':reverse:=True;
- 'B':BEGIN
- Num:=Copy(arg,3,6);
- Val(Num,beginCol,code);
- IF code<>0 THEN BEGIN
- WriteLn(Con,'Illegal Begin column specified: ',arg);
- WriteHelp;
- Halt;
- END;
- IF (beginCol<=0) OR (beginCol>255) THEN BEGIN
- WriteLn(Con,'Illegal Begin column specified: ',arg);
- WriteLn(Con,' column must be in the range of 1..255');
- WriteHelp;
- Halt;
- END;
- IF beginCol>1 THEN partial:=True;
- END;
- 'E':BEGIN
- Num:=Copy(arg,3,6);
- Val(Num,endCol,code);
- IF code<>0 THEN BEGIN
- WriteLn(Con,'Illegal End column specified: ',arg);
- WriteHelp;
- Halt;
- END;
- IF (endCol<=0) OR (endCol>255) THEN BEGIN
- WriteLn(Con,'Illegal End column specified: ',arg);
- WriteLn(Con,' column must be in the range of 1..255');
- WriteHelp;
- Halt;
- END;
- IF endCol<255 THEN partial:=True;
- END;
- ELSE
- WriteLn(Con,'Unrecognized command line option: ',arg);
- WriteHelp;
- Halt;
- END;
- END ELSE BEGIN
- WriteLn(Con,'Unrecognized command line option: ',arg);
- WriteHelp;
- Halt;
- END;
- i:=Succ(i);
- END;
- numToCopy:=Succ(endCol-beginCol);
- showStats:=NOT(IOstat(1));
- END;{setoptions}
-
- PROCEDURE PutLine(VAR L:lineBuf;VAR lptr:linePtr);
- {-store a string on the heap}
- VAR
- len:Byte ABSOLUTE L;
- tlen:Byte;
- space:Integer;
- BEGIN
- tlen:=Succ(len);{length of string including length byte}
- space:=MaxAvail;
- IF (space<0) OR (space>StackParas) THEN BEGIN
- {enough space left to add string}
- GetMem(lptr,tlen);
- Move(L,lptr^,tlen);
- END ELSE BEGIN
- WriteLn(Con);
- WriteLn(Con,'not enough memory left to store text keys....');
- Halt;
- END;
- END;{putline}
-
- FUNCTION GetLine(lptr:linePtr):lineBuf;
- {-get a string back from the heap}
- VAR
- L:lineBuf;
- BEGIN
- Move(lptr^,L,Succ(lptr^));
- GetLine:=L;
- END;{getline}
-
- PROCEDURE RealToSmall(r:Real;VAR s:smallReal);
- {-convert a real in the range 0..1677215 to a three byte quantity}
- BEGIN
- IF r>=convert_high THEN BEGIN
- WriteLn(Con);
- WriteLn(Con,'real too large to convert to small real');
- WriteLn(Con,r:0:0);
- Halt;
- END;
- s[2]:=Chr(Trunc(r/convert_med));
- r:=r-Ord(s[2])*convert_med;
- s[1]:=Chr(Trunc(r/convert_low));
- r:=r-Ord(s[1])*convert_low;
- s[0]:=Chr(Trunc(r));
- END;{realtosmall}
-
- FUNCTION SmallToReal(VAR s:smallReal):Real;
- {-convert a 3 byte smallreal back to a real}
- BEGIN
- SmallToReal:=
- Int(Ord(s[0]))+convert_low*Int(Ord(s[1]))+convert_med*Int(Ord(s[2]));
- END;{smalltoreal}
-
- FUNCTION Cardinal(i:Integer):Real;
- {-return positive real 0<=r<=65535}
- VAR
- r:Real;
- BEGIN
- r:=i;
- IF r<0 THEN r:=r+65536.0;
- Cardinal:=r;
- END;{cardinal}
-
- PROCEDURE ReadInFile(VAR nlines:Integer);
- {-read lines from standard input and put the keys on the heap}
- VAR
- L:lineBuf;
-
- PROCEDURE OpenFile(fname:PathName;
- VAR F:IndexedFile;
- VAR Success:Boolean);
- {-open an indexed textfile, return true if successful}
- BEGIN
- WITH F DO BEGIN
-
- {open the physical file}
- Assign(fil,fname);
- {$I-}Reset(fil,1){$I+};
- Success:=(IOResult=0);
- IF NOT(Success) THEN Exit;
- EndOfFile:=False;
-
- {allocate the file buffer}
- Success:=16.0*Cardinal(MaxAvail)>Cardinal(SizeOf(TextBuffer));
- IF NOT(Success) THEN Exit;
- New(Buffer);
- BytesRead:=0;
- BufPos:=1;{force blockread the first time}
-
- {allocate the file index array}
- Success:=16.0*Cardinal(MaxAvail)>Cardinal(SizeOf(FileIndexArray));
- IF NOT(Success) THEN Exit;
- New(index);
- LineNum:=0;
- FilePosition:=0.0;
-
- END;
- END;{openfile}
-
- PROCEDURE ReadNewLine(VAR F:IndexedFile;VAR L:lineBuf);
- {-read a text line and store information for later random access}
- VAR
- EndOfLine:Boolean;
- lpos,terminators:Integer;
- ch:Char;
- sm:smallReal;
- BEGIN
- WITH F DO BEGIN
- EndOfLine:=False;
- lpos:=0;
- terminators:=1;
-
- {look at characters until end of line found}
- REPEAT
-
- IF BufPos>BytesRead THEN BEGIN
- {get another buffer full}
- BlockRead(fil,Buffer^,BufSize,BytesRead);
- BufPos:=1;
- END;
-
- IF BytesRead=0 THEN
- ch:=^Z
- ELSE BEGIN
- ch:=Buffer^[BufPos];
- BufPos:=Succ(BufPos);
- END;
-
- CASE ch OF
- ^M:terminators:=Succ(terminators);
- ^J:EndOfLine:=True;
- ^Z:BEGIN
- EndOfLine:=True;
- EndOfFile:=True;
- END;
- ELSE
- IF lpos<MaxLength THEN BEGIN
- lpos:=Succ(lpos);
- L[lpos]:=ch;
- END;
- END;
-
- UNTIL EndOfLine;
-
- {finish up line}
- L[0]:=Chr(lpos);
-
- {store info for later random access}
- IF LineNum<MaxLines THEN BEGIN
- LineNum:=Succ(LineNum);
- WITH index^[LineNum] DO BEGIN
- RealToSmall(FilePosition,sm);
- Move(sm,SeekTo,3);
- LenToRead:=lpos;
- END;
- FilePosition:=FilePosition+lpos+terminators;
- END;
-
- END;
- END;{readnewline}
-
- PROCEDURE GetSortKey(VAR L:lineBuf);
- {-return the sort key for the text line l}
- VAR
- i:Integer;
- BEGIN
- IF partial THEN
- L:=Copy(L,beginCol,numToCopy);
- IF upper THEN
- FOR i:=1 TO Length(L) DO L[i]:=UpCase(L[i]);
- IF reverse THEN
- FOR i:=1 TO Length(L) DO L[i]:=Chr(255-Ord(L[i]));
- END;{getsortkey}
-
- BEGIN
- nlines:=0;
- {set up the indexed file data structure}
- OpenFile('INP:',F,Success);
- IF NOT(Success) THEN BEGIN
- WriteLn(Con);
- WriteLn(Con, 'could not set up indexed file data structure....');
- Halt;
- END;
-
- WHILE NOT F.EndOfFile DO BEGIN
- {read line}
- ReadNewLine(F,L);
- GetSortKey(L);
- IF nlines<MaxLines THEN BEGIN
- nlines:=Succ(nlines);
- IF (nlines AND 63=0) THEN
- Write(Con,^H^H^H^H^H,nlines:5);
- CheckKeys;
- {store key on text heap}
- PutLine(L,lines^[nlines]);
- {initialize the pos array}
- Pos[nlines]:=nlines;
- END ELSE BEGIN
- WriteLn(Con);
- WriteLn(Con,'Exceeded maximum number of lines....');
- Halt;
- END;
- END;
- END;{readinfile}
-
- PROCEDURE SortData(l,r:Integer);
- {-recursive quicksort}
- VAR
- partpos:Integer;
-
- PROCEDURE WriteStatus(i,j:Integer);
- {-provide some reassurance that sort is proceeding}
- BEGIN
- Write(Con,^H^H^H^H^H);ClrEol;
- {prints size of current partition being sorted}
- Write(Con,(j-i):5);
- END;{writestatus}
-
- PROCEDURE Swap(i,j:Integer);
- {-swap the two referenced data elements}
- VAR
- t:Integer;
- BEGIN
- t:=Pos[i];
- Pos[i]:=Pos[j];
- Pos[j]:=t;
- END;{swap}
-
- BEGIN
-
- IF l<r THEN BEGIN
-
- i:=l;
- j:=Succ(r);
- IF (j-i)>50 THEN WriteStatus(i,j);
-
- {get a random partitioning element}
- Swap(i,i+Random(j-i));
- part:=GetLine(lines^[Pos[i]]);
-
- {swap elements until all less than partition are to left, etc.}
- REPEAT
- REPEAT
- i:=Succ(i);
- UNTIL (i>j) OR (GetLine(lines^[Pos[i]])>=part);
- REPEAT
- j:=Pred(j);
- UNTIL (GetLine(lines^[Pos[j]])<=part);
- IF i<j THEN Swap(j,i);
- UNTIL i>=j;
-
- Swap(l,j);
- partpos:=j;
- SortData(l,Pred(partpos));
- SortData(Succ(partpos),r);
- END;
-
- END;{sortdata}
-
- PROCEDURE WriteOutFile(nlines:Integer);
- {-write out the sorted information}
- VAR
- i:Integer;
- L:lineBuf;
-
- PROCEDURE ReadIndexedLine(VAR F:IndexedFile;
- Num:Integer;
- VAR L:TextString);
- {-get an indexed line from f}
- BEGIN
- WITH F DO
- WITH index^[Num] DO BEGIN
- LongSeek(fil,SmallToReal(SeekTo));
- BlockRead(fil,L[1],LenToRead);
- L[0]:=Chr(LenToRead);
- END;
- END;{readindexedline}
-
- BEGIN
- IF NOT(upper OR partial OR reverse) THEN
- {take the output directly from memory}
- FOR i:=1 TO nlines DO BEGIN
- WriteLn(GetLine(lines^[Pos[i]]));
- IF showStats AND (i AND 63=0) THEN
- Write(Con,^H^H^H^H^H,i:5);
- CheckKeys;
- END
- ELSE
- {use the indexed text file}
- FOR i:=1 TO nlines DO BEGIN
- ReadIndexedLine(F,Pos[i],L);
- IF L<>'' THEN WriteLn(L);
- IF showStats AND (i AND 15=0) THEN
- Write(Con,^H^H^H^H^H,i:5);
- CheckKeys;
- END;
- END;{writeoutfile}
-
- BEGIN{main}
-
- IF IOstat(0) THEN BEGIN
- WriteLn(Con);
- WriteLn(Con,'input must be redirected from a file....');
- WriteHelp;
- Halt;
- END;
-
- {analyze command line options}
- SetOptions;
-
- {get the space for lines array}
- IF 16.0*Cardinal(MaxAvail)<Cardinal(SizeOf(lineArray)) THEN BEGIN
- WriteLn(Con,'not enough memory for line pointer array....');
- Halt;
- END;
- New(lines);
-
- tstart:=Time;
- Write(Con,0.0:7:2,' READING ',1:5);
-
- {read in the input file}
- ReadInFile(nlines);
-
- Write(Con,^H^H^H^H^H);WriteLn(Con,'Total lines: ',nlines);
-
- {sort}
- Write(Con,(Time-tstart):7:2,' SORTING ','':5);
-
- SortData(1,nlines);
-
- Write(Con,^H^H^H^H^H);ClrEol;WriteLn(Con);
- Write(Con,(Time-tstart):7:2,' WRITING ',1:5);
- IF NOT(showStats) THEN WriteLn(Con);
-
- {write out the results}
- WriteOutFile(nlines);
-
- IF showStats THEN Write(Con,^H^H^H^H^H);
- WriteLn(Con,'Total lines: ',nlines);
- WriteLn(Con,(Time-tstart):7:2,' DONE ');
-
- END.
-