home *** CD-ROM | disk | FTP | other *** search
- { Routines to implement a binary tree structure of all input lines }
- { Copyright 1988,1989, by J. W. Rider }
-
- procedure firstline;
- { makes the first record in the btree current }
- { THIS PROCEDURE WORKS INDEPENDENTLY OF THE STATE OF FIRSTNODE }
- begin if root<>nil then begin
- current:=root; while current^.l<>nil do current:=current^.l;
- linefound:=true; end
- else begin current:=nil; linefound:=false; end; end;
-
- procedure nextline;
- { makes current the record following current record }
- begin if current<>nil then
- if current^.r<>nil then begin current:=current^.r;
- while current^.l<>nil do current:=current^.l;
- linefound:=true; end
- else begin while (current^.u<>nil) and (current^.u^.r=current) do
- current:=current^.u;
- if current^.u<>nil then begin
- current:=current^.u; linefound:=true; end
- else begin current:=nil; linefound:=false; end end
- else linefound:=false; end;
-
- procedure writenode;
- { Writes the data corresponding to a single node to standard output }
- { Called either by "prunefirst" or "retrieveln" }
- var i,j: longint; key: string; begin
- if unique then j:=1 else j:=current^.c;
- for i:=1 to j do
- if keysonly then begin
- key:=copy(current^.d,current^.ks,current^.kl);
- if not sensecase then key:=lcase(key);
- if ancase then key:=anstr(key);
- writeln(key); end
- else writeln(current^.d); end; { procedure writenode }
-
- procedure storeln(var s:string);
- { stores a btree record for each line of input }
- var storedone:boolean; newline:lp; positnum,lengthnum: integer;
-
- function lesskey:boolean;
- { returns true if the key of new line is strictly less than the key
- of the current line record }
- var rkey: string;
- begin if sortnumeric then lesskey:= (kn<current^.k) xor reversed
- else begin rkey:=copy(current^.d,current^.ks,current^.kl);
- if ancase then rkey:=anstr(rkey);
- if sensecase then lesskey:=(key < rkey) xor reversed
- else lesskey:=(key < lcase(rkey)) xor reversed;
- end; end; { function storeln.lesskey }
-
- procedure balancetree;
- { improves search performance by moving the current node to the
- root position }
- begin
- if current^.l=nil then begin
- current^.l:=root; root^.u:=current; root:=current;
- if current^.u^.r=current then
- current^.u^.r:=nil
- else current^.u^.l:=nil;
- current^.u:=nil; end
- else if current^.r=nil then begin
- current^.r:=root; root^.u:=current; root:=current;
- if current^.u^.l=current then
- current^.u^.l:=nil
- else current^.u^.r:=nil;
- current^.u:=nil; end; end;
-
- procedure findline; var treedepth:longint;
- { find the line that matches the last input }
- begin linefound:=true;
-
- { Btree performance was SO BAD for partially sorted input that
- this routine now checks to see if the input was already partially
- sorted. }
-
- {check if its last -- most likely for partially sorted input }
- if lastnode<>nil then begin
- current:=lastnode; islast:=true; isfirst:=lastnode=firstnode;
- if lastnode^.d=s then exit
- else if not lesskey then
- begin linefound:=false; exit; end; end;
-
- {check if its first -- most likely for reversed sorted input}
- if firstnode<>nil then begin
- current:=firstnode; isfirst:=true; islast:=lastnode=firstnode;
- if firstnode^.d=s then exit
- else if lesskey then
- begin linefound:=false; exit; end; end;
- isfirst:=false;
-
- { If it doesn't belong on either end, do a binary tree search on
- the rest of the lines }
- if root<>nil then begin
- current:=root; linefound:=true; treedepth:=0;
- islast:=true; isfirst:=true;
- while linefound do
- if current^.d=s then exit
- else if lesskey then begin
- islast:=false; inc(treedepth);
- if isfirst and (current^.r=nil) and (treedepth>2)
- and (treedepth>(nodecount div 2)) then begin
- balancetree; treedepth:=0; end;
- if current^.l<>nil then current:=current^.l
- else linefound:=false; end
- else begin
- isfirst:=false; inc(treedepth);
- if islast and (current^.l=nil) and (treedepth>2)
- and (treedepth>(nodecount div 2)) then begin
- balancetree; treedepth:=0; end;
- if current^.r<>nil then current:=current^.r
- else linefound:=false; end; end
- else begin current:=nil; linefound:=false end
- end; { procedure storeln.findline }
-
- function incrline:boolean;
- { if line already exists, just increment its count '.c' }
- begin findline; if linefound then inc(current^.c);
- incrline:=linefound; end; { function storeln.incrline }
-
- procedure prunefirst;
- { eliminates the first line record from the btree. This routine is
- called only if there is not enough memory to hold all to sorted
- on the heap at once. }
- var oldcur:lp; i:integer;
- begin oldcur:=current; current:=firstnode;
- writenode; dec(nodecount);
- if current^.r<>nil then current^.r^.u:=current^.u;
- if current^.u<>nil then
- current^.u^.l:=current^.r
- else root:=current^.r;
- if oldcur=current then begin oldcur:=current^.u; isfirst:=true; end;
- freemem(current,length(current^.d)+1+sizeof(lh));
- firstline; firstnode:=current; current:=oldcur;
- end; { procedure storeln.prunefirst }
-
- begin { procedure storeln }
- storedone:=false;
-
- { generate the key for the new line }
- if usefields then begin
- nlks:=findfield(keycol,s); nlkl:=findfield(keycol2,s);
- nlkl:=nlkl-nlks+1; end
- else begin
- if length(s)<keycol then nlks:=length(s)+1
- else nlks:=keycol;
- if length(s)<keycol2 then nlkl:=length(s)-nlks+1
- else nlkl:=keycol2-nlks+1; end;
-
- if ignoreblanks then
- while (nlkl<>0) and (s[nlks] in [^I,' ']) do begin
- inc(nlks);dec(nlkl); end;
-
- key:=copy(s,nlks,nlkl);
- if sortnumeric then begin
- positnum:=posnum(key,lengthnum); nlkl:=lengthnum;
- if positnum>0 then begin
- nlks:=nlks+positnum-1;
- key:=copy(key,positnum,nlkl); end
- else begin nlkl:=0; key:=''; end;
- kn:=bval(key); end
- else if not sensecase then key:=lcase(key);
- if ancase then key:=anstr(key);
-
- { if the line already exists, just increment the count c }
- if not incrline then begin
-
- { if there is not enough room to store the line, }
- while (maxavail<(length(s)+1+grain+sizeof(lh))) and (not storedone) do
-
- { output the new line if it would be first anyhow }
- if isfirst then begin writeln(s); storedone:=true;
- if earlyout then sorterror:=true;
- earlyout:=true; end
-
- { output the first line record and retreive space until room exists }
- else begin prunefirst; earlyout:=true; end;
-
- { allocate room for the line if it has not been output }
- if not storedone then begin getmem(newline,length(s)+1+sizeof(lh));
- newline^.c:=1; newline^.r:=nil; newline^.l:=nil; inc(nodecount);
-
- { store the line into the btree }
- newline^.d:=s; newline^.u:=current; newline^.k:=kn;
- newline^.ks:=nlks; newline^.kl:=nlkl;
- if current=nil then findline;
- if current<>nil then
- if lesskey then begin current^.l:=newline;
- if current=firstnode then firstnode:=newline; end
- else begin current^.r:=newline;
- if current=lastnode then lastnode:=newline; end
- else begin
- root:=newline; firstnode:=newline; lastnode:=newline; end;
- sorterror:=sorterror or (isfirst and earlyout); end; end;
-
- end; {procedure storeln}
-
- procedure retrieveln; { dumps the rest of the btree to standard output }
- var i:integer;
- begin firstline; while linefound do begin writenode; nextline; end; end;
-