home *** CD-ROM | disk | FTP | other *** search
- program indexer; {$c-,e+,f-,i-,j-,m-,p+,r+,s+,t-,u+ }
- {-------------------------------------------------------------}
- { }
- { INDEX CREATION FROM THE KEYBOARD }
- { }
- { David E. Cortesi, 2340 Tasso St., Palo Alto CA 94301. }
- { (compuserve 72155,450) }
- { }
- { Accepts index entries for a book from the keyboard, sorts }
- { the entries and sub-entries, collates page references, }
- { and creates an ASCII file that can be printed or edited. }
- { }
- { Term Recall is an unusual feature of the user interaction. }
- { If, when entering an index term, the user hits the ESC key, }
- { the program will find the least term that matches the input }
- { to that point and fill in its characters on the input line. }
- { Hitting ESC again retracts those letters and displays the }
- { letters of the next-higher matching term. This can save }
- { considerable typing -- a long term can be entered as only }
- { a couple of letters plus ESC -- and it allows the user to }
- { review the terms entered to that point in alpha order. }
- { }
- { Creates files INDEXER.OUT, the index-document file, and }
- { INDEXER.TRE, an internal record of the tree which will be }
- { reloaded on the next run if it then exists. }
- {-------------------------------------------------------------}
-
- const
- nullch = 0; { the null, end-of-string }
- strmax = 65; { max size of a string (64,00h)}
- sbufsize = 2046; { page size of a string buffer }
- sbufnum = 16; { allow up to 32K of buffers }
- maxdepth = 20; { stack size for tree-walks }
- asciibel = 7; { names for ascii characters }
- asciibs = 8;
- asciilf = 10;
- asciicr = 13;
- asciiesc = 27;
- asciiblank = 32;
- asciidel = 127;
-
- type
- strindex = 1..strmax; { indices over strings }
- strlength= 0..strmax; { lengths of strings }
- relation = (less,equal,more); { result of comparisons }
- nchar = 0..255; { numeric characters are bytes }
- str = record { an independent string is }
- len : strlength; { ..a length and some bytes, }
- val : array[strindex] of nchar { ending in 00h }
- end;
- strbuff = record { a string buffer is a compact }
- free : 0..sbufsize; { collection of strings. }
- data : array[1..sbufsize] of nchar
- end;
- stref = record { an indirect string is the }
- nb : 1..sbufnum; { index of an strbuff's address}
- bo : 1..sbufsize { and an index into it. }
- end;
- page = record { a page on which a term is }
- next : ^page; { ..referenced, and ^next one }
- num : integer
- end;
- ppage = ^page;
- node = record { one node of a binary tree }
- lson, rson, { descendant trees }
- subt : ^node; { subtree of sub-terms }
- iref, uref : stref; { original and uppercase terms }
- phead : ppage; { head of chain of page-refs }
- skip : boolean; { phony node "M" starts a tree }
- end;
- pnode = ^node;
- treewalk = record { current state of an inorder }
- current : pnode; { ..walk of a tree: this node, }
- top : 0..maxdepth; { stack-top pointer, stacked }
- stack : array[1..maxdepth] of pnode;{ nodes, mark }
- goneleft : boolean { true when backing out of leaf}
- end;
-
- var
- sbufptrs : array[1..sbufnum] of ^strbuff; { blocks of bytes}
- sbufcnt : 0..sbufnum; { how many blocks are active }
- maintree : pnode; { root of the term-tree }
- initerm : str; { "M" term for starting trees }
- indlevel : 0..9; { subterm nesting (indent) lev.}
- outfile : text; { the output document }
-
- {-------------------------------------------------------------}
- { routines operating on independent strings }
- { Pascal/Z string type was avoided to maximize portability. }
- {-------------------------------------------------------------}
-
- function upcase(c:nchar) : nchar;
- { force character to uppercase }
- begin
- if (c>=ord('a')) and (c<=ord('z')) then
- upcase := c-32
- else
- upcase := c
- end;
-
- procedure stucase(var a,b:str);
- { duplicate a string, forcing uppercase }
- var j : strlength;
- c : nchar;
- begin
- j := 0;
- repeat
- j := j+1;
- c := a.val[j];
- b.val[j] := upcase(c);
- until c=nullch;
- b.len := j-1
- end;
-
- {-------------------------------------------------------------}
- { routines operating on stored strings }
- { To keep all stored terms in string form (P/Z or our version)}
- { would use far too much storage. Here we pack strings into }
- { large blocks. The blocks are allocated as needed, to a max }
- { of 32K -- limit enforced by compiler range checking. }
- {-------------------------------------------------------------}
-
- procedure stput(var a:str; var b:stref);
- { stow string a in latest buffer, return indirect reference}
- var bp : ^strbuff;
- j : strindex;
- k : 1..sbufsize;
- begin
- bp := sbufptrs[sbufcnt]; { ^latest string buffer }
- if bp^.free<(a.len+1) then begin { not enough room! }
- new(bp); { make, count new buffer page }
- sbufcnt := sbufcnt+1; { range error here when full }
- sbufptrs[sbufcnt] := bp;
- bp^.free := sbufsize
- end;
-
- b.nb := sbufcnt; { save buffer-page number }
- j := 1;
- k := 1+sbufsize-bp^.free;
- b.bo := k; { save buffer-page offset }
-
- while j <= a.len do begin
- bp^.data[k] := a.val[j];
- j := j+1;
- k := k+1
- end;
- bp^.data[k] := nullch; { mark end of stored string }
- bp^.free := sbufsize-k { adjust bytes left in block }
- end;
-
- procedure stget(var b:stref; var a:str);
- { retrieve stored string from buffer into string-record }
- var bp : ^strbuff;
- j : strindex;
- k : 1..sbufsize;
- c : nchar;
- begin
- bp := sbufptrs[b.nb]; { point to the buffer page }
- k := b.bo; { ..and offset into it }
- j := 1;
- repeat { copy the stored string out }
- c := bp^.data[k];
- a.val[j] := c;
- j := j+1;
- k := k+1;
- until (c=nullch);
- a.len := j-2
- end;
-
- function sbcomp(var a:str; var b:stref) : relation;
- { EXACT comparison of a string to a stored string value --
- if "a" is initially equal but shorter, it is "less." }
- var bp : ^strbuff;
- j : strindex;
- k : 1..sbufsize;
- x,y : nchar;
- r : relation;
- begin
- bp := sbufptrs[b.nb];
- k := b.bo;
- j := 1;
- repeat
- x := a.val[j];
- y := bp^.data[k];
- j := j+1;
- k := k+1
- until (x<>y) or (x=nullch);
- if x=y then r := equal
- else if x<y then r := less
- else r := more;
- sbcomp := r
- end;
-
- function sxcomp(var a:str; var b:stref) : relation;
- { APPROXIMATE comparison of a string to a stored string --
- if "a" is initially equal but shorter, it is "equal." }
- var bp : ^strbuff;
- j : strindex;
- k : 1..sbufsize;
- x,y : nchar;
- r : relation;
- begin
- bp := sbufptrs[b.nb];
- k := b.bo;
- j := 1;
- repeat
- x := a.val[j];
- y := bp^.data[k];
- j := j+1;
- k := k+1
- until (x<>y) or (x=nullch);
- if (x=y) or (x=nullch) then r := equal
- else if x<y then r := less
- else r := more;
- sxcomp := r
- end;
-
- {-------------------------------------------------------------}
- { routines operating on the binary trees }
- { Each tree node represents one index term. The term itself }
- { is stored two ways, as typed and all-caps. The latter is }
- { used for comparison of terms, so that "Apple" = "apple". }
- { A node anchors a sorted chain of page-numbers, and may hold }
- { the root of an independent sub-tree of sub-terms. The tree }
- { is ordered so that all terms off the .lson are less than, }
- { and all terms off the .rson are greater, than this term. }
- {-------------------------------------------------------------}
-
- function makenode(var a, ua : str) : pnode;
- { make a new tree node given term-strings }
- var tn : ^node;
- begin
- new(tn);
- tn^.lson := nil;
- tn^.rson := nil;
- tn^.subt := nil;
- stput(a,tn^.iref);
- stput(ua,tn^.uref);
- tn^.phead := nil;
- tn^.skip := false;
- makenode := tn
- end;
-
- procedure startree(var t:pnode);
- { begin a tree with an artificial node whose term
- is "M" to encourage early balance }
- begin
- t := makenode(initerm,initerm);
- t^.skip := true
- end;
-
- function insert(tree:pnode; var a:str) : pnode;
- { put a new term into a tree, or find it if it is there.
- either way, return the term's node's address. }
- var o,p,q : ^node;
- ua : str;
- r : relation;
- begin
- stucase(a,ua);
- p := tree;
-
- repeat
- r := sbcomp(ua,p^.uref);
- if r<>equal then
- if r=less then q := p^.lson
- else q := p^.rson
- else q := p;
- o := p;
- p := q
- until (r=equal) or (p=nil);
-
- if r=equal then insert := p
- else begin { term doesn't exist in the tree }
- q := makenode(a,ua);
- if r=less then o^.lson := q
- else o^.rson := q;
- insert := q
- end;
- end;
-
- {-------------------------------------------------------------}
- { routines for tree-walking. These routines abstract the }
- { idea of an in-order tour of the tree into a single record. }
- { The usual algorithm for a walk is recursive (see J&W 11.5), }
- { which is not convenient for this program. }
- {-------------------------------------------------------------}
-
- procedure initwalk(t:pnode; var w:treewalk);
- { initialize for a walk over the given tree }
- begin
- w.current := t; { start at the top node, }
- w.goneleft := false; { ..but descend left first off }
- w.top := 0 { stack is empty }
- end;
-
- procedure push(pn: pnode; var w: treewalk);
- { push a given node onto the walk-stack }
- begin
- if w.top<maxdepth then begin
- w.top := w.top+1;
- w.stack[w.top] := pn
- end
- end;
-
- function pop(var w:treewalk) : pnode;
- { pop the top node from the walk-stack }
- begin
- if w.top>0 then begin
- pop := w.stack[w.top];
- w.top := w.top-1
- end
- else pop := nil
- end;
-
- function treestep(var w:treewalk) : pnode;
- { step to the next node in lexical order in a tree.
- return that node as result, and save it in the walk
- record as "current." Return nil if end of tree. }
- var t : pnode;
- begin
- t := w.current;
- repeat
- if not w.goneleft then begin { descend to the left }
- if t<> nil then
- while t^.lson<>nil do begin
- push(t,w);
- t := t^.lson
- end;
- w.goneleft := true { t^ a left-leaf of tree }
- end
- else { been down; have handled current; go up/right}
- if t<> nil then
- if t^.rson <> nil then begin
- t := t^.rson; { jog right, then }
- w.goneleft := false { drop down again }
- end
- else { nowhere to go but up }
- t := pop(w)
- until w.goneleft; { repeats when we jog right }
- w.current := t;
- treestep := t
- end;
-
- function setscan(tree: pnode; var w: treewalk; var a: str)
- : pnode;
- { given a partial term "a," a tree "tree," and a tree-
- walk record "w," set up w so that a series of calls on
- function treestep will return all the nodes that are
- initially equal to a in ascending order. If there are
- none such, return nil. This function sets up for Term
- Recall when the escape key is pressed during input.
-
- The algorithm is to find the matching term that is
- highest in the tree, then use treestep to find the
- lexically-least node under that term (which may not be
- a match) and then to treestep to the first match.}
-
- var ua : str;
- p,t : pnode;
- r : relation;
- quit : boolean;
- begin
- stucase(a,ua);
- initwalk(tree,w);
- t := tree;
- if t=nil then setscan := nil { no matches possible }
- else begin
- { step 1 is to find any part-equal node at all }
- quit := false;
- repeat
- r := sxcomp(ua,t^.uref);
- case r of
- less : if t^.lson<>nil then t := t^.lson
- else quit := true;
- more : if t^.rson<>nil then t := t^.rson
- else quit := true;
- equal : quit := true
- end
- until quit;
- { If we have a match, it may not be the least one.
- If this node has a left-son, there can be lesser
- matches (and nonmatches) down that branch. }
- if r<>equal then setscan := nil { no match a-tall }
- else begin
- w.current := t;
- if t^.lson=nil then w.goneleft := true
- else begin { zoom down in tree }
- w.goneleft := false;
- repeat
- t := treestep(w);
- r := sxcomp(ua,t^.uref)
- until r=equal
- end;
- setscan := t
- end
- end
- end;
-
- {-------------------------------------------------------------}
- { routines for phase 1 -- input }
- {-------------------------------------------------------------}
-
- procedure indent;
- { indent the cursor for the current nesting level }
- var i : 0..9;
- begin
- for i := 1 to indlevel do write('. . ')
- end;
-
- function readnc : nchar;
- { get one byte from the keyboard, bypassing the
- usual pascal procedures and going straight to CP/M }
- const bdos=5;
- inchar=1;
- asciicr=13;
- asciilf=10;
- type regs = record
- a : 0..255;
- bc,de,hl : integer
- end;
- var r : regs;
- procedure call(var x:regs; addr:integer); external;
- begin
- r.bc := inchar;
- call(r,bdos);
- readnc := r.a
- end;
-
- procedure getterm(tree: pnode; var a:str; var cont: boolean);
- { get a term from the user, with control keys used thus:
- cr : end the term.
- lf : end the term, begin a subterm of it.
- esc: try to complete the term with the next (first)
- matching term from the present tree-context.
- del: cancel esc-completion, return to original entry. }
- var
- c : nchar;
- j, oj : strindex;
- k : strlength;
- x,ua : str;
- quit : boolean;
- tw : treewalk;
- p : pnode;
-
- procedure backup;
- { backup the screen and the "a" string to the original
- term that was entered. }
- var qj : strindex;
- begin
- for qj := j downto (oj+1) do
- write(chr(asciibs),chr(asciiblank),chr(asciibs));
- j := oj;
- a.val[j] := nullch
- end;
-
- procedure startscan;
- { set up for an alphabetical scan over all terms that
- are an initial match to user entry thus far. Setscan
- does most of the work. }
- begin
- stucase(a,ua); { for stepscan's benefit }
- p := setscan(tree,tw,a);
- if p<>nil then { phony node only if a.len=0 }
- if p^.skip then p := treestep(tw);
- if p<>nil then begin { this node has to be equal }
- stget(p^.iref,x);
- k := x.len+1
- end
- else k := 0
- end;
-
- procedure stepscan;
- { find the next match to the original string, leaving
- its value in x, or k=0 if there is none. }
- begin
- k := 0;
- p := treestep(tw);
- if p<>nil then
- if p^.skip then p := treestep(tw);
- if p<>nil then
- if equal=sxcomp(ua,p^.uref) then begin
- stget(p^.iref,x);
- k := x.len+1
- end
- end;
-
- begin { the main Get Term procedure }
- indent; write('term: ');
- j := 1; oj := j; { no data in the a-string }
- k := 0; { no esc-scan working }
- quit := false; { not finished yet (hardly!) }
- repeat
- a.val[j] := nullch; { keep "a" a finished string }
- a.len := j-1; { ..at all times }
- c := readnc;
- case c of
-
- asciibs : { destructive backspace }
- if j>1 then begin
- write(chr(asciiblank),chr(asciibs));
- j := j-1;
- oj := j; { the current scan is accepted }
- k := 0; { ..and no scan is underway }
- end;
-
- asciicr : { normal completion }
- begin
- write(chr(asciilf));
- quit := true
- end;
-
- asciilf : { complete, move on to subterm }
- begin
- write(chr(asciicr));
- quit := true
- end;
-
- asciiesc : { automatic scan for match }
- begin
- backup; { wipe rejected match if any }
- if k=0 then startscan else stepscan;
- if k=0 then { no (further) match found }
- write(chr(asciibel))
- else { next (first?) match found }
- while j<k do begin
- a.val[j] := x.val[j];
- write(chr(a.val[j]));
- j := j+1
- end
- end;
-
- asciidel : { cancel search for match }
- begin
- backup;
- k := 0 { no active scan }
- end;
-
- else : { ordinary (?) character }
- if (c<asciiblank) or (j=strmax) then
- write(chr(asciibel))
- else begin
- a.val[j] := c;
- j := j+1;
- oj := j; { the current scan has been }
- k := 0 { ..accepted and is over }
- end
- end {case}
- until quit;
- cont := c=asciilf
- end;
-
- procedure getpage(var i: integer);
- { read a page number into an integer. If page numbers
- are not simple integers, eg "3-17" and the like, this
- routine would have to build a string. }
- begin
- indent;
- write('page: ');
- readln(i)
- end;
-
- procedure makepage(var p:ppage; i:integer);
- { make a page record and install its address }
- begin
- new(p);
- p^.next := nil;
- p^.num := i
- end;
-
- procedure addpage(np: pnode; pg: integer);
- { add a page number to the chain off a node. This is
- a classic case of an algorithm that requires a 2-exit
- loop; the scan of the chain has to stop when a higher
- page number is found OR when the end of the chain is
- reached. It could be done with Repeat or While, but
- it actually looks cleaner with Goto. }
- label 99,101,102,103;
- var p1, p2, p3: ppage;
- begin
- p1 := np^.phead;
- if p1=nil then makepage(np^.phead,pg)
- else { some pages already noted, search chain }
- if pg<p1^.num then begin
- makepage(p2,pg); { this page less than all }
- p2^.next := p1;
- np^.phead := p2
- end
- else begin { this page goes somewhere in chain }
- 99: p2 := p1^.next;
- if p2=nil then goto 101;
- if pg<p2^.num then goto 102;
- p1 := p2;
- goto 99;
- 101: { p1^ last number in chain, pg is => it }
- begin
- if pg>p1^.num then
- makepage(p1^.next,pg);
- goto 103
- end;
- 102: {p1^.num <= pg <p2^.num; pg goes between }
- begin
- if pg>p1^.num then begin
- makepage(p3,pg);
- p3^.next := p2;
- p1^.next := p3
- end
- end;
- 103: ;
- end
- end;
-
- procedure load(var atree:pnode);
- { input control: load terms into a tree from the keyboard.
- the code is recursive; if the user wants to do a subterm
- this routine calls itself to load the sub-tree of the
- superior term's node. A page number of zero is a disaster
- when we reload the saved tree, so one is converted to -1.}
- var aterm : str;
- anode : pnode;
- apage : integer;
- cont : boolean;
- begin
- repeat
- getterm(atree,aterm,cont);
- if aterm.len>0 then begin
- anode := insert(atree,aterm);
- if not cont then begin
- getpage(apage);
- if apage=0 then apage := 32767;
- addpage(anode,apage)
- end
- else begin { user hit lf, wants to recurse }
- if anode^.subt=nil then
- startree(anode^.subt);
- indlevel := indlevel+1;
- load(anode^.subt);
- indlevel := indlevel-1
- end
- end;
- until (aterm.len=0) or (indlevel>0)
- end;
-
- {-------------------------------------------------------------}
- { routines for phase 2 -- output }
- {-------------------------------------------------------------}
-
- procedure filenode(np: pnode; var oc: nchar);
- { write one node's contents, term + pages, to the output.
- It is at this level that we insert a blank line on a break
- in the sequence of main-term initial letters. Once more,
- a loop over an ordered chain is cleaner with Goto. }
- label 99;
- var a : str;
- p : ppage;
- i : 0..9;
- j : strindex;
- k1, k2 : integer;
- ic : nchar;
- begin
- if not np^.skip then begin { ignore phony nodes }
- stget(np^.iref,a);
- ic := upcase(a.val[1]);
- if (indlevel=0) and { main-term initial change? }
- (oc<>ic) then writeln(outfile);
- oc := ic;
- for i := 1 to indlevel do write(outfile,' ');
- for j := 1 to a.len do write(outfile,chr(a.val[j]));
- p := np^.phead;
- while p<>nil do begin
- write(outfile,' ');
- k1 := p^.num;
- k2 := k1+1;
- 99:p := p^.next; { elide sequential numbers }
- if p<>nil then
- if p^.num=k2 then begin
- k2 := k2+1;
- goto 99
- end;
- write(outfile,k1:1); { write "17" or "17-19" }
- if (k1+1)<k2 then write(outfile,'-',k2-1:1);
- if p<>nil then write(outfile,',');
- end;
- writeln(outfile);
- end
- end;
-
- procedure filetree(intree: pnode);
- { walk through a (sub-) tree and write each node }
- var tree : pnode;
- tw : treewalk;
- oc : nchar;
- begin
- oc := nullch;
- initwalk(intree,tw);
- tree := treestep(tw);
- while tree<>nil do begin
- filenode(tree,oc);
- if tree^.subt<>nil then begin
- indlevel := indlevel+1;
- filetree(tree^.subt);
- indlevel := indlevel-1
- end;
- tree := treestep(tw)
- end
- end;
-
- procedure dump;
- begin
- rewrite('INDEXER.OUT',outfile);
- filetree(maintree)
- end;
-
- {-------------------------------------------------------------}
- { routines for phase 0 -- initialization }
- {-------------------------------------------------------------}
-
- procedure init;
- { initialize the various mechanisms }
- begin
- indlevel := 0;
- new (sbufptrs[1]);
- sbufcnt := 1;
- sbufptrs[1]^.free := sbufsize;
- initerm.val[1] := ord('M');
- initerm.val[2] := nullch;
- initerm.len := 1;
- startree(maintree);
- end;
-
- procedure loadall;
- { if a saved-tree file INDEXER.TRE exists, load its values
- into the tree. }
- var loadtree : file of nchar;
- x : str;
- j : strindex;
- p : pnode;
- k : integer;
- k1,k2 : 0..255;
-
- procedure reload(t:pnode);
- { reload one (sub-)tree from the saved-tree file }
- { the recorded form of one node of a tree is:
- termlength (1..strmax-1),
- that many term bytes in reverse order,
- page numbers as high byte, low byte,
- page number of (zero,zero).
- the file is a sequence of terms as above. a tree ends
- with a byte of zero. a sub-tree is introduced with a
- byte of strmax. }
-
- begin {$r- range checks off during byte i/o }
- read(loadtree,j);
- while j<>nullch do begin
- x.len := j;
- for j := j downto 1 do read(loadtree,x.val[j]);
- x.val[x.len+1] := nullch;
- p := insert(t,x);
- repeat
- read(loadtree,k1,k2);
- k := (k1*256)+k2;
- if k<>0 then addpage(p,k)
- until k=0;
- read(loadtree,j);
- if j=strmax then begin { a sub-tree }
- startree(p^.subt);
- reload(p^.subt);
- read(loadtree,j)
- end
- end
- end; {$r+ }
-
- begin
- reset('INDEXER.TRE',loadtree);
- if not eof(loadtree) then reload(maintree)
- end;
-
- {-------------------------------------------------------------}
- { routines for phase 3 -- termination }
- {-------------------------------------------------------------}
-
- procedure saveall;
- { save the term-tree in the file INDEXER.TRE so it can
- be reloaded for additions later, if need be. }
- var savetree : file of nchar;
- x : str;
-
- procedure unload(t:pnode);
- { dump the contents of a (sub-) tree to disk in
- "preorder," a sequence such that the exact layout
- of the tree will be reconstructed if the tree is
- reloaded from the file. }
- label 99;
- var j : strindex;
- p : ppage;
- k : integer;
- k1, k2 : nchar;
- begin {$r- range checks off during byte i/o }
- if t^.skip then goto 99; { dump not the phony node }
- stget(t^.iref,x);
- write(savetree,x.len);
- for j:=x.len downto 1 do write(savetree,x.val[j]);
- p := t^.phead;
- while p<>nil do begin
- k := p^.num;
- k1 := k div 256; k2 := k mod 256;
- write(savetree,k1,k2);
- p := p^.next
- end;
- write(savetree,nullch,nullch); { flag end of pages }
- if t^.subt<>nil then begin
- write(savetree,strmax);{ flag start of subtree }
- unload(t^.subt);
- write(savetree,nullch) { flag end of subtree }
- end;
- 99: if t^.lson<>nil then unload(t^.lson);
- if t^.rson<>nil then unload(t^.rson);
- end; {$r+ }
-
- begin
- rewrite('INDEXER.TRE',savetree);
- unload(maintree);
- write(savetree,nullch) { flag end of main tree }
- end;
-
- {-------------------------------------------------------------}
- { The main program, at last..... }
- {-------------------------------------------------------------}
-
- begin
- init;
- loadall;
- load(maintree);
- saveall;
- dump
- end.
-