home *** CD-ROM | disk | FTP | other *** search
- {outline of declaration of subprograms:
-
- 1. program IndexText(InText, InIndex, NewIndex, OutIndex, HashFile,
- NewHashFile, input, output); (main program)
- 2. function Lt(u, v: word): Boolean;
- 3. procedure ReadWord(var f: text; var w: word);
- 4. procedure WriteWord(var f: text; w: word);
- 4a. built in CPU time function clock;
-
- 5. procedure SplitWords; (phase 1)
- 5a. function FindFile(ch: char): filecode;
- 6. function HashAddress(w: word): hashentry;
- 7. procedure Initialize;
- 8. procedure GetWord;
- 8a. procedure TellUserPage;
- 9. procedure GetChar(var ch: char);
- 10. procedure AddChar(ch: char);
- 11. procedure Conclude;
-
- 12. procedure ClassifyWords; (phase 2)
- 13. procedure BuildTree(var root: pointer; ch: char);
- 15. function Power2(c: integer): level;
- (the next three procedures are written in line.)
- 14. procedure Insert(p: pointer);
- 16. procedure FindRoot;
- 17. procedure ConnectSubtrees;
- 18. procedure GetNode(var p: pointer; ch: char);
- 19. procedure Process(r: reference);
- 20. procedure UpdateNode(p: pointer; r: reference);
- 21. procedure NewWord(var p: pointer; r: reference);
- 22. procedure InsertTree(r, p: pointer);
- 23. procedure OutputTree(p: pointer);
- 24. procedure PutNode(p: pointer);
- }
-
-
-
- program IndexText(InText, InIndex, NewIndex, HashFile, NewHashFile,
- input, output);
-
- {Produces word counts and list of references for the document file
- InText. Uses the master word list in file InIndex, if provided. Output word
- list for new text goes to file NewIndex. HashFile contains the common words
- to be ignored. If not specified, it is created on output, containing the
- words so flagged by the user.}
- {This implementation uses only phases 1 and 2. A smaller array of text files
- is also used, as specified in the exercise section.}
-
- const
- maxwd = 20; {More letters in word will be ignored.}
- minwd = 1; {Shorter words will be ignored.}
- hashsize = 2003; {should be a prime}
- linesperpage = 66; {assumes standard spacing and paper}
- maxheight = 20; {for building binary tree in phase 2}
- A = 'A';
- Z = 'Z';
- hyphen = '-';
- blank = ' ';
- apostrophe = ''''; {requires two `'s to represent one}
- underscore = '_';
- ordbackspace = 8; {ASCII control character for backspace}
- ordformfeed = 12; {ASCII control character for new page}
- changecase = 32; {ASCII difference between upper and lower case}
- nfiles = 8; {number of temporary files for unprocessed words}
- MaxRowLength = 130; {maximum length of output records}
-
- type
- word = packed array[1..maxwd] of char;
- reference = record
- wd: word;
- pg: integer; {count or page number}
- end;
- fileref = file of reference; {used for local files}
- letter = A..Z;
- hashentry = 1..hashsize;
- filecode = 1..nfiles;
-
- var
- InText, {document being processed}
- InIndex, {master word list}
- NewIndex, {word list of current document}
- HashFile,
- NewHashFile: text;
- RefFile: array[filecode] of fileref; {local files used for auxilary
- storage of words from phase 1 to phase 2:
- Normally, a separate file exist for each initial letter,
- this version uses nfiles files due operating system constraints.}
- blankword: word; {will contain all blanks}
-
- {The next two variables were originally declared in procedure SplitWords,
- they have been moved to this level in order to access them globally.}
- outcount: array[filecode] of integer; {counters for word files}
- wordcount: integer; {count of all words in the text}
-
- intextname,
- inlistname,
- newlistname,
- newhashname: word; {used to get filename from user}
- lastletter: array[filecode] of letter; {last letter in each file}
- PresentTime,
- StartTime: integer; {used to track CPU time}
- RowLength: integer; {ensures records will not exceed MaxRowLength}
-
-
- function Lt( u, v: word): Boolean;
- {Determains if word u precedes word v lexicographically.}
- begin
- Lt := (u < v)
- end;
-
- procedure ReadWord( var F: text; var w: word);
- {Reads word w from text file F. Assumes not at end of file.}
- {Uses packed array, replace using a loop if your system does not
- support packed arrays. }
- begin {procedure ReadWord}
- read(F, w)
- end; {procedure ReadWord}
-
- procedure WriteWord( var F: text; w: word);
- {Writes word w to text file F}
- {Uses packed array, replace using a loop if your system does not
- support packed arrays. }
- begin {procedure WriteWord}
- write(F, w)
- end; {procedure WriteWord}
-
- procedure SetTimer; {Call once at beginning of program execution.}
- {Finds the CPU time when called, and keeps in variables for reference.}
- {System dependent procedure.}
- begin
- PresentTime := clock;
- StartTime := PresentTime;
- end;
-
- function TotalTime: real;
- {Returns the total CPU time, in seconds, since call to SetTimer.}
- {System dependent procedure.}
- begin
- TotalTime := (clock - StartTime) / 1000.0;
- end;
-
- function ElapsedTime: real;
- {Returns elapsed CPU time since last call to function ElapsedTime,
- or call to SetTimer, whichever is more recent.}
- {System dependent procedure.}
- var r: integer;
- begin
- r := clock;
- ElapsedTime := (r - PresentTime) / 1000.0;
- PresentTime := r;
- end;
-
-
-
- procedure SplitWords;
- {sets up hash table, reads text, and divides into nfiles word lists}
-
- var
- hash: array[hashentry] of word; {hash table}
- pagecount: integer; {keeps the current page number}
- addpage: integer; {amount to increase pagecount after word}
- linecount: integer; {lines on the current page}
- w: word; {word currently being processed}
- x: hashentry; {location of w, if in hash table}
- endinput: Boolean; {true if and only if input has all been read}
- code: filecode; {into which file does word go?}
-
- {The following variables are kept for use in procedure GetWord, and for
- efficiency are set up only once in procedure Initialize:}
- backspace,
- formfeed: char;
- alphabet, {letters only - to start a word}
- contchar: set of char; {other characters ok in middle of word}
-
-
- function FindFile( ch: letter): filecode;
- {Uses binary decision tree to select one of nfiles = 8 files depending
- on the letter ch. These letters must be the same as those in the
- global array lastletter .}
- begin {function FindFile}
- if ch < 'M' then
- if ch < 'E' then
- if ch < 'C' then FindFile := 1
- else FindFile := 2
- else if ch < 'H' then FindFile := 3
- else FindFile := 4
- else if ch < 'S' then
- if ch < 'P' then FindFile := 5
- else FindFile := 6
- else if ch < 'T' then FindFile := 7
- else FindFile := 8
- end; {function FindFile}
-
-
- function HashAddress(w: word): hashentry; {modified from textbook}
- {calculates the location in hash table of word w, or, if not there,
- returns pointing to the blank word where w should go}
-
- var
- x, {calculated location}
- inc: integer; {increment for open addressing}
- begin {function HashAddress}
- x := abs(ord(w[1])*ord(w[2])+ord(w[4])+ord(w[6])) mod hashsize + 1;
- {Hash function assumes long word length. For short word machines
- we must ensure that the result is non-negative, and worry about overflow.}
-
- if (hash[x] <> w) and (hash[x] <> blankword) then
- begin
- inc := (abs(ord(w[3])-95) mod 29);
- {A key dependent increment is used to avoid clustering.}
- repeat
- inc := inc + 1;
- if inc > hashsize then
- writeln(w,' causes hash table to become full, infinite loop.');
- x := x + inc;
- if x > hashsize then x := x - hashsize;
- until (w = hash[x]) or (blankword = hash[x])
- end;
- HashAddress := x
- end; {function HashAddress}
-
-
- procedure Initialize;
- {sets up constant-valued sets for use in GetWord. Opens the text file
- and initializes various counters. Opens file holding hash table (if any),
- and reads or otherwise initializes table}
- var
- i: integer; {general purpose loop control}
-
- begin {procedure Initialize}
- backspace:= chr(ordbackspace);
- formfeed := chr(ordformfeed); {initialize ASCII control characters}
- alphabet := ['A'..'Z', 'a'..'z']; {letters only, to start a word}
- contchar := [hyphen, apostrophe, backspace, underscore];
- {characters which will not terminate word}
- for i := 1 to maxwd do
- blankword[i] := blank;
-
- write('Name of input text file?');
- ReadWord(input, intextname); readln;
- open(InText, intextname, readonly);
- reset(InText);
- endinput := eof(InText);
-
- repeat
- write( 'What is the page number on which the text begins?');
- readln(pagecount);
- if pagecount < 0 then
- writeln('Must be a non-negative integer.')
- until pagecount >= 0;
- linecount := 0;
- addpage := 0;
- wordcount := 0;
-
- for i := 1 to nfiles do
- begin
- rewrite( RefFile[i] );
- outcount[i] := 0
- end;
- lastletter[1] := 'B';
- lastletter[2] := 'D';
- lastletter[3] := 'G';
- lastletter[4] := 'L';
- lastletter[5] := 'O';
- lastletter[6] := 'R';
- lastletter[7] := 'S';
- lastletter[8] := 'Z';
-
- reset(HashFile); {assumes HASHFILE.DAT is in current directory}
- if eof(HashFile) then
- begin {There is no previous table; initialize the table to all blanks.}
- writeln('Cannot open file for hash table. Creating a new table.');
- for i := 1 to hashsize do
- hash[i] := blankword
- end
- else begin {Retrieve the previous hash table.}
- i := 0;
- repeat
- i := i + 1;
- hash[i] := HashFile^;
- get(HashFile)
- until eof(HashFile) or (i >= hashsize);
- if (not eof(HashFile)) or (i <> hashsize) then
- writeln('Error in reading hash table. Incorrect number of entries.')
- end
- end; {procedure Initialize}
-
-
-
- procedure GetWord( var w: word);
- {Gets words from input file InText, and returns only words
- at least minwd characters long. Parameter endinput becomes
- true if and only if the end of InText is reached with no word to return.
- the procedure also updates global variables wordcount and linecount,
- updates the global variable pagecount after each linesperpage cr's,
- or after each formfeed, whichever comes first, and
- uses the sets alphabet and contchar and various character constants.}
-
- label 1; {used by GetChar to exit procedure upon eof(InText)}
-
- var c: 0..maxwd; {count of characters in word}
- ch: char; {character currently processed}
- endln: Boolean; {at the end of a line?}
-
-
- procedure TellUserPage; {keep the user informed of progress}
- var i: integer;
- begin
- i := pagecount + addpage;
- writeln('At page', i:4, ' word count is', wordcount:7)
- end;
-
-
- procedure GetChar(var ch: char);
- {gets a character from input text into ch; checks for eof; updates
- page count and line count}
-
- begin {procedure GetChar}
- if eof(InText) then
- if c >= minwd then
- ch := '.' {special character to end the current word}
- else begin {no word to return; set endinput}
- endinput := true;
- goto 1 {exit from GetWord.}
- end
- else begin {not end of file: process next character}
- while InText^ in [underscore, backspace] do
- get( InText);
- ch := InText^;
- endln := eoln(InText);
- get(InText);
- if endln then
- begin
- linecount := linecount + 1;
- if linecount >= linesperpage then
- begin
- addpage := addpage + 1;
- linecount := 0;
- TellUserPage
- end
- end;
- if ch = formfeed then
- begin
- addpage := addpage + 1;
- linecount := 0;
- TellUserPage;
- endln := true; {Treat formfeed like end of line.}
- ch := blank
- end
- end
- end; {procedure GetChar}
-
-
- procedure AddChar(ch: char);
- {adds given character to word, if possible}
- begin {procedure AddChar}
- if c < maxwd then
- begin
- c := c + 1;
- w[c] := ch
- end
- end; {procedure AddChar}
-
-
- begin {procedure GetWord}
- repeat {until current word is at least minwd chars long}
- c := 0;
- repeat
- GetChar(ch) {Find a letter which will start the word.}
- until ch in alphabet;
- pagecount := pagecount + addpage;
- addpage := 0;
- if ch in ['a'..'z'] then {translate first letter to upper case.}
- ch := chr(ord(ch) - changecase); {assumes ASCII ordering of letters}
- AddChar(ch); {put first letter into the word}
- GetChar(ch);
- while (ch in alphabet) or (ch in contchar) do
- if ch in alphabet then {add letters directly to word}
- begin {processing letter}
- AddChar(ch);
- GetChar(ch)
- end {processing letter}
- else if ch = hyphen then
- begin {processing hyphen}
- GetChar(ch); {Find what comes after hyphen.}
- if endln then
- while ch = ' ' do
- GetChar(ch) {Delete both the hyphen and the end of line}
- else if ch = hyphen then {Two hyphens form a dash; ends word}
- ch := blank {Use a blank to terminate the word.}
- else if ch in alphabet then
- AddChar(hyphen) {Include other hyphens in word}
- else {nothing}
- end {processing hyphen}
- else if ch = apostrophe then
- begin {processing apostrophe}
- GetChar(ch);
- if ch = 's' then {Delete `'s' at end of word only}
- begin
- GetChar(ch);
- if ch in contchar then
- begin
- AddChar(apostrophe);
- AddChar('s')
- end
- end
- else if ch in alphabet then
- AddChar(apostrophe) {Allow contractions.}
- end {processing apostrophe}
- else {Remaining possibilities are backspace and underscore.}
- GetChar(ch); {Delete these characters.}
- {While loop on continuing characters ends here.}
- wordcount := wordcount + 1
- until c >= minwd; {Skip over short words.}
-
- while c < maxwd do {Fill with blanks.}
- begin
- c := c + 1;
- w[c] := blank
- end;
- 1: {When end of file occurs, program will exit to here from GetChar}
- end; {procedure GetWord}
-
-
-
- procedure Conclude;
- {Writes out counts of various word lists. For some systems, it is
- necessary to close files, which should be done here.}
- var
- i: integer; {loop index}
- begin {procedure Conclude}
- writeln('The total number of words read in is ', wordcount:7);
- writeln;
- writeln('The number of words to process further in the next stage,');
- writeln('in each temporary file, is below.');
- writeln(' a-b c-d e-g h-l m-o p-r s t-z');
- for i := 1 to nfiles do
- write(outcount[i]:8);
- writeln;
- writeln
- end; {procedure Conclude}
-
-
-
- begin {procedure SplitWords}
- Initialize; {sets up files, hash table, constants}
- GetWord(w); {obtain a single word from InText}
- while not endinput do
- begin
- x := HashAddress(w);
- if w <> hash[x] then
- begin
- code := FindFile( w[1] );
- outcount[code] := outcount[code] + 1;
- with RefFile[code]^ do
- begin
- wd := w;
- pg := pagecount
- end;
- Put(RefFile[code])
- end;
- GetWord(w)
- end;
- Conclude {writes word counts to output.}
- end; {procedure SplitWords}
-
-
-
-
-
- {start of phase 2}
-
- procedure ClassifyWords;
- {For each letter of the alphabet, the procedure reads in a list of
- words from InIndex, builds them into a binary tree, supplements it
- with entries from RefFile, and writes the result to files NewIndex
- and NewHashFile.}
-
- type
- wordtype = (hash, count, page, question, index); {ways to process a word}
- pointref = ^reflist;
- reflist = record {list of references}
- pg: integer;
- next: pointref
- end;
- pointer = ^node;
- node = record {vertex of the binary tree}
- wd: word;
- left,
- right: pointer;
- ct: integer;
- case kind: wordtype of
- hash, count:
- ();
- page, question, index:
- (ref: pointref)
- end;
- var
- root: pointer; {root of binary tree}
- code: filecode; {loop through temporary files}
- endlist: Boolean; {at end of input word list?}
- i: integer; {general purpose loop variable}
-
-
-
- procedure BuildTree(var root: pointer; code: filecode);
-
- {Reads a sequential file in alphabetical order, and converts it into
- a binary search tree. Stops reading when the first letter of word
- is after lastletter[code].
- const maxheight = 20 (in main program) allows 512k entries.}
-
- {This procedure was modified slightly to fit the needs of this application.
- The parameters of GetNode now include a character ch, which has also
- been introduced as a local variable.}
-
- type
- level = -1 .. maxheight; {number of steps above leaves}
-
- var
- lastnode: array[level] of pointer; {contains pointer to
- last node processed on each level}
- counter: integer; {number of nodes read in so far}
- p: pointer; {p^ is present input node}
- lev: level; {level of p^}
- ch: char; {will be last letter to be processed.}
-
-
- function Power2(c: integer): level;
- {finds the highest power of 2 which divides c}
- var
- lev: level;
- begin {function Power2}
- lev := 0;
- while not odd(c) do
- begin
- c := c div 2;
- lev := lev + 1
- end;
- Power2 := lev
- end; {function Power2}
-
-
- procedure Insert(p: pointer);
- {Inserts p^ as rightmost node of a partial binary search tree.}
- var
- lev: level; {level of p^}
- begin {Procedure Insert}
- lev := Power2(counter);
- p^.right := nil;
- p^.left := lastnode[lev - 1];
- lastnode[lev] := p;
- if lastnode[lev + 1] <> nil then
- with lastnode[lev + 1]^ do
- if right = nil then right := p
- end; {Procedure Insert}
-
-
- procedure FindRoot;
- var
- lev: level;
- begin {Procedure FindRoot}
- if counter = 0 then
- root := nil {Tree is empty.}
- else begin {Non-empty tree}
- lev := maxheight; {Find the highest occupied level; it gives the root}
- while lastnode[lev] = nil do lev := lev - 1;
- root := lastnode[lev]
- end
- end; {Procedure FindRoot}
-
-
- procedure ConnectSubtrees;
- var
- p: pointer;
- lev: level;
- s: level;
- begin {Procedure ConnectSubtrees}
- lev := maxheight;
- while (lastnode[lev] = nil) and (lev > 1) do
- lev := lev - 1; {Find the highest node: root}
- while lev > 1 do {Nodes on levels 1 and 0 are already OK}
- with lastnode[lev]^ do
- if right <> nil then
- lev := lev - 1 {Search down for the highest dangling node}
- else begin {Case: right subtree is undefined.}
- p := left; {Find the highest entry in lastnode that}
- s := lev - 1; {is not in the left subtree.}
- repeat
- p := p^.right;
- s := s - 1
- until (p = nil) or (p <> lastnode[s]);
- right := lastnode[s];
- lev := s {Nodes on levels between lev and s are on the left.}
- end {Connecting dangling subtrees}
- end; {Procedure ConnectSubtrees}
-
-
- procedure GetNode( var p: pointer; ch: char);
- {reads a word from file InIndex and sets node correspondingly}
- {returns p = nil at eof or when next word starts later than code.}
- var
- wordcode: char; {letter indicating type of word}
-
- begin {procedure GetNode}
- while InIndex^ = '&' do {ignore lines starting with '&'}
- readln(InIndex);
- while (not eof(InIndex)) and (InIndex^ = blank) do
- get(InIndex); {Skip all leading blanks}
- if endlist or eof(InIndex) then
- p := nil
- else if InIndex^ > ch then
- p := nil
- else begin
- new(p);
- with p^ do begin
- ReadWord(InIndex, wd);
- while (InIndex^ = ' ') and (not eoln(InIndex)) do
- get(InIndex);
- read(InIndex, wordcode);
- ct := 0;
- if wordcode in ['C', 'H','I','P','?'] then
- case wordcode of
- 'C': kind := count;
-
- 'H': begin
- writeln('Warning: The input word list contains ', wd);
- writeln(' which belongs in the hash table.');
- kind := hash
- end;
-
- 'I': begin kind := index; ref := nil end;
- 'P': begin kind := page; ref := nil end;
- '?': begin
- writeln('Questionable word: ', wd, ' in word list.');
- write('New category (P, I, C, H, ?');
- repeat
- readln(wordcode);
- if wordcode > 'Z' then
- wordcode := chr(ord(wordcode) - changecase)
- until wordcode in ['H','C','P','?','I'];
- case wordcode of
- 'H': kind := hash;
- 'C': kind := count;
- 'P', ' ': kind := page;
- '?': kind := question;
- 'I': kind := index
- end;
- if kind in [page, question, index] then ref := nil
- end
- end
- else
- writeln('Erroneous word code ', wordcode, ' in file InIndex.')
- end; {with statement setting up the node}
- readln(InIndex); {Advance to the start of the next entry.}
- endlist := eof(InIndex)
- end
- end; {procedure GetNode}
-
-
- begin {procedure BuildTree}
- for lev := -1 to maxheight do lastnode[lev] := nil;
- counter := 0;
- ch := lastletter[code];
- GetNode(p, ch);
- while p <> nil do
- begin
- counter := counter + 1;
- Insert(p);
- GetNode(p, ch)
- end; {reading and processing input}
- FindRoot;
- ConnectSubtrees
- end; {procedure BuildTree}
-
-
-
- procedure Process( r: reference);
- {Takes the word and page reference r, and updates the binary tree.}
- var
- p: pointer; {trace through the tree}
- found: Boolean; {Is the word in the tree?}
-
-
- procedure UpdateNode( p: pointer; r: reference);
- {uses reference r to update information in node p^}
-
- var
- q: pointref; {used to add reference to list}
- begin {procedure UpdateNode}
- with p^ do
- begin
- ct := ct + 1;
- if kind in [page, question, index] then
- if ref = nil then
- begin
- new(ref);
- ref^.pg := r.pg;
- ref^.next := nil
- end
- else if ref^.pg <> r.pg then
- begin {add the new reference to list.}
- new(q);
- q^.pg := r.pg;
- q^.next := ref;
- ref := q
- end
- end {with statement to update tree}
- end; {procedure UpdateNode}
-
-
- procedure NewWord(var p: pointer; r: reference);
- {Creates a node for the first occurrence of a new reference r. A
- pointer to the new node is returned in p.}
-
- var
- response: char; {answer received from user}
- begin {procedure NewWord}
- new(p);
- with p^ do
- begin
- wd := r.wd;
- left := nil;
- right := nil;
- ct := 1;
-
- kind := question;
- repeat {ask user what kind of word}
- WriteWord(output, wd);
- write(' is (H, C, P, ?, I)?');
- readln(response);
- if response > 'Z' then response := chr(ord(response) - changecase)
- until response in ['H', 'C', 'P', ' ', '?', 'I'];
- case response of
- 'H': kind := hash;
- 'C': kind := count;
- 'P', ' ': kind := page;
- '?': begin
- kind := question;
- writeln('First occurence of word is on page', r.pg:5, '.')
- end;
- 'I': kind := index
- end; {case statement}
- if kind in [page, question, index] then
- begin
- new(ref);
- ref^.pg := r.pg;
- ref^.next := nil;
- end
- end {with statement}
- end; {procedure NewWord}
-
-
- procedure InsertTree(r, p: pointer);
- {adds a node p^ to the tree with root r^; requires that r <> nil
- and p^ not be in the tree; proceeds by recursion}
-
- begin {procedure InsertTree}
- if Lt(p^.wd, r^.wd) then
- if r^.left = nil then r^.left := p
- else InsertTree(r^.left, p)
- else
- if r^.right = nil then r^.right := p
- else InsertTree(r^.right, p)
- end; {procedure InsertTree}
-
-
- begin {procedure Process}
- if root = nil then {The tree might be empty.}
- NewWord(root, r)
- else begin {case of non-empty tree}
- p := root; {Begin a tree search.}
- found := false;
- repeat
- if r.wd = p^.wd then
- found := true
- else if Lt(r.wd,p^.wd) then
- p := p^.left
- else
- p := p^.right
- until found or (p = nil);
-
- if found then UpdateNode(p, r)
- else begin {p^ was not found: add it to the tree.}
- NewWord(p, r);
- InsertTree(root, p)
- end
- end
- end; {procedure Process}
-
-
- procedure OutputTree( p: pointer);
- {traverses the tree for which p^ is the root in inorder}
-
- procedure PutNode( p: pointer);
- {Puts the information in p^ into the file NewIndex.}
-
- var
- q: pointref; {used to traverse list of references}
- response: char;
- begin {procedure PutNode}
- with p^ do if ct > 0 then
- begin {Otherwise, word is not in document.}
- if kind <> hash then
- WriteWord(NewIndex, wd);
- case kind of
- hash: begin {new hash entries written to NewHashFile}
- WriteWord(NewHashFile, wd);
- writeln(NewHashFile)
- end;
- count: write(NewIndex, 'C');
- page: write(NewIndex, 'P');
- index: write(NewIndex, 'I');
- question:
- begin
- repeat {ask user what kind of word}
- WriteWord(output, wd);
- write(' is questionable. Change to (h, c, p, ?, i)?');
- readln(response);
- if response > 'Z' then response := chr(ord(response) - changecase)
- until response in ['H', 'C', 'P',' ', '?', 'I'];
- case response of
- 'H': begin kind := hash; write(NewIndex, 'H') end;
- 'C': begin kind := count; write(NewIndex, 'C') end;
- 'P', ' ': begin kind := page; write(NewIndex, 'P') end;
- 'I': begin kind := index; write(NewIndex, 'I') end;
- '?': begin
- kind := question;
- write(NewIndex, '?');
- write('The word appears on the following page(s)');
- q := ref;
- repeat
- write(q^.pg:6);
- q := q^.next
- until q = nil;
- writeln
- end {case of questionable word}
- end {case response statement}
- end {treating new or question words}
- end; {case kind statement}
- if kind <> hash then
- write(NewIndex, ct:6);
- if kind in [page, question, index] then
- begin
- q := ref;
- RowLength := 28; {ensures that record will not exceed desired length}
- repeat
- if RowLength > (MaxRowLength - 4) then
- begin
- writeln(NewIndex);
- write(NewIndex,'& '); {& indicates continuation of index}
- RowLength := 3
- end;
- write( NewIndex, q^.pg:4);
- q := q^.next;
- RowLength := RowLength + 4
- until q = nil;
- end;
- if kind <> hash then
- writeln( NewIndex )
- end {with statement and if statement}
- end; {procedure PutNode}
-
-
- begin {procedure OutputTree}
- if p <> nil then
- with p^ do
- begin
- OutputTree(left); {Traverse the left subtree}
- PutNode(p);
- OutputTree(right); {Traverse the right subtree}
- dispose(p)
- end
- end; {procedure OutputTree}
-
-
-
- begin {procedure ClassifyWords}
-
- write('Name of input word list ?');
- ReadWord(input, inlistname);
- readln;
- open(InIndex, inlistname, readonly);
- reset(InIndex);
- endlist := eof(InIndex);
-
- write('Name of output word list ?');
- ReadWord(input, newlistname);
- readln;
- open(NewIndex, newlistname);
- rewrite(NewIndex);
-
- writeln('Rewriting NEWHASHFILE.DAT to contain all new hash words.');
- rewrite(NewHashFile);
-
- writeln('At the appearance of each word, indicate its disposition:');
- writeln(' H - Place this word in hash table and count its frequency.');
- writeln(' C - Count how many times this word appears.');
- writeln(' P - List pages on which this word appears.');
- writeln(' ? - Question this word: list pages on which it appears.');
- writeln(' I - Index this word: list pages on which it appears.');
-
- for code := 1 to nfiles do {start main loop through temporary files.}
- begin
- BuildTree(root, code); {Get the part of master wordlist starting with
- code from the file InIndex, and build it into a binary tree.}
- reset(RefFile[code]);
- for i := 1 to outcount[code] do
- begin
- Process(RefFile[code]^);
- {use new words from RefFile[code] to update the tree.}
- get( RefFile[code] )
- end;
-
- OutputTree(root)
- {write the contents of the tree into file NewIndex.}
- end {main loop on temporary files}
- end; {procedure ClassifyWords}
-
- {end of all procedures}
-
-
-
- begin {main program}
- SetTimer;
- SplitWords; {Phase 1}
- writeln('Time in first phase is ', ElapsedTime:7:1, ' seconds.');
- writeln;
-
- ClassifyWords; {Phase 2}
- writeln('Time in second phase is', ElapsedTime:7:1, ' seconds.');
-
- writeln;
- writeln('Processing of input document ', intextname, ' is complete.');
- writeln('Total time in program was ', TotalTime:7:1, ' seconds.')
- end.
-
-