home *** CD-ROM | disk | FTP | other *** search
-
- {
- 1. Program IndexText(InText, InIndex, NewIndex, OutIndex,
- HashFile, Input, Output);
- 2. Function Lt(u, v: word): Boolean;
- 3. Procedure ReadWord(var F: text; var w: word);
- 4. Procedure WriteWord(var F: text; w: word);
-
- 5. Procedure SplitWords; phase 1
- 6. Function HashAddress(w: word): hashentry;
- 7. Procedure Initialize;
- 8. Procedure GetWord;
- 9. Procedure GetChar(var ch: char);
- 10. Procedure AddChar(ch: char);
- 11. Procedure Conclude;
-
- 12. Procedure ClassifyWords; phase 2
- 13. Procedure BuildTree(var root: pointer; ch: char);
- 14. Procedure Insert(p: pointer);
- 15. Function Power2(c: integer): level;
- 16. Procedure FindRoot;
- 17. Procedure ConnectSubtrees;
- 18. Procedure GetNode(var p: pointer; ch: char);
- 19. Procedure Process(r: reference);
- 20. Procedure UpdateNode(p: pointer; r: reference);
- 21. Procedure NewWord(var p: pointer; r: reference);
- 22. Procedure InsertTree(r, p: pointer);
- 23. Procedure OutputTree(p: pointer);
- 24. Procedure PutNode(p: pointer);
-
- 25. Procedure UpdateHashFile; phase 3
- 26. Function HashAddress(w: word): hashentry;
- 27. Procedure MergeIndices;
- 28. Procedure CopyLine
- }
-
-
- Program IndexText(InText, InIndex, NewIndex, OutIndex, HashFile,
- NewHashFile, Input, Output);
- {Produces word counts and list of references for the document file InText.
- Uses the master word list in file InIndex, if provided.
- Output word list for the new text goes to file NewIndex.
- The merger of these two files becomes OutIndex.
- HashFile contains the common words to be ignored. If not specified, it is
- created on output, containing the words so flagged by the user.}
- Const
- maxwd = 20; {More letters in a word will be ignored.}
- minwd = 3; {Shorter words will be ignored}
- hashsize = 2003; {should be a prime; size of hash table}
- linesperpage = 66; {assumes standard spacing and paper}
- maxheight = 20; {for building binary tree in phase 2}
- A = 'A';
- Z = 'Z';
- hyphen = '-';
- blank = ' ';
- apostrophe = ''''; {requires two apostrophes to represent one}
- underscore = '_';
- ordbackspace = 8; {ASCII control character for backspace}
- ordformfeed = 12; {ASCII control character for new page}
- changecase = 32; {ASCII difference between upper and lower case}
- Type
- word = packed array[1..maxwd] of char;
- reference = record
- wd: word;
- pg: integer; {page number}
- end;
- fileref = file of reference; {used for local files}
- letter = A..Z;
- hashentry = 1..hashsize;
- Var
- InText, {document being processed}
- InIndex, {master word list}
- NewIndex, {word list of current document}
- OutIndex: text; {updated master word list}
- HashFile,
- NewHashFile: file of word; {local file, used to update HashFile}
- RefFile: array[letter] of fileref; {local files used for
- auxiliary storage of words from phase 1 to phase 2:
- separate file for each initial letter}
- blankword: word; {will contain all blanks}
- outcount: array[letter] of integer; {counters for word files}
- wordcount: integer; {count of all words in the text}
-
-
-
- Function Lt(u,v: word): Boolean;
- {Determine if word u precedes word v lexicographically.}
- Var
- i: 1..maxwd; {loop variable}
- Begin {function Lt}
- i := 1;
- While (i < maxwd) and (u[i] = v[i]) do i := i + 1;
- Lt := (u[i] < v[i])
- {Above is version that works with ASCII code. For codes where blank comes
- after letters, modifications are necessary.}
- End; {function Lt}
-
-
- Procedure ReadWord( var F: text; var w: word);
- {reads word w from text file F; assumes not at end of file}
- Var
- c: 1..maxwd;
- Begin {procedure ReadWord}
- For c := 1 to maxwd do
- read(F, w[c])
- End; {procedure ReadWord}
-
-
-
- procedure WriteWord(var F: text; w: word);
- {writes word w to text file F}
- var
- c: 1..maxwd;
- begin
- for c := 1 to maxwd do
- write(F, w[c])
- end;
-
-
-
-
-
- {Phase 1: Splitting the Text into Words}
-
-
- Procedure SplitWords;
- {sets up hash table, reads text, and divides into 26 word lists}
- Var
- hash: array[hashentry] of word; {hash table}
- pagecount, {keeps the current page number}
- addpage, {amount to increase pagecount after word}
- linecount: integer; {line number on the current page}
- w: word; {word currently being processed}
- x: hashentry; {location of w, if in hash table}
- endinput: Boolean; {true if and only if input has all been read}
- firstletter: char; {Into which file does word w go?}
- {The following are kept for use in procedure GetWord,
- and for efficiency are set up only once in procedure Initialize.}
- backspace,
- formfeed: char; {ASCII control characters}
- contchar, {characters OK in the middle of a word}
- alphabet: set of char; {letters only --- to start a word}
- {Implementation dependent: A good Pascal compiler should allow "set of char";
- otherwise, a restricted range is required.}
-
-
-
- function HashAddress(w: word): hashentry; {modified from the textbook}
- {calculates the location in hash table of word w, or, if not there,
- returns pointing to the blank word where w should go}
-
- var
- x, {calculated location}
- inc: integer; {increment for open addressing}
- begin {function HashAddress}
- x := abs(ord(w[1])*ord(w[2])+ord(w[4])+ord(w[6])) mod hashsize + 1;
- {Hash function assumes long word length. For short word machines
- we must ensure that the result is non-negative, and worry about overflow.}
-
- if (hash[x] <> w) and (hash[x] <> blankword) then
- begin
- inc := (abs(ord(w[3])-95) mod 29);
- {A key dependent increment is used to avoid clustering.}
- repeat
- inc := inc + 1;
- if inc > hashsize then
- writeln(w,' causes hash table to become full, infinite loop.');
- x := x + inc;
- if x > hashsize then x := x - hashsize;
- until (w = hash[x]) or (blankword = hash[x])
- end;
- HashAddress := x
- end; {function HashAddress}
-
-
- Procedure Initialize;
- {sets up constant-valued sets for use in GetWord;
- opens the text file and initializes various counters;
- opens file holding hash table (if any), and reads or
- otherwise initializes the table.}
- Var
- ch: char; {used as an index}
- i: integer; {general--purpose loop control}
- Begin {procedure Initialize}
- backspace:= chr(ordbackspace);
- formfeed := chr(ordformfeed); {Initialize ASCII control characters.}
- alphabet := ['A'..'Z', 'a'..'z']; {letters only, to start a word}
- contchar := alphabet + [hyphen, apostrophe, backspace, underscore];
- {characters that will not terminate the word}
- For i := 1 to maxwd do
- blankword[i] := blank;
- reset(InText);
- endinput := eof(InText);
- Repeat
- write( 'What is the page number on which the text begins?');
- readln(pagecount);
- if pagecount < 0 then
- writeln('Must be a nonnegative integer.')
- until pagecount >= 0;
- linecount := 0;
- addpage := 0;
- wordcount := 0;
- For ch := A to Z do
- Begin
- Rewrite( RefFile[ch] );
- Outcount[ch] := 0
- End;
- reset(HashFile);
- if eof(HashFile) then
- begin {There is no previous table; initialize the table to all blanks.}
- writeln('Cannot open file for hash table. Creating a new table.');
- for i := 1 to hashsize do
- hash[i] := blankword
- end
- else begin {Retrieve the previous hash table.}
- i := 0;
- repeat
- i := i + 1;
- hash[i] := HashFile^;
- get(HashFile)
- until eof(HashFile) or (i >= hashsize);
- if (not eof(HashFile)) or (i <> hashsize) then
- writeln('Error in reading hash table. Incorrect number of entries.')
- end
- end; {procedure Initialize}
-
-
-
- Procedure GetWord( var w: word);
- {Gets words from input file InText, and returns only words
- at least minwd characters long. Parameter endinput becomes
- true if and only if the end of InText is reached with no word to return.
- This parameter is set by the subsidiary procedure GetChar.
- The procedure also updates global variables wordcount and linecount,
- updates the global variable pagecount after each linesperpage cr's,
- or after each formfeed, whichever comes first, and
- uses the sets alphabet and contchar and various character constants.}
- label 1; {used by GetChar to exit procedure on eof(InText)}
- Var c: 0..maxwd; {count of characters in word}
- ch: char; {character currently processed}
- endln: Boolean; {At the end of a line?}
-
-
- Procedure GetChar(var ch: char);
- {gets a character from input text into ch; checks for eof;
- updates page count and line count}
- Begin {procedure GetChar}
- If eof(InText) then
- If c >= minwd then
- ch := '.' {special character to end the current word}
- Else begin {no word to return; set endinput}
- endinput := true;
- goto 1 {Exit from GetWord.}
- End
- Else begin {not at end of file: process next character}
- ch := InText^;
- endln := eoln(InText);
- get(InText);
- If endln then
- Begin
- linecount := linecount + 1;
- If linecount >= linesperpage then
- Begin
- addpage := addpage + 1;
- linecount := 0
- End
- End;
- If ch = formfeed then
- Begin
- addpage := addpage + 1;
- linecount := 0;
- endln := true; {Treat formfeed like end of line.}
- ch := blank
- End
- End
- End; {procedure GetChar}
-
-
-
- Procedure AddChar(ch: char);
- {adds given character to word, if possible}
- Begin {procedure AddChar}
- If c < maxwd then
- Begin
- c := c + 1;
- w[c] := ch
- End
- End; {procedure AddChar}
-
-
-
- begin {procedure GetWord}
- repeat {until current word is at least minwd chars long}
- c := 0;
- repeat
- GetChar(ch) {Find a letter that will start the word.}
- until ch in alphabet;
- pagecount := pagecount + addpage;
- addpage := 0;
- If ch in ['a'..'z'] then {Translate the first letter to uppercase.}
- ch := chr(ord(ch) - changecase); {system dependent}
- AddChar(ch); {Put first letter into the word.}
- GetChar(ch);
- While ch in contchar do
- If ch in alphabet then {Add letters directly to word.}
- Begin {processing letter}
- AddChar(ch);
- GetChar(ch)
- End {processing letter}
- Else If ch = hyphen then
- Begin {processing hyphen}
- GetChar(ch); {Find what comes after hyphen.}
- If endln then
- GetChar(ch) {Delete both the hyphen and the end of line.}
- Else if ch = hyphen then {Two hyphens represent a dash.}
- ch := blank {Use a blank to terminate the word.}
- Else If ch in alphabet then
- AddChar(hyphen) {Include hyphens between letters}
- Else {nothing} {Delete all other hyphens}
- End {processing hyphen}
- Else if ch = apostrophe then
- Begin {processing apostrophe}
- GetChar(ch);
- If ch = 's' then {Delete 's at end of word only.}
- Begin
- GetChar(ch);
- If ch in contchar then
- Begin
- AddChar(apostrophe);
- AddChar('s')
- End
- End
- Else if ch in alphabet then
- AddChar(apostrophe) {Allow contractions.}
- End {processing apostrophe}
- Else {Remaining possibilities are backspace and underscore.}
- GetChar(ch); {Delete these characters.}
- {while loop on continuing characters ends here.}
- wordcount := wordcount + 1
- Until c >= minwd; {Skip over short words.}
- While c < maxwd do {Fill with blanks.}
- Begin
- c := c + 1;
- w[c] := blank
- End;
- 1: {When end of file occurs, program will exit to here from GetChar.}
- End; {procedure GetWord}
-
-
-
- Procedure Conclude;
- {Writes out counts of various word lists. For some systems, it is
- necessary to close files, which should be done in this procedure.}
- Var
- ch: char; {loop index}
- Begin {procedure Conclude}
- writeln('The total number of words read in is ', wordcount:7);
- writeln;
- writeln('The number of words to process further in the next stage,');
- writeln('beginning with each letter, is below.');
- writeln;
- for ch := 'A' to 'M' do write(' ', ch:1, ' ');
- writeln;
- for ch := 'A' to 'M' do write(outcount[ch]:4, ' ');
- writeln;
- writeln;
- for ch := 'N' to 'Z' do write(' ', ch:1, ' ');
- writeln;
- for ch := 'N' to 'Z' do write(outcount[ch]:4, ' ');
- writeln;
- writeln
- End; {procedure Conclude}
-
-
- Begin {procedure SplitWords}
- Initialize; {sets up files, hash table, constants}
- GetWord(w); {obtains a single word from InText}
- While not endinput do
- Begin
- x := HashAddress(w);
- If w <> hash[x] then
- Begin {Not in hash table; put into RefFile.}
- firstletter := w[1];
- outcount[firstletter] := outcount[firstletter] + 1;
- With RefFile[firstletter]^ do
- Begin
- wd := w;
- pg := pagecount
- End;
- Put(RefFile[firstletter])
- End;
- GetWord(w)
- End;
- Conclude {writes word counts to Output}
- End; {procedure SplitWords}
-
-
-
-
-
- {Phase 2: Classifying the words}
-
-
- Procedure ClassifyWords;
- {For each letter of the alphabet, the procedure reads in a list of words from
- InIndex, builds them into a binary tree, supplements it with entries from
- RefFile, and writes result to NewIndex and NewHashFile.}
- Type
- wordtype = (hash, count, index); {three ways to process a word}
- pointref = ^reflist;
- reflist = record {list of references}
- pg: integer;
- next: pointref
- end;
- pointer = ^node;
- node = record {vertex of the binary tree}
- wd: word;
- left,
- right: pointer;
- case kind: wordtype of
- hash:
- (); {empty}
- count:
- (ct: integer);
- index:
- (ref: pointref)
- end;
- Var
- root: pointer; {root of the binary tree}
- ch: char; {Loop on the first letter of word.}
-
-
-
- procedure BuildTree(var root: pointer; ch: char);
- {Uses an auxiliary procedure GetNode(p) to obtain a list of items in
- proper order of keys, and builds them into a binary search tree.}
- const
- maxheight = 20;
- type
- level = -1 .. maxheight; {number of steps above leaves}
- var
- lastnode: array[level] of pointer; {contains a pointer to
- the last node processed on each level}
- counter: integer; {number of nodes read in so far}
- p: pointer; {p^ is the present input node}
- lev: level; {level of p^}
-
-
- function Power2(c: integer): level;
- {Finds the highest power of 2 that divides c. Requires c <> 0.}
- var
- lev: level;
- begin {Function Power2}
- lev := 0;
- while not odd(c) do
- begin c := c div 2; lev := lev + 1 end;
- Power2 := lev
- end; {Function Power2}
-
-
- procedure Insert(p: pointer);
- {Inserts p^ as rightmost node of a partial binary search tree.}
- var
- lev: level; {level of p^}
- begin {Procedure Insert}
- lev := Power2(counter);
- p^.right := nil;
- p^.left := lastnode[lev - 1];
- lastnode[lev] := p;
- if lastnode[lev + 1] <> nil then
- with lastnode[lev + 1]^ do
- if right = nil then right := p
- end; {Procedure Insert}
-
-
- procedure FindRoot;
- var
- lev: level;
- begin {Procedure FindRoot}
- if counter = 0 then
- root := nil {Tree is empty.}
- else begin {Non-empty tree}
- lev := maxheight; {Find the highest occupied level; it gives the root}
- while lastnode[lev] = nil do lev := lev - 1;
- root := lastnode[lev]
- end
- end; {Procedure FindRoot}
-
-
- procedure ConnectSubtrees;
- var
- p: pointer;
- lev: level;
- s: level;
- begin {Procedure ConnectSubtrees}
- lev := maxheight;
- while (lastnode[lev] = nil) and (lev > 1) do
- lev := lev - 1; {Find the highest node: root}
- while lev > 1 do {Nodes on levels 1 and 0 are already OK}
- with lastnode[lev]^ do
- if right <> nil then
- lev := lev - 1 {Search down for the highest dangling node}
- else begin {Case: right subtree is undefined.}
- p := left; {Find the highest entry in lastnode that}
- s := lev - 1; {is not in the left subtree.}
- repeat
- p := p^.right;
- s := s - 1
- until (p = nil) or (p <> lastnode[s]);
- right := lastnode[s];
- lev := s {Nodes on levels between lev and s are on the left.}
- end {Connecting dangling subtrees}
- end; {Procedure ConnectSubtrees}
-
-
-
- Procedure GetNode( var p: pointer; ch: char);
- {reads a word from file InIndex and sets node correspondingly;
- returns p = nil at eof or when next word starts later than ch}
- Var
- wordcode: char; {letter indicating type of word}
- Begin {procedure GetNode}
- While (not eof(InIndex)) and (InIndex^ = blank) do
- Get(InIndex); {Skip all the leading blanks.}
- If eof(InIndex) then
- p := nil
- Else if InIndex^ > ch then
- p := nil
- Else begin
- new(p);
- with p^ do begin
- ReadWord(InIndex, wd);
- Read(InIndex, wordcode);
- If wordcode = 'i'
- then begin kind := index; ref := nil end
- Else if wordcode = 'c'
- then begin kind := count; ct := 0 end
- Else
- Writeln('Erroneous word code in file InIndex.')
- End; {with statement setting up node}
- readln(InIndex) {Advance to the start of the next entry.}
- End
- End; {procedure GetNode}
-
-
-
- begin {Procedure BuildTree}
- for lev := -1 to maxheight do lastnode[lev] := nil;
- counter := 0;
- GetNode(p, ch);
- while p <> nil do
- begin
- counter := counter + 1;
- Insert(p);
- GetNode(p, ch)
- end; {receiving and processing input}
- FindRoot;
- ConnectSubtrees
- end; {Procedure BuildTree}
-
-
-
-
- Procedure Process( r: reference);
- {takes the word and page reference r and updates the binary tree}
- Var
- p: pointer; {Trace through the tree.}
- found: Boolean; {Is the word in the tree?}
-
-
-
- Procedure UpdateNode( p: pointer; r: reference);
- {uses reference r to update information in node p^}
- Var
- q: pointref; {used to add reference to list}
- Begin {procedure UpdateNode}
- With p^ do
- Case kind of
- hash:; {no action needed}
- count: ct := ct + 1;
- index: If ref = nil then
- Begin
- new(ref);
- ref^.pg := r.pg;
- ref^.next := nil
- End
- Else if ref^.pg <> r.pg then
- Begin {Add the new reference to the list.}
- New(q);
- q^.pg := r.pg;
- q^.next := ref;
- ref := q
- End
- End {case statement to update tree}
- End; {procedure UpdateNode}
-
-
-
- Procedure NewWord(var p: pointer; r: reference);
- {Creates a node for the first occurrence of a new reference r.
- A pointer to the new node is returned in p.}
- Var
- response: char; {answer received from user}
- Begin {procedure NewWord}
- new(p);
- With p^ do
- Begin
- wd := r.wd;
- left := nil;
- right := nil;
- Repeat {Ask user what kind of word.}
- WriteWord(output, wd);
- write('is (F, C, I)?');
- read(response)
- Until response in ['F', 'C', 'I' ,'f', 'c', 'i'];
- Case response of
- 'F','f': kind := hash;
- 'C','c': Begin
- kind := count;
- ct := 1
- End;
- 'I','i': Begin
- kind := index;
- new(ref);
- ref^.pg := r.pg;
- ref^.next := nil;
- End
- End {case statement}
- End {with statement}
- End; {procedure NewWord}
-
-
-
- Procedure InsertTree(r, p: pointer);
- {adds a node p^ to the tree with root r^; requires that r <> nil
- and p^ not be in the tree; proceeds by recursion}
- Begin {procedure InsertTree}
- If Lt(p^.wd, r^.wd) then
- If r^.left = nil then r^.left := p
- Else InsertTree(r^.left, p)
- Else
- If r^.right = nil then r^.right := p
- Else InsertTree(r^.right, p)
- End; {procedure InsertTree}
-
-
-
-
- Begin {procedure Process}
- If root = nil then {The tree might be empty.}
- NewWord(root, r)
- Else begin {case of nonempty tree}
- p := root; {Begin a tree search.}
- found := false;
- Repeat
- If r.wd = p^.wd then
- found := true
- Else If Lt(r.wd, p^.wd) then
- p := p^.left
- Else
- p := p^.right
- Until found or (p = nil);
- If found then UpdateNode(p, r)
- Else begin {p^ was not found: add it to the tree.}
- NewWord(p, r);
- InsertTree(root, p)
- End
- End
- End; {procedure Process}
-
-
-
-
- Procedure OutputTree( p: pointer);
- {traverses the tree for which p^ is the root in inorder}
-
-
- Procedure PutNode(p: pointer);
- Var
- q: pointref; {used to traverse list of references}
- Begin {procedure PutNode}
- With p^ do
- Case kind of
- hash: Begin
- NewHashFile^ := wd;
- put( NewHashFile )
- End;
- count: If ct <> 0 then {Otherwise, word is not in the document.}
- Begin
- WriteWord(NewIndex, wd);
- write(NewIndex, 'c');
- writeln( NewIndex, ct:5)
- End;
- index: If ref <> nil then
- Begin
- WriteWord(NewIndex, wd);
- write(NewIndex, 'i');
- q := ref;
- Repeat
- write( NewIndex, q^.pg:5);
- q := q^.next
- Until q = nil;
- writeln( NewIndex )
- End
- End {case statement}
- End; {procedure PutNode}
-
-
- Begin {procedure OutputTree}
- If p <> nil then
- With p^ do
- Begin
- OutputTree(left); {Traverse the left subtree.}
- PutNode(p);
- OutputTree(right); {Traverse the right subtree.}
- Dispose(p)
- End
- End; {procedure OutputTree}
-
-
-
-
- Begin {procedure ClassifyWords}
- writeln('At the appearance of each word, give its disposition:');
- writeln(' F --- Forget all occurrences of this word.');
- writeln(' C --- Count how many times this word appears.');
- writeln(' I --- Index this word: list the pages on which it appears.');
- Reset(InIndex);
- Rewrite(NewIndex);
- For ch := A to Z do {Start main loop on first letter of word.}
- Begin
- BuildTree(root, ch); {Get the part of master wordlist starting with ch
- from the file InIndex, and build it into a binary tree.}
- reset(RefFile[ch]);
- While not eof(RefFile[ch]) do
- Begin
- Process(RefFile[ch]^);
- {Use new words from RefFile[ch] to update the tree.}
- get( RefFile[ch] )
- End;
- OutputTree(root) {Write the contents of the tree into files NewIndex and
- NewHashFile.}
- End {main loop on letters of alphabet}
- End; {procedure ClassifyWords}
-
-
-
-
-
- {Phase 3: Updating the Permanent Files}
-
-
- Procedure UpdateHashFile;
- {reads in old hash table, inserts file of new entries; writes out to HashFile}
- Var
- hash: array[hashentry] of word;
- x: hashentry;
- w: word;
-
-
- Function HashAddress(w: word): hashentry;
- {calculates the location in hash table of word w, or, if none,
- returns pointing to the blank word where w should go}
- Var
- x, {calculated location}
- inc: integer; {increment for open addressing}
- Begin {function HashAddress}
- x := (ord(w[1]) * ord(w[3]) * ord(w[4]) + ord(w[6])) mod hashsize + 1;
- {Hash function assumes long word length. For short word machines, we
- must ensure that the result is nonnegative, and worry about overflow.}
- If (hash[x] <> w) and (hash[x] <> blankword) then
- Begin
- inc := 1;
- Repeat
- x := x + inc;
- If x > hashsize then x := x - hashsize;
- inc := inc + 2
- Until (w = hash[x]) or (blankword = hash[x])
- End;
- HashAddress := x
- End; {function HashAddress}
-
-
- Begin {procedure UpdateHashFile}
- reset(HashFile);
- If eof(HashFile) then {HashFile is empty; create new table.}
- For x := 1 to hashsize do
- hash[x] := blankword
- Else
- For x := 1 to hashsize do
- read(HashFile, hash[x]);
- {Some versions of Pascal do not allow procedures read and write for
- files other than text. For such systems, expand to use get and put.}
- reset(NewHashFile);
- While not eof(NewHashFile) do
- Begin
- read(NewHashFile, w);
- hash[HashAddress(w)] := w
- {If the table is full, new entries will replace old ones.}
- End;
- rewrite(HashFile);
- For x := 1 to hashsize do
- Write(HashFile, hash[x])
- End; {procedure UpdateHashFile}
-
-
-
-
- Procedure MergeIndices;
- {merges files NewIndex and InIndex into file OutIndex}
- Var
- u, v: word; {for new and old indices, respectively}
- m, n: integer; {counts for above entries}
- ukind,
- vkind: char; {Is the word of kind i or c?}
-
-
- Procedure CopyLine( var w: word; var F: text; newline, endline: Boolean);
- {Copies the remainder of a line from the file F to OutIndex.
- If newline is true, then the word w is also written, and kind is copied.
- If endline is true, then the line written to OutIndex is ended.
- The procedure also reads a new word w from the next line in F.}
- Var
- n: integer; {number copied from file to file}
- kind: char; {word code copied from file to file}
- Begin {procedure CopyLine}
- If newline then
- Begin
- WriteWord(OutIndex, w);
- read(F, kind);
- write(OutIndex, kind)
- End Else
- While (not eof(F)) and (not eoln(F)) and (F^ = blank) do
- get(F);
- While (not eof(F)) and (not eoln(F)) do
- Begin
- read(F, n);
- write(OutIndex, n:5);
- While (not eoln(F)) and (F^ = blank) do
- get(F); {Skip blanks.}
- End;
- readln(F);
- If not eof(F) then
- ReadWord(F, w);
- If endline then writeln(OutIndex)
- End; {procedure CopyLine}
-
-
-
- Begin {procedure MergeIndices}
- reset(NewIndex);
- reset(InIndex);
- rewrite(OutIndex);
- If eof(NewIndex) or eof(InIndex) then
- writeln('One of the indices is empty. No merge will be done.')
- Else Begin
- ReadWord(NewIndex, u);
- ReadWord( InIndex, v);
- Repeat
- If Lt(u,v) then
- CopyLine(u, NewIndex, true, true)
- {Boolean parameters mean, respectively; start new line; end the line.}
- Else If Lt(v,u) then
- CopyLine(v, InIndex, true, true)
- Else begin {Words are equal. Determine the kind of word.}
- read(NewIndex, ukind);
- read( InIndex, vkind);
- If ukind <> vkind then
- writeln('Inconsistent word types found in merge.');
- WriteWord(OutIndex, u);
- write(OutIndex, ukind);
- If ukind = 'c' then
- Begin
- readln(NewIndex, m);
- readln( InIndex, n);
- m := m + n;
- writeln(OutIndex, m:5);
- If not eof(NewIndex) then ReadWord(NewIndex, u);
- If not eof( InIndex) then ReadWord( InIndex, v)
- End
- Else begin {Copy both lists of page numbers.}
- CopyLine(u, NewIndex, false, false);
- CopyLine(v, InIndex, false, true)
- End
- End {finished processing equal words}
- Until eof(NewIndex) or eof(InIndex);
- While not eof(NewIndex) do
- CopyLine(u, NewIndex, true, true);
- While not eof(InIndex) do
- CopyLine(v, InIndex, true, true)
- {At most one of the two loops above will iterate.}
- End
- End; {procedure MergeIndices}
-
-
-
-
-
- Begin {main program}
- SplitWords; {phase 1}
- ClassifyWords; {phase 2}
- UpdateHashFile; {phase 3, first part}
- MergeIndices; {phase 3, second part}
- End.
-
-