home *** CD-ROM | disk | FTP | other *** search
-
- {outline of declaration of subprograms:
-
- 1. program IndexText(InText, InIndex, NewIndex, OutIndex, HashFile,
- NewHashFile, input, output); (main program)
- 2. function Lt(u, v: word): Boolean;
- 3. procedure ReadWord(var f: text; var w: word);
- 4. procedure WriteWord(var f: text; w: word);
- 4a. built in CPU time function clock;
-
- 5. procedure SplitWords; (phase 1)
- 5a. function FindFile(ch: char): filecode;
- 6. function HashAddress(w: word): hashentry;
- 7. procedure Initialize;
- 8. procedure GetWord;
- 8a. procedure TellUserPage;
- 9. procedure GetChar(var ch: char);
- 10. procedure AddChar(ch: char);
- 11. procedure Conclude;
-
- procedure ClassifyWords;
- procedure InitializeTable(RefTable: RefHashTable);
- function HashAddress(x: reference): integer;
- procedure Insert(x: reference; pos: integer; var RefTable: RefHashTable);
- procedure Place(var F: fileref; RefTable: RefHashTable);
- function Empty(L: list): Boolean;
- procedure LinkEntries(RefTable: RefHashTable; var NewList: list);
- procedure RemoveFirst(var p: pointer; L: list);
- procedure SkipBlank(var F: text);
- procedure ReadReference(var r: pointer; var F: text);
- procedure WriteReference(p: pointer; var NewIndex, NewHashFile: text);
- procedure GetWordType(p: pointer);
- procedure Delete(var p: pointer);
- procedure CompareAndMerge(NewList: list;var InIndex,NewIndex,NewHashFile: text);
- procedure Merge(p, q: pointer; var r: pointer);
- procedure Divide(var p, q: pointer);
- procedure MergeSort(var p: pointer);
- procedure MainMergeSort(var L: list);
- }
-
-
- program IndexText(InText, InIndex, NewIndex, HashFile, NewHashFile,
- input, output);
-
- {Produces word counts and list of references for the document file
- InText. Uses the master word list in file InIndex, if provided. Output word
- list for new text goes to file NewIndex. HashFile contains the common words
- to be ignored. If not specified, it is created on output, containing the
- words so flagged by the user.}
- {This implementation uses only phases 1 and 2. A smaller array of text files
- is also used, as specified in the exercise section.}
-
- const
- maxwd = 20; {More letters in word will be ignored.}
- minwd = 1; {Shorter words will be ignored.}
- hashsize = 2003; {should be a prime}
- linesperpage = 66; {assumes standard spacing and paper}
- maxheight = 20; {for building binary tree in phase 2}
- A = 'A';
- Z = 'Z';
- hyphen = '-';
- blank = ' ';
- apostrophe = ''''; {requires two `'s to represent one}
- underscore = '_';
- ordbackspace = 8; {ASCII control character for backspace}
- ordformfeed = 12; {ASCII control character for new page}
- changecase = 32; {ASCII difference between upper and lower case}
- nfiles = 8; {number of temporary files for unprocessed words}
- MaxRowLength = 130; {maximum length of output records}
-
- type
- word = packed array[1..maxwd] of char;
- reference = record
- wd: word;
- pg: integer; {count or page number}
- end;
- fileref = file of reference; {used for local files}
- letter = A..Z;
- hashentry = 1..hashsize;
- filecode = 1..nfiles;
-
- var
- InText, {document being processed}
- InIndex, {master word list}
- NewIndex, {word list of current document}
- HashFile,
- NewHashFile: text;
- RefFile: array[filecode] of fileref; {local files used for auxilary
- storage of words from phase 1 to phase 2:
- Normally, a separate file exist for each initial letter,
- this version uses nfiles files due operating system constraints.}
- blankword: word; {will contain all blanks}
-
- {The next two variables were originally declared in procedure SplitWords,
- they have been moved to this level in order to access them globally.}
- outcount: array[filecode] of integer; {counters for word files}
- wordcount: integer; {count of all words in the text}
-
- intextname,
- inlistname,
- newlistname,
- newhashname: word; {used to get filename from user}
- lastletter: array[filecode] of letter; {last letter in each file}
- PresentTime,
- StartTime: integer; {used to track CPU time}
- RowLength: integer; {ensures records will not exceed MaxRowLength}
-
-
-
- function Lt( u, v: word): Boolean;
- {Determains if word u precedes word v lexicographically.}
- begin
- Lt := (u < v)
- end;
-
- procedure ReadWord( var F: text; var w: word);
- {Reads word w from text file F. Assumes not at end of file.}
- {Uses packed array, replace using a loop if your system does not
- support packed arrays. }
- begin {procedure ReadWord}
- read(F, w)
- end; {procedure ReadWord}
-
- procedure WriteWord( var F: text; w: word);
- {Writes word w to text file F}
- {Uses packed array, replace using a loop if your system does not
- support packed arrays. }
- begin {procedure WriteWord}
- write(F, w)
- end; {procedure WriteWord}
-
- procedure SetTimer; {Call once at beginning of program execution.}
- {Finds the CPU time when called, and keeps in variables for reference.}
- {System dependent procedure.}
- begin
- PresentTime := clock;
- StartTime := PresentTime;
- end;
-
- function TotalTime: real;
- {Returns the total CPU time, in seconds, since call to SetTimer.}
- {System dependent procedure.}
- begin
- TotalTime := (clock - StartTime) / 1000.0;
- end;
-
- function ElapsedTime: real;
- {Returns elapsed CPU time since last call to function ElapsedTime,
- or call to SetTimer, whichever is more recent.}
- {System dependent procedure.}
- var r: integer;
- begin
- r := clock;
- ElapsedTime := (r - PresentTime) / 1000.0;
- PresentTime := r;
- end;
-
-
-
- procedure SplitWords;
- {sets up hash table, reads text, and divides into nfiles word lists}
-
- var
- hash: array[hashentry] of reference; {hash table}
- pagecount: integer; {keeps the current page number}
- addpage: integer; {amount to increase pagecount after word}
- linecount: integer; {lines on the current page}
- w: word; {word currently being processed}
- x: hashentry; {location of w, if in hash table}
- endinput: Boolean; {true if and only if input has all been read}
- code: filecode; {into which file does word go?}
-
- {The following variables are kept for use in procedure GetWord, and for
- efficiency are set up only once in procedure Initialize:}
- backspace,
- formfeed: char;
- alphabet, {letters only - to start a word}
- contchar: set of char; {other characters ok in middle of word}
-
-
- function FindFile( ch: letter): filecode;
- {Uses binary decision tree to select one of nfiles = 8 files depending
- on the letter ch. These letters must be the same as those in the
- global array lastletter .}
-
- begin {function FindFile}
- if ch < 'M' then
- if ch < 'E' then
- if ch < 'C' then FindFile := 1
- else FindFile := 2
- else if ch < 'H' then FindFile := 3
- else FindFile := 4
- else if ch < 'S' then
- if ch < 'P' then FindFile := 5
- else FindFile := 6
- else if ch < 'T' then FindFile := 7
- else FindFile := 8
- end; {function FindFile}
-
-
-
- function HashAddress(w: word): hashentry;
- {calculates the location in hash table of word w, or, if not there,
- returns pointing to the blank word where w should go}
-
- var
- x, {calculated location}
- inc: integer; {increment for open addressing}
- begin {function HashAddress}
- x := abs(ord(w[1])*ord(w[2])+ord(w[4])+ord(w[6])) mod hashsize + 1;
- {Hash function assumes long word length. For short word machines
- we must ensure that the result is non-negative, and worry about overflow.}
-
- if (hash[x].wd <> w) and (hash[x].wd <> blankword) then
- begin
- inc := (abs(ord(w[3])-95) mod 29);
- {A key dependent increment is used to avoid clustering.}
- repeat
- inc := inc + 1;
- if inc > hashsize then
- writeln(w,' causes hash table to become full, infinite loop.');
- x := x + inc;
- if x > hashsize then x := x - hashsize;
- until (w = hash[x].wd) or (blankword = hash[x].wd)
- end;
- HashAddress := x
- end; {function HashAddress}
-
-
- procedure Initialize;
- {sets up constant-valued sets for use in GetWord. Opens the text file
- and initializes various counters. Opens file holding hash table (if any),
- and reads or otherwise initializes table}
- var
- i: integer; {general purpose loop control}
-
- begin {procedure Initialize}
- backspace:= chr(ordbackspace);
- formfeed := chr(ordformfeed); {initialize ASCII control characters}
- alphabet := ['A'..'Z', 'a'..'z']; {letters only, to start a word}
- contchar := [hyphen, apostrophe, backspace, underscore];
- {characters which will not terminate word}
- for i := 1 to maxwd do
- blankword[i] := blank;
-
- write('Name of input text file?');
- ReadWord(input, intextname); readln;
- open(InText, intextname, readonly);
- reset(InText);
- endinput := eof(InText);
-
- repeat
- write( 'What is the page number on which the text begins?');
- readln(pagecount);
- if pagecount < 0 then
- writeln('Must be a non-negative integer.')
- until pagecount >= 0;
- linecount := 0;
- addpage := 0;
- wordcount := 0;
-
- for i := 1 to nfiles do
- begin
- rewrite( RefFile[i] );
- outcount[i] := 0
- end;
- lastletter[1] := 'B';
- lastletter[2] := 'D';
- lastletter[3] := 'G';
- lastletter[4] := 'L';
- lastletter[5] := 'O';
- lastletter[6] := 'R';
- lastletter[7] := 'S';
- lastletter[8] := 'Z';
-
- reset(HashFile); {assumes HASHFILE.DAT is in current directory}
-
- for i := 1 to hashsize do
- with hash[i] do
- begin
- read(HashFile, pg);
- get(HashFile); {skip the blank between number and word}
- ReadWord(HashFile, wd);
- readln(HashFile);
- pg := 0; {initialize all the counts to 0}
- end;
- writeln('The hash table has been read.')
- end; {procedure Initialize}
-
-
-
- procedure GetWord( var w: word);
- {Gets words from input file InText, and returns only words
- at least minwd characters long. Parameter endinput becomes
- true if and only if the end of InText is reached with no word to return.
- the procedure also updates global variables wordcount and linecount,
- updates the global variable pagecount after each linesperpage cr's,
- or after each formfeed, whichever comes first, and
- uses the sets alphabet and contchar and various character constants.}
-
- label 1; {used by GetChar to exit procedure upon eof(InText)}
-
- var c: 0..maxwd; {count of characters in word}
- ch: char; {character currently processed}
- endln: Boolean; {at the end of a line?}
-
-
- procedure TellUserPage; {keep the user informed of progress}
- var i: integer;
- begin
- i := pagecount + addpage;
- writeln('At page', i:4, ' word count is', wordcount:7)
- end;
-
-
- procedure GetChar(var ch: char);
- {gets a character from input text into ch; checks for eof; updates
- page count and line count}
-
- begin {procedure GetChar}
- if eof(InText) then
- if c >= minwd then
- ch := '.' {special character to end the current word}
- else begin {no word to return; set endinput}
- endinput := true;
- goto 1 {exit from GetWord.}
- end
- else begin {not end of file: process next character}
- while InText^ in [underscore, backspace] do
- get( InText);
- ch := InText^;
- endln := eoln(InText);
- get(InText);
- if endln then
- begin
- linecount := linecount + 1;
- if linecount >= linesperpage then
- begin
- addpage := addpage + 1;
- linecount := 0;
- TellUserPage
- end
- end;
- if ch = formfeed then
- begin
- addpage := addpage + 1;
- linecount := 0;
- TellUserPage;
- endln := true; {Treat formfeed like end of line.}
- ch := blank
- end
- end
- end; {procedure GetChar}
-
-
- procedure AddChar(ch: char);
- {adds given character to word, if possible}
- begin {procedure AddChar}
- if c < maxwd then
- begin
- c := c + 1;
- w[c] := ch
- end
- end; {procedure AddChar}
-
-
- begin {procedure GetWord}
- repeat {until current word is at least minwd chars long}
- c := 0;
- repeat
- GetChar(ch) {Find a letter which will start the word.}
- until ch in alphabet;
- pagecount := pagecount + addpage;
- addpage := 0;
- if ch in ['a'..'z'] then {translate first letter to upper case.}
- ch := chr(ord(ch) - changecase); {assumes ASCII ordering of letters}
- AddChar(ch); {put first letter into the word}
- GetChar(ch);
- while (ch in alphabet) or (ch in contchar) do
- if ch in alphabet then {add letters directly to word}
- begin {processing letter}
- AddChar(ch);
- GetChar(ch)
- end {processing letter}
- else if ch = hyphen then
- begin {processing hyphen}
- GetChar(ch); {Find what comes after hyphen.}
- if endln then
- while ch = ' ' do
- GetChar(ch) {Delete both the hyphen and the end of line}
- else if ch = hyphen then {Two hyphens form a dash; ends word}
- ch := blank {Use a blank to terminate the word.}
- else if ch in alphabet then
- AddChar(hyphen) {Include other hyphens in word}
- else {nothing}
- end {processing hyphen}
- else if ch = apostrophe then
- begin {processing apostrophe}
- GetChar(ch);
- if ch = 's' then {Delete `'s' at end of word only}
- begin
- GetChar(ch);
- if ch in contchar then
- begin
- AddChar(apostrophe);
- AddChar('s')
- end
- end
- else if ch in alphabet then
- AddChar(apostrophe) {Allow contractions.}
- end {processing apostrophe}
- else {Remaining possibilities are backspace and underscore.}
- GetChar(ch); {Delete these characters.}
- {While loop on continuing characters ends here.}
- wordcount := wordcount + 1
- until c >= minwd; {Skip over short words.}
-
- while c < maxwd do {Fill with blanks.}
- begin
- c := c + 1;
- w[c] := blank
- end;
- 1: {When end of file occurs, program will exit to here from GetChar}
- end; {procedure GetWord}
-
-
-
- procedure Conclude;
- {Writes out counts of various word lists. For some systems, it is
- necessary to close files, which should be done here.}
-
- var
- i,j: integer; {loop index}
- response: char; {user's answer to question}
-
- begin {procedure Conclude}
- writeln('The total number of words read in is ', wordcount:7);
- writeln;
- writeln('The number of words to process further in the next stage,');
- writeln('in each temporary file, is below.');
- writeln(' a-b c-d e-g h-l m-o p-r s t-z');
- for i := 1 to nfiles do
- write(outcount[i]:8);
- writeln;
- writeln;
-
- (* not implemented:
- repeat
- write('Do you wish the counts from hash table to be kept in a file (y,n)?');
- readln(response);
- if response > 'Z' then response := chr(ord(response)-changecase)
- until response in ['N', 'Y'];
- if response = 'Y' then
- begin
-
- write('Name of file ?');
- ReadWord(input, newhashname);
- readln;
- open(NewHashFile, newhashname);
- rewrite(NewHashFile);
-
- for i := 1 to hashsize do
- with hash[i] do begin
- write(NewHashFile, pg:4, ' ');
- j := 1;
- repeat
- write(NewHashFile, wd[j]);
- j := j + 1;
- until (wd[j] = ' ') or (j >= maxwd);
- writeln(NewHashFile)
- end
- end *)
- end; {procedure Conclude}
-
-
- begin {procedure SplitWords}
- Initialize; {sets up files, hash table, constants}
- GetWord(w); {obtain a single word from InText}
- while not endinput do
- begin
- x := HashAddress(w);
- if w = hash[x].wd then
- hash[x].pg := hash[x].pg + 1
- else begin {not in hash table; put into RefFile}
- code := FindFile( w[1] );
- outcount[code] := outcount[code] + 1;
- with RefFile[code]^ do
- begin
- wd := w;
- pg := pagecount
- end;
- Put(RefFile[code])
- end;
- GetWord(w)
- end;
- Conclude {writes word counts to output.}
- end; {procedure SplitWords}
-
-
-
-
- {start of phase 2}
-
- procedure ClassifyWords;
- {The references stored in the temporary files are placed in a new hash table,
- the words from the file InIndex are compared with the words in the new table
- as they are merged into the file NewIndex.}
-
- const
- RefTableSize = 3023; {Size of the hash table to temporarily store words.}
- RefTableMax = 3022;
- type
- wordtype = (hash, count, index); {ways to process a word}
- pointref = ^reflist;
- reflist = record {list of references}
- pg: integer;
- next: pointref
- end;
- pointer = ^node;
- node = record {node of list storing wrods.}
- wd: word;
- kind: wordtype;
- ct: integer;
- ref: pointref;
- next: pointer
- end;
- {Cannot use varying types as @wordtype is not known upon first reading.}
- list = record
- head: pointer
- end;
- RefHashTable = array[0..RefTableMax] of list;
-
- var
- code: filecode; {loop through temporary files}
- RefTable: RefHashTable; {stores all references in memory}
- NewList: list;
-
- function Empty(L: list): Boolean;
- begin
- Empty := (L.head = nil)
- end;
-
- procedure InitializeTable(var RefTable: RefHashTable);
- var
- i: integer;
- begin {procedure InitializeTable}
- for i := 0 to RefTableMax do
- RefTable[i].head := nil;
- end; {procedure InitializeTable}
-
-
- function RefTableAddress(x: reference): integer;
- { Returns hashed address of reference. }
- var
- i: integer;
- h: integer;
- begin {function Hash}
- h := 0;
- with x do
- for i := 1 to maxwd do
- h := h + ord(wd[i]);
- RefTableAddress := h mod RefTableSize
- end; {function Hash}
-
-
- procedure Insert(x: reference; pos: integer; var RefTable: RefHashTable);
- { Inserts the reference into the hash table of references. }
- var
- done: Boolean;
- p: pointer;
- q: pointref;
- begin {procedure Insert}
- done := false;
- p := RefTable[pos].head;
- while (p <> nil) and (not done) do
- begin {Search for the word, update the reference if it is found.}
- if p^.wd = x.wd then
- begin
- p^.ct := p^.ct + 1; {update count and page reference}
- new(q);
- q^.pg := x.pg;
- q^.next := p^.ref;
- p^.ref := q;
- done := true
- end
- else
- p := p^.next
- end;
- if not done then
- begin {Insert a new entry if the word is not already in the table.}
- p := nil;
- new(p);
- p^.wd := x.wd;
- p^.ct := 1; {Initialize the count and the page refernces.}
- new(q);
- q^.pg := x.pg;
- q^.next := nil;
- p^.ref := q;
- p^.next := RefTable[pos].head;
- RefTable[pos].head := p
- end
- end; {procedure Insert}
-
-
- procedure Place(var F: fileref; var RefTable: RefHashTable);
- { Places the words in file @F into the reference table. }
- var
- x: reference;
- begin {procedure Place}
- reset(F);
- while not eof(F) do
- begin
- x := F^;
- get(F);
- Insert(x, RefTableAddress(x), RefTable)
- end
- end; {procedure Place}
-
-
- procedure LinkEntries(var RefTable: RefHashTable; var NewList: list);
- { The references in the hashed table are combined into the list @NewList. }
- var
- i: integer;
- p: pointer;
- begin {procedure LinkEntries}
- i := 0;
- while (i < RefTableMax) and Empty(RefTable[i]) do {find the first entry}
- i := i + 1;
- if i <= RefTableMax then
- begin
- NewList.head := RefTable[i].head; {Initialize the list to point to the first entry.}
- p := RefTable[i].head;
- if p <> nil then {Find the end of the first chain.}
- while p^.next <> nil do
- p := p^.next;
- while (i < RefTableMax) do {link remaining entries}
- begin
- i := i + 1;
- if not Empty(RefTable[i]) then
- begin
- p^.next := RefTable[i].head;
- while p^.next <> nil do {Move @p to the end of the chain.}
- p := p^.next
- end
- end
- end
- else
- NewList.head := nil
- end; {procedure LinkEntries}
-
-
- procedure RemoveFirst(var p: pointer; var L: list);
- { Removes the first node from the list @L. }
- begin
- p := L.head;
- if not Empty(L) then
- begin
- L.head := L.head^.next;
- p^.next := nil
- end
- end;
-
-
- procedure ReadReference(var r: pointer; var F: text);
- { Reads refernce from the file @F. }
- var
- k: char;
- begin {procedure ReadReference}
- if eof(F) then
- r := nil
- else begin
- ReadWord(F, r^.wd);
- readln(F, k);
- case k of
- 'F', 'f': r^.kind := hash;
- 'C', 'c': begin
- r^.kind := count;
- r^.ct := 0
- end;
- 'I', 'i': begin
- r^.kind := index;
- r^.ref := nil
- end
- end
- end
- end; {procedure ReadReference}
-
-
- procedure WriteReference(p: pointer; var NewIndex, NewHashFile: text);
- var
- q: pointref;
- begin {procedure WriteReference}
- with p^ do
- case kind of
- hash: begin
- WriteWord(NewHashFile, wd);
- writeln(NewHashFile)
- end;
- count:begin
- WriteWord(NewIndex, wd);
- write(NewIndex, 'c');
- writeln(NewIndex, ct:5)
- end;
- index:begin
- WriteWord(NewIndex, wd);
- write(NewIndex, 'i');
- q := ref;
- while q <> nil do
- begin
- write(NewIndex, q^.pg:5);
- q := q^.next
- end;
- writeln(NewIndex)
- end
- end
- end; {procedure WriteReference}
-
-
- procedure GetWordType(p: pointer);
- { Request the user to specify the category of the given word. }
- var
- response: char;
- begin {procedure GetWordType}
- with p^ do
- begin
- repeat
- WriteWord(output, wd);
- write(' is (F, C, I)?');
- readln(response)
- until response in ['F', 'f', 'C', 'c', 'I', 'i'];
- case response of
- 'F', 'f': kind := hash;
- 'C', 'c': kind := count;
- 'I', 'i': kind := index
- end
- end
- end; {procedure GetWordType}
-
-
- procedure Delete(var p: pointer);
- { Delete the word @p^ as well as all of the page references associated with it. }
- var
- q, r: pointref;
- begin {procedure Delete}
- if p^.kind = index then
- begin
- q := p^.ref;
- while q <> nil do
- begin {dispose the reference list}
- r := q^.next;
- dispose(q);
- p^.ref := r;
- q := r
- end
- end;
- dispose(p) {dispose the node itself}
- end; {procedure Delete}
-
-
- procedure CompareAndMerge(var L: list; var InIndex, NewIndex, NewHashFile: text);
- { Compare the list @L with @InIndex, merge if was found. }
- var
- p, r: pointer;
- begin {procedure CompareAndMerge}
- RemoveFirst(p, L);
- new(r);
- ReadReference(r, InIndex);
- while p <> nil do
- if r = nil then
- begin
- GetWordType(p);
- WriteReference(p, NewIndex, NewHashFile);
- Delete(p); {Remove reference list and node from memory.}
- RemoveFirst(p, L)
- end
- else if p^.wd < r^.wd then
- begin
- GetWordType(p);
- WriteReference(p, NewIndex, NewHashFile);
- Delete(p); {Remove reference list and node from memory.}
- RemoveFirst(p, L)
- end
- else if p^.wd > r^.wd then {do not write word not used to NewIndex}
- ReadReference(r, InIndex)
- else begin {p^.wd = r^.wd}
- p^.kind := r^.kind;
- WriteReference(p, NewIndex, NewHashFile);
- Delete(p);
- RemoveFirst(p, L);
- ReadReference(r, InIndex)
- end
- end; {procedure CompareAndMerge}
-
-
- procedure Merge(p, q: pointer; var r: pointer);
- {Merges two sorted lists into one, that will begin at r;
- requires that both lists be non empty. This version is modified
- slightly from the version listed in the text due to a difference
- in the data structures used.}
- var
- s: pointer; {always points to last node of sorted list}
- begin {procedure Merge}
- if (p = nil) or (q = nil) then
- writeln('Merge called with empty list(s).');
- {First find the head, r, of the merged list.}
- if p^.wd <= q^.wd then {change .info.key to .wd}
- begin
- r := p;
- p := p^.next
- end
- else begin
- r := q;
- q := q^.next
- end;
- s := r; {s always points to the last entry of the merged list.}
- while (p <> nil) and (q <> nil) do
- if p^.wd <= q^.wd then {change .info.key to .wd}
- begin
- s^.next := p; {Attach the node with the smaller key to the sorted list.}
- s := p;
- p := p^.next {Advance to the next unmerged node.}
- end
- else begin
- s^.next := q;
- s := q;
- q := q^.next
- end;
- {After one list is exhausted, attach the remainder of the other one.}
- if p = nil then
- s^.next := q
- else
- s^.next := p
- end; {procedure Merge}
-
- (*===========================================================================*)
- procedure Divide(var p, q: pointer);
- {takes the list to which p points, divides it in half, and returns with
- p pointing to head of the first half and q to the head of second half;
- requires that the original list contain at least two items, or an
- error occurs}
- var
- r: pointer;
- begin {procedure Divide}
- q := p; {Start q at position 1, and r at position 3.}
- r := p^.next;
- r := r^.next;
- while r <> nil do {Move r two positions for each move of q.}
- begin
- r := r^.next;
- q := q^.next;
- if r <> nil then
- r := r^.next
- end;
- {Break the list into halves after q^.}
- r := q^.next;
- q^.next := nil;
- q := r
- end; {procedure Divide}
-
- procedure MainMergeSort(var L: list);
- {Main procedure to invoke recursive MergeSort}
-
- procedure MergeSort(var p: pointer);
- {divides the list starting at p^ in half, sorts it recursively, and merges
- the sublists}
- var
- q: pointer; {marks the halfway point in the list}
- begin
- if p <> nil then if p^.next <> nil then
- begin {Otherwise, list has 0 or 1 entry, with no need to sort.}
- Divide(p, q);
- MergeSort(p);
- MergeSort(q);
- Merge(p, q, p)
- End
- End;
-
- begin
- MergeSort(L.head)
- end;
- (*===========================================================================*)
-
- begin {procedure ClassifyWords}
-
- write('Name of input word list ?');
- ReadWord(input, inlistname);
- readln;
- open(InIndex, inlistname, readonly); {may vary on different systems}
- reset(InIndex);
-
- write('Name of output word list ?');
- ReadWord(input, newlistname);
- readln;
- open(NewIndex, newlistname); {may vary on different systems}
- rewrite(NewIndex);
-
- write('Name of file for new hash words ?');
- ReadWord(input, newhashname);
- readln;
- open(NewHashFile, newhashname); {may vary on different systems}
- rewrite(NewHashFile);
-
- InitializeTable(RefTable);
- for code := 1 to nfiles do
- Place(RefFile[code], RefTable);
- LinkEntries(RefTable, NewList);
- MainMergeSort(NewList);
- if not Empty(NewList) then
- CompareAndMerge(NewList, InIndex, NewIndex, NewHashFile);
- close(InIndex); {may vary on different systems}
- close(NewIndex);
- close(NewHashFile)
- end; {procedure ClassifyWords}
-
-
-
- begin {main program}
- SetTimer;
- SplitWords; {Phase 1}
- writeln('Time in first phase is ', ElapsedTime:7:1, ' seconds.');
- writeln;
-
- ClassifyWords; {Phase 2}
- writeln('Time in second phase is', ElapsedTime:7:1, ' seconds.');
-
- writeln;
- writeln('Processing of input document ', intextname, ' is complete.');
- writeln('Total time in program was ', TotalTime:7:1, ' seconds.')
- end.
-
-