home *** CD-ROM | disk | FTP | other *** search
- {$A+,B-,D-,E-,F+,G-,I+,L-,N-,O-,R-,S-,V+,X+}
- {$M 16384,0,655360}
- Program MakeDict;
- { MAKEDICT - A dictionary builder. Copyright (c) 1990,91 by Edwin T. Floyd.
- Build an optimal dictionary from an input word list. The input word list
- should contain one word per line. The output dictionary is saved to disk.
- Run MAKEDICT with no parameters for brief documentation. }
- Uses Dict, Crt, Dos;
Const
  { Bits per word used when no <bits> parameter is given on the command line }
  DefaultBits = 14;

Var
  { Words: number of non-empty input lines found by CountWords.
    ExtraWords: additional capacity requested via the <extra> parameter. }
  Words, ExtraWords : LongInt;
  { Number of bits used to represent each word }
  Bits : Byte;
  { Input word-list file name and output dictionary file name }
  InFileName, OutFileName : PathStr;
  { Text-file buffer handed to SetTextBuf by both passes over the input }
  InBuf : Array[1..16384] Of Char;
-
Procedure InterpretParam;
{ Interpret the command-line parameters.

  Sets the globals InFileName, OutFileName, Bits and ExtraWords.
  With no parameters, prints the usage text and halts with exit code 1.
  Halts with exit code 1 when <bits> is not a number in 1..255 or when
  <extra> is not a non-negative number. }
Var
  i : Integer;  { Val error position; also used as a loop index }
  v : LongInt;  { <bits> parsed at full width so 1..255 can be range-checked }
  p : PathStr;
  d : DirStr;
  n : NameStr;
  e : ExtStr;
Begin
  WriteLn('MAKEDICT v1.2 - A dictionary builder. Copyright (c) 1990,91 by Edwin T. Floyd.');
  If ParamCount < 1 Then Begin
    WriteLn('Run like this: ');
    WriteLn;
    WriteLn(' MAKEDICT <infile> [<bits>] [<extra>]');
    WriteLn;
    WriteLn('<infile> specifies the input file name, and optional <bits>');
    WriteLn('specifies the number of bits to use for each word. If <bits>');
    WriteLn('is omitted, the program will use ', DefaultBits,
      ' bits per word. Optional');
    WriteLn('<extra> is the number of extra words to allow for in the');
    WriteLn('dictionary. The output file has the same name as the input');
    WriteLn('file, but the extension will be ".DCT" (or ".DIC" if the input');
    WriteLn('file extension is ".DCT"). Examples:');
    WriteLn;
    WriteLn(' MAKEDICT atob.txt 15');
    WriteLn(' MAKEDICT ctod.txt');
    WriteLn(' MAKEDICT user.lst 12 2000');
    Halt(1);
  End;

  p := FExpand(ParamStr(1));
  FSplit(p, d, n, e);
  InFileName := p;

  { FSplit returns the extension INCLUDING its leading period, so the
    comparison must be against '.DCT'.  Comparing against 'DCT' never
    matched, which let a .DCT input be overwritten by its own output.
    Upper-case first so the test does not depend on how the name was typed. }
  For i := 1 To Length(e) Do e[i] := UpCase(e[i]);
  If e = '.DCT' Then e := '.DIC' Else e := '.DCT';
  { The directory part (d) is deliberately not used, as in the original:
    the output file is named relative to the current directory. }
  OutFileName := n + e;

  If ParamCount > 1 Then Begin
    { Parse into a LongInt rather than directly into the Byte variable so
      that out-of-range values are rejected instead of relying on how Val
      stores an oversized number into a Byte. }
    Val(ParamStr(2), v, i);
    If (i <> 0) Or (v < 1) Or (v > 255) Then Begin
      WriteLn('Bits value is ', ParamStr(2), ', should be 1..255');
      Halt(1);
    End;
    Bits := v;
  End Else Bits := DefaultBits;

  If ParamCount > 2 Then Begin
    Val(ParamStr(3), ExtraWords, i);
    If (i <> 0) Or (ExtraWords < 0) Then Begin
      { Report the third parameter (the one that failed); zero is accepted }
      WriteLn('Extra words value is ', ParamStr(3), ', should be numeric >= 0');
      Halt(1);
    End;
  End Else ExtraWords := 0;
End;
-
Procedure CountWords;
{ First pass over the input: store the number of non-empty lines of
  InFileName in the global Words. }
Var
  Source : Text;
  Line : String;
  Total : LongInt;
Begin
  Total := 0;
  Assign(Source, InFileName);
  SetTextBuf(Source, InBuf);  { use the large shared buffer for reading }
  Reset(Source);
  Repeat
    ReadLn(Source, Line);
    { Blank lines are not words }
    If Length(Line) > 0 Then Total := Total + 1;
  Until Eof(Source);
  Close(Source);
  Words := Total;
End;
-
Procedure BuildDictionary;
{ Second pass over the input: insert every non-empty line of InFileName
  into a dictionary initialised for Words + ExtraWords entries at Bits bits
  per word, print statistics, and save the dictionary to OutFileName.

  Blank lines are skipped, matching CountWords, so the number of strings
  inserted agrees with the count the dictionary was sized for. }
Var
  Count : LongInt;  { Number of non-empty lines processed so far }
  d : Dictionary;
  InFile : Text;
  s : String;

  Procedure WriteChance(Caption : String; Chance : Real);
  { Print an error chance in the form "1/n".  A chance of zero cannot be
    inverted (1/0 is a run-time error), so it is printed as a plain 0.
    NOTE(review): whether the Dict unit can actually return 0 here is not
    visible from this file - the guard is defensive. }
  Begin
    If Chance > 0 Then
      WriteLn(Caption, '1/', 1/Chance:1:0)
    Else
      WriteLn(Caption, '0');
  End;

Begin
  Assign(InFile, InFileName);
  SetTextBuf(InFile, InBuf);
  Reset(InFile);
  Count := 0;
  d.Init(Words + ExtraWords, Bits);
  WriteLn('Reading ', InFileName, ' for ', Words, ' words, ', Bits, ' bits');
  { Test Eof before reading so an empty file inserts nothing }
  While Not Eof(InFile) Do Begin
    ReadLn(InFile, s);
    If s <> '' Then Begin
      Inc(Count);
      { Progress indicator: rewrite the counter in place every 1000 words }
      If (Count Mod 1000) = 0 Then Write(^M, Count);
      { InsertString returning True is reported as a probable duplicate }
      If d.InsertString(s) Then
        WriteLn(^M, Count, ' ', s, ' appears to be already in the dictionary.');
    End;
  End;
  Close(InFile);
  WriteLn(^M, Count, ' words read ', d.DictCount,
    ' words inserted in dictionary');
  WriteChance('Estimated error chance: ', d.EstError);
  WriteChance('Actual error chance: ', d.ActError);
  WriteLn('Saving ', OutFileName, ', ', d.DictionarySize, ' bytes');
  d.SaveDictionary(OutFileName);
  d.Done;
End;
-
Begin
  { 1. Parse the command line into InFileName, OutFileName, Bits, ExtraWords }
  InterpretParam;
  { 2. First pass: count the words so the dictionary can be sized }
  CountWords;
  { 3. Second pass: fill the dictionary and save it to disk }
  BuildDictionary;
End.
-