home *** CD-ROM | disk | FTP | other *** search
- {
- This function uses a modified version of the soundex algorithm as implemented
- by Glen F. Marshall. The changes to Marshall's algorithm are that the
- end result is an integer rather than a string, and the algorithm simply
- ignores non-alphabet characters.
-
- To this, the NearMiss algorithm is added, which parses an input string
- and uses spaces as delimeters to break the string up into substrings, which it
- then sends to the soundex1 algorithm. The results are summed into the
- resulting real value, and returned to the calling program. In this manner,
- you can get a "close enough" match on very long strings, where minor spelling
- errors between strings can be accounted for by subtracting one NearMiss
- value from the other, and determining the size of the mis-match.
- In this manner you can determine how close a near-miss can come to be
- considered a match.
-
- Written and placed in the public domain by
- John Sims
- 1643 Calle Lindero
- Lompoc, CA 93436
-
- Use, modify, or do with it what you will.... enjoy!
- }
-
-
-
- Function NearMiss(input_string : anystr) : Real;
- var
- Beginning, Ending, Whoa : Integer;
- Temp : Real;
- SCode : anystr;
-
-
- function soundex1(var name: anystr): integer;
- var
- work: array[0..3] of char;
- code: char;
- counter, i,j: integer;
-
- function encode(var c: char): char;
- var
- r: char;
- begin
- case upcase(c) of
- 'B','F','P','V': r := '1';
- 'C','G','J','K','Q','S','X','Z': r := '2';
- 'D','T': r := '3';
- 'L': r := '4';
- 'M','N': r := '5';
- 'R': r := '6';
- 'A','E','I','O','U','Y': r := '7';
- 'H','W': r := '8';
- else r := ' ';
- end;
- encode := r;
- end; {encode}
- begin
- if length(name) > 0
- then work[0] := encode(name[1])
- else work[0] := ' ';
- if work[0] <> ' '
- then i := 2
- else i := length(name) + 1;
- j := 0; counter := 0;
- while (i <= length(name)) and (j < 3) do
- begin
- code := encode(name[i]);
- if code in ['1'..'6']
- then if work[j] <> code
- then begin
- counter := counter + 1;
- j := j+1;
- work[j] := code;
- end;
- i := i + 1;
- end;
- for j := j+1 to 3 do work[j] := '0';
- Val(work, counter, j);
- Soundex1 := counter;
- end; {soundex}
-
- Procedure FindNextBlank;
- begin
- While (Input_String[Ending] <> ' ') and (Ending <= Whoa) do
- Ending := Ending + 1;
- End;
-
- Procedure FindNextChar;
- begin
- While(Input_String[Beginning] = ' ') and (Beginning <= Whoa) do
- Beginning := Beginning + 1;
- If Beginning > Whoa then Beginning := Whoa
- else Ending := Beginning;
- End;
-
- begin
- Beginning := 1;
- Ending := 1;
- Whoa := Length(Input_String);
- Temp := 0.0;
- While Beginning <= Whoa do
- begin
- FindNextChar;
- FindNextBlank;
- SCode := Copy(Input_String, Beginning, (Ending - Beginning) + 1);
- Temp := Temp + Soundex1(SCode);
- Beginning := Ending + 1;
- end;
- NearMiss := Temp;
- End;