home *** CD-ROM | disk | FTP | other *** search
- ############################################################################
- #
- # File: soundex1.icn
- #
- # Subject: Procedures for Soundex algorithm
- #
- # Author: John David Stone
- #
- # Date: March 13, 1992
- #
- ###########################################################################
- #
- # When names are communicated by telephone, they are often transcribed
- # incorrectly. An organization that has to keep track of a lot of names has
- # a need, therefore, for some system of representing or encoding a name that
- # will mitigate the effects of transcription errors. One idea, originally
- # proposed by Margaret K. Odell and Robert C. Russell, uses the following
- # encoding system to try to bring together occurrences of the same surname,
- # variously spelled:
- #
- # Encode each of the letters of the name according to the
- # following equivalences:
- #
- # a, e, h, i, o, u, w, y -> *
- # b, f, p, v -> 1
- # c, g, j, k, q, s, x, z -> 2
- # d, t -> 3
- # l -> 4
- # m, n -> 5
- # r -> 6
- #
- #
- # If any two adjacent letters have the same code, change the code for the
- # second one to *.
- #
- # The Soundex representation consists of four characters: the initial letter
- # of the name, and the first three digit (non-asterisk) codes corresponding
- # to letters after the initial. If there are fewer than three such digit
- # codes, use all that there are, and add zeroes at the end to make up the
- # four-character representation.
- #
- ############################################################################
-
- procedure soundex(name)
- local
- coded_name, new_name
-
- coded_name := encode(strip(name))
- new_name := name[1]
- every pos := 2 to *coded_name do {
- if coded_name[pos] ~== "*" then
- new_name := new_name || coded_name[pos]
- if *new_name = 4 then
- break
- }
- return new_name || repl ("0", 4 - *new_name)
- end
-
- procedure encode(name)
-
- name := map(name, &ucase, &lcase)
- name := map(name, "aehiouwybfpvcgjkqsxzdtlmnr",
- "********111122222222334556")
- every pos := *name to 2 by -1 do
- if name[pos - 1] == name[pos] then
- name[pos] := "*"
- return name
- end
-
- procedure strip(name)
- local
- result
- static
- alphabet
- initial alphabet := string(&letters)
-
- result := ""
- every ch := !name do
- if find(ch, alphabet) then
- result ||:= ch
- return result
- end
-