home *** CD-ROM | disk | FTP | other *** search
- /*
- [Header: soundexmodule.c,v 1.2 95/05/02 15:40:45 dwwillia Exp ]
-
- Perform soundex comparisons on strings.
-
- Soundex is an algorithm that hashes English strings into a numerical value.
- Strings that sound the same are hashed to the same value. This allows
- for non-literal string matching.
-
- From: David Wayne Williams <dwwillia@iucf.indiana.edu>
-
- Apr 29 1996 - added get_soundex method that returns the soundex of a
- string (chrish@qnx.com)
- May 2 1996 - added doc strings (chrish@qnx.com)
- */
-
- #include <string.h>
- #include <ctype.h>
- #include "Python.h"
-
- #ifdef toupper
- /* do not use toupper MACRO */
- #undef toupper
- #endif
-
/* Doc string attached to the module object when it is registered. */
static char soundex_module__doc__[] =
    "Perform Soundex comparisons on strings, allowing non-literal matching.";
-
/* Compute the soundex hash of a NUL-terminated string.
 *
 * str     Input string.  An empty string hashes to "000000".
 * result  Output buffer of at least 7 bytes.  It receives exactly six
 *         characters followed by a terminating '\0'.
 *
 * The first character of str is copied to result upper-cased.  The rest
 * of the input is then translated until six output characters have been
 * produced or the input is exhausted:
 *
 *   1) All vowels, W, H and Y are skipped, as is every other character
 *      that has no digit assigned below.
 *
 *   2) BFPV     = 1
 *      CGJKQSXZ = 2
 *      DT       = 3
 *      L        = 4
 *      MN       = 5
 *
 *   3) A digit is written only when it differs from the character most
 *      recently written to result, i.e. duplicate digits are removed.
 *      Skipped characters do not separate duplicates.
 *
 * Finally the result is padded on the right with '0' to six characters.
 */
static void soundex_hash(const char *str, char *result)
{
    enum { HASH_LEN = 6 };      /* characters in a hash, excluding the NUL */
    const char *sptr = str;     /* pointer into str */
    char *rptr = result;        /* pointer into result */

    if (*str == '\0') {
        strcpy(result, "000000");
        return;
    }

    /* Preserve the first character of the input string.  The cast to
       unsigned char keeps toupper() defined for bytes >= 0x80 on
       platforms where plain char is signed. */
    *rptr++ = (char)toupper((unsigned char)*sptr++);

    for (; (rptr - result) < HASH_LEN && *sptr != '\0'; sptr++) {
        char digit;

        switch (toupper((unsigned char)*sptr)) {
        case 'B':
        case 'F':
        case 'P':
        case 'V':
            digit = '1';
            break;
        case 'C':
        case 'G':
        case 'J':
        case 'K':
        case 'Q':
        case 'S':
        case 'X':
        case 'Z':
            digit = '2';
            break;
        case 'D':
        case 'T':
            digit = '3';
            break;
        case 'L':
            digit = '4';
            break;
        case 'M':
        case 'N':
            digit = '5';
            break;
        default:
            /* Vowels, W, H, Y and anything else: no translation. */
            continue;
        }

        /* Only emit the first of adjacent equal translations. */
        if (rptr[-1] != digit) {
            *rptr++ = digit;
        }
    }

    /* Pad 0's on right side of string out to HASH_LEN characters. */
    while (rptr < result + HASH_LEN) {
        *rptr++ = '0';
    }

    /* Terminate the result string. */
    result[HASH_LEN] = '\0';
}
-
-
- /* Return the actual soundex value. */
- /* Added by Chris Herborth (chrish@qnx.com) */
- static char soundex_get_soundex__doc__[] =
- "Return the (English) Soundex hash value for a string.";
- static PyObject *
- get_soundex(PyObject *self, PyObject *args)
- {
- char *str;
- int retval;
- char sdx[7];
-
- if(!PyArg_ParseTuple( args, "s", &str))
- return NULL;
-
- soundex_hash(str, sdx);
-
- return PyString_FromString(sdx);
- }
-
- static char soundex_sound_similar__doc__[] =
- "Compare two strings to see if they sound similar (English).";
- static PyObject *
- sound_similar(PyObject *self, PyObject *args)
- {
- char *str1, *str2;
- int return_value;
- char res1[7], res2[7];
-
- if(!PyArg_ParseTuple(args, "ss", &str1, &str2))
- return NULL;
-
- soundex_hash(str1, res1);
- soundex_hash(str2, res2);
-
- if(!strcmp(res1,res2))
- return Py_BuildValue("i",1);
- else
- return Py_BuildValue("i",0);
- }
-
- /* Python Method Table.
- */
- static PyMethodDef SoundexMethods[] =
- {
- {"sound_similar", sound_similar, 1, soundex_sound_similar__doc__},
- {"get_soundex", get_soundex, 1, soundex_get_soundex__doc__},
-
- {NULL, NULL } /* sentinel */
- };
-
-
- /* Register the method table.
- */
- void
- initsoundex()
- {
- (void) Py_InitModule4("soundex",
- SoundexMethods,
- soundex_module__doc__,
- (PyObject *)NULL,
- PYTHON_API_VERSION);
- }
-