Fix an amazing number of typos & malformed sentences reported by Detlef
[python/dscho.git] / Modules / soundex.c
blob3ef6824892afcb0a6c3b4e9282e764aa57239314
/*
  [Header: soundexmodule.c,v 1.2 95/05/02 15:40:45 dwwillia Exp ]

  Perform soundex comparisons on strings.

  Soundex is an algorithm that hashes English strings into a numerical
  value.  Strings that sound the same are hashed to the same value.  This
  allows for non-literal string matching.

  From: David Wayne Williams <dwwillia@iucf.indiana.edu>

  Apr 29 1996 - added get_soundex method that returns the soundex of a
                string (chrish@qnx.com)
  May  2 1996 - added doc strings (chrish@qnx.com)
*/
17 #include <string.h>
18 #include <ctype.h>
19 #include "Python.h"
/* Module-level doc string, passed to Py_InitModule4() by initsoundex(). */
static char soundex_module__doc__[] =
    "Perform Soundex comparisons on strings, allowing non-literal matching.";
/* Compute the 6-character Soundex-style hash of a string.
 *
 *   str    - NUL-terminated input string (not modified).
 *   result - output buffer of at least 7 bytes; on return it holds exactly
 *            6 characters followed by a terminating NUL.
 *
 * An empty input yields "000000".  Otherwise the first input character is
 * copied (upper-cased) and the remaining characters are translated to digit
 * codes until 6 output characters have been produced or the input ends; the
 * result is then right-padded with '0'.
 */
static void soundex_hash(const char *str, char *result)
{
    const char *sptr = str;     /* pointer into str */
    char *rptr = result;        /* pointer into result */

    if (*str == '\0') {
        strcpy(result, "000000");
        return;
    }

    /* Preserve the first character of the input string.  The argument is
       converted to unsigned char first: passing a negative plain-char value
       (a byte >= 0x80 where char is signed) to toupper() is undefined. */
    *(rptr++) = (char)toupper((unsigned char)*(sptr++));

    /* Translate the rest of the input string into result.  The following
       transformations are used:

       1) All vowels, W, H and Y are skipped (as is any other character
          that has no digit code below).

       2) BFPV     = 1
          CGJKQSXZ = 2
          DT       = 3
          L        = 4
          MN       = 5
          R        = 6

       3) Only translate the first of adjacent equal translations, i.e.
          remove duplicate digits.  "Adjacent" refers to the output: a digit
          is dropped when it equals the character written just before it.
    */
    for (; (rptr - result) < 6 && *sptr != '\0'; sptr++) {
        char code;

        switch (toupper((unsigned char)*sptr)) {
        case 'B': case 'F': case 'P': case 'V':
            code = '1';
            break;
        case 'C': case 'G': case 'J': case 'K':
        case 'Q': case 'S': case 'X': case 'Z':
            code = '2';
            break;
        case 'D': case 'T':
            code = '3';
            break;
        case 'L':
            code = '4';
            break;
        case 'M': case 'N':
            code = '5';
            break;
        case 'R':
            code = '6';
            break;
        default:
            /* A, E, I, O, U, Y, W, H and non-letters produce no output. */
            continue;
        }

        if (*(rptr - 1) != code)
            *(rptr++) = code;
    }

    /* Pad 0's on right side of string out to 6 characters. */
    for (; rptr < result + 6; rptr++)
        *rptr = '0';

    /* Terminate the result string. */
    *(result + 6) = '\0';
}
118 /* Return the actual soundex value. */
119 /* Added by Chris Herborth (chrish@qnx.com) */
120 static char soundex_get_soundex__doc__[] =
121 "Return the (English) Soundex hash value for a string.";
122 static PyObject *
123 get_soundex(PyObject *self, PyObject *args)
125 char *str;
126 char sdx[7];
128 if(!PyArg_ParseTuple( args, "s", &str))
129 return NULL;
131 soundex_hash(str, sdx);
133 return PyString_FromString(sdx);
136 static char soundex_sound_similar__doc__[] =
137 "Compare two strings to see if they sound similar (English).";
138 static PyObject *
139 sound_similar(PyObject *self, PyObject *args)
141 char *str1, *str2;
142 char res1[7], res2[7];
144 if(!PyArg_ParseTuple(args, "ss", &str1, &str2))
145 return NULL;
147 soundex_hash(str1, res1);
148 soundex_hash(str2, res2);
150 if(!strcmp(res1,res2))
151 return Py_BuildValue("i",1);
152 else
153 return Py_BuildValue("i",0);
156 /* Python Method Table.
158 static PyMethodDef SoundexMethods[] =
160 {"sound_similar", sound_similar, 1, soundex_sound_similar__doc__},
161 {"get_soundex", get_soundex, 1, soundex_get_soundex__doc__},
163 {NULL, NULL } /* sentinel */
167 /* Register the method table.
169 DL_EXPORT(void)
170 initsoundex()
172 (void) Py_InitModule4("soundex",
173 SoundexMethods,
174 soundex_module__doc__,
175 (PyObject *)NULL,
176 PYTHON_API_VERSION);