1 /* phonetic.c - routines to do phonetic matching */
2 /* $OpenLDAP: pkg/ldap/servers/slapd/phonetic.c,v 1.22.2.3 2008/02/11 23:26:44 kurt Exp $ */
3 /* This work is part of OpenLDAP Software <http://www.openldap.org/>.
5 * Copyright 1998-2008 The OpenLDAP Foundation.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted only as authorized by the OpenLDAP
12 * A copy of this license is available in the file LICENSE in the
13 * top-level directory of the distribution or, alternatively, at
14 * <http://www.OpenLDAP.org/license.html>.
16 /* Portions Copyright (c) 1995 Regents of the University of Michigan.
17 * All rights reserved.
19 * Redistribution and use in source and binary forms are permitted
20 * provided that this notice is preserved and that due credit is given
21 * to the University of Michigan at Ann Arbor. The name of the University
22 * may not be used to endorse or promote products derived from this
23 * software without specific prior written permission. This software
24 * is provided ``as is'' without express or implied warranty.
32 #include <ac/string.h>
33 #include <ac/socket.h>
/*
 * Algorithm selection: when the build defines neither SLAPD_METAPHONE nor
 * SLAPD_PHONETIC, default to SLAPD_METAPHONE.  Further down, the
 * SLAPD_PHONETIC implementation of phonetic() is checked first and the
 * SLAPD_METAPHONE one is the #elif alternative.
 */
38 #if !defined(SLAPD_METAPHONE) && !defined(SLAPD_PHONETIC)
39 #define SLAPD_METAPHONE
/*
 * iswordbreak(x) - true when character x terminates or separates words.
 *
 * A character counts as a word break if it is not ASCII, or is whitespace,
 * punctuation, a decimal digit, or the terminating NUL.  The isascii() test
 * comes first, so the ctype classifiers are only reached for ASCII values;
 * their argument is additionally cast to unsigned char.
 *
 * x is expanded several times, so the argument must not have side effects
 * (e.g. do not pass *s++).
 */
42 #define iswordbreak(x) (!isascii(x) || isspace((unsigned char) (x)) || \
43 ispunct((unsigned char) (x)) || \
44 isdigit((unsigned char) (x)) || (x) == '\0')
54 while ( iswordbreak( *s
) ) {
72 while ( ! iswordbreak( *s
) ) {
76 while ( iswordbreak( *s
) ) {
93 for ( s
= w
; !iswordbreak( *s
); s
++ )
/*
 * Default limit on the number of characters in a generated phonetic code.
 * It sizes the local result buffers in both phonetic() variants and bounds
 * their encoding loops.  A definition of MAXPHONEMELEN supplied before this
 * point takes precedence.
 */
104 #ifndef MAXPHONEMELEN
105 #define MAXPHONEMELEN 4
108 #if defined(SLAPD_PHONETIC)
110 /* lifted from isode-8.0 */
114 char code
, adjacent
, ch
;
117 char phoneme
[MAXPHONEMELEN
+ 1];
120 if ( p
== NULL
|| *p
== '\0' ) {
125 phoneme
[0] = TOUPPER((unsigned char)*p
);
128 for ( i
= 0; i
< 99 && (! iswordbreak(*p
)); p
++ ) {
129 ch
= TOUPPER ((unsigned char)*p
);
138 code
= (adjacent
!= '1') ? '1' : '0';
148 code
= (adjacent
!= '2') ? '2' : '0';
152 code
= (adjacent
!= '3') ? '3' : '0';
155 code
= (adjacent
!= '4') ? '4' : '0';
159 code
= (adjacent
!= '5') ? '5' : '0';
162 code
= (adjacent
!= '6') ? '6' : '0';
171 } else if ( code
!= '0' ) {
172 if ( i
== MAXPHONEMELEN
)
174 adjacent
= phoneme
[i
] = code
;
182 return( ch_strdup( phoneme
) );
185 #elif defined(SLAPD_METAPHONE)
188 * Metaphone was originally developed by Lawrence Philips and
189 * published in the "Computer Language" magazine in 1990.
192 * Metaphone copied from C Gazette, June/July 1991, pp 56-57,
193 * author Gary A. Parker, with changes by Bernard Tiffany of the
194 * University of Michigan, and more changes by Tim Howes of the
195 * University of Michigan.
/*
 * Per-letter classification flags for the metaphone rules, indexed by
 * (letter - 'A') for the upper-case letters 'A'..'Z'.  Each entry is a
 * bitmask read through the vowel()/same()/varson()/frontv()/noghf() macros:
 *
 *    1  vowel                      (A E I O U)
 *    2  letter that maps to itself (F J L M N R)
 *    4  letter that varies sound   (C G P S T)
 *    8  front vowel                (E I Y)
 *   16  letter tested by noghf()   (B D H)
 *
 * E and I carry both the vowel and the front-vowel bit (1 | 8 == 9).
 * K, Q, V, W, X and Z have no flags set.
 */
198 /* Character coding array */
199 static const char vsvfn
[26] = {
200 1, 16, 4, 16, 9, 2, 4, 16, 9, 2, 0, 2, 2,
201 /* A B C D E F G H I J K L M */
202 2, 1, 4, 0, 2, 4, 4, 1, 0, 0, 0, 8, 0};
203 /* N O P Q R S T U V W X Y Z */
/*
 * Classification predicates over vsvfn[].  Each one yields 0 for a NUL
 * argument and otherwise tests one flag bit of the table entry for x.
 *
 * Only NUL is guarded against: any other x is used directly as the index
 * (x - 'A') into the 26-entry table, so x must be NUL or an upper-case
 * letter 'A'..'Z'.  Any other value indexes outside the array.
 *
 * x is expanded twice, so the argument must not have side effects.
 */
205 /* Macros to access character coding array */
206 #define vowel(x) ((x) != '\0' && vsvfn[(x) - 'A'] & 1) /* AEIOU */
207 #define same(x) ((x) != '\0' && vsvfn[(x) - 'A'] & 2) /* FJLMNR */
208 #define varson(x) ((x) != '\0' && vsvfn[(x) - 'A'] & 4) /* CGPST */
209 #define frontv(x) ((x) != '\0' && vsvfn[(x) - 'A'] & 8) /* EIY */
210 #define noghf(x) ((x) != '\0' && vsvfn[(x) - 'A'] & 16) /* BDH */
213 phonetic( char *Word
)
215 char *n
, *n_start
, *n_end
; /* pointers to string */
216 char *metaph_end
; /* pointers to metaph */
217 char ntrans
[40]; /* word with uppercase letters */
218 int KSflag
; /* state flag for X -> KS */
219 char buf
[MAXPHONEMELEN
+ 2];
223 * Copy Word to internal buffer, dropping non-alphabetic characters
224 * and converting to upper case
227 for (n
= ntrans
+ 4, n_end
= ntrans
+ 35; !iswordbreak( *Word
) &&
229 if (isalpha((unsigned char)*Word
))
230 *n
++ = TOUPPER((unsigned char)*Word
);
234 if (n
== ntrans
+ 4) {
235 return( ch_strdup( buf
) ); /* Return if null */
237 n_end
= n
; /* Set n_end to end of string */
239 /* ntrans[0] will always be == 0 */
247 *n
= 0; /* Pad with nulls */
248 n
= ntrans
+ 4; /* Assign pointer to start */
250 /* Check for PN, KN, GN, AE, WR, WH, and X at start */
255 /* 'PN', 'KN', 'GN' becomes 'N' */
260 /* 'AE' becomes 'E' */
265 /* 'WR' becomes 'R', and 'WH' to 'H' */
268 else if (*(n
+ 1) == 'H') {
274 /* 'X' becomes 'S' */
280 * Now step through the string, stopping at the end of the string or when
281 * the computed 'metaph' is MAXPHONEMELEN characters long
284 KSflag
= 0; /* state flag for KS translation */
285 for (metaph_end
= Metaph
+ MAXPHONEMELEN
, n_start
= n
;
286 n
<= n_end
&& Metaph
< metaph_end
; n
++) {
291 /* Drop duplicates except for CC */
292 if (*(n
- 1) == *n
&& *n
!= 'C')
294 /* Check for F J L M N R or first letter vowel */
295 if (same(*n
) || (n
== n_start
&& vowel(*n
)))
304 if (n
== (n_end
- 1) && *(n
- 1) != 'M')
310 * X if in -CIA-, -CH- else S if in
311 * -CI-, -CE-, -CY- else dropped if
312 * in -SCI-, -SCE-, -SCY- else K
314 if (*(n
- 1) != 'S' || !frontv(*(n
+ 1))) {
315 if (*(n
+ 1) == 'I' && *(n
+ 2) == 'A')
317 else if (frontv(*(n
+ 1)))
319 else if (*(n
+ 1) == 'H')
320 *Metaph
++ = ((n
== n_start
&& !vowel(*(n
+ 2)))
322 ? (char) 'K' : (char) 'X';
330 * J if in DGE or DGI or DGY else T
332 *Metaph
++ = (*(n
+ 1) == 'G' && frontv(*(n
+ 2)))
333 ? (char) 'J' : (char) 'T';
338 * F if in -GH and not B--GH, D--GH,
339 * -H--GH, -H---GH else dropped if
340 * -GNED, -GN, -DGE-, -DGI-, -DGY-
341 * else J if in -GE-, -GI-, -GY- and
344 if ((*(n
+ 1) != 'J' || vowel(*(n
+ 2))) &&
345 (*(n
+ 1) != 'N' || ((n
+ 1) < n_end
&&
346 (*(n
+ 2) != 'E' || *(n
+ 3) != 'D'))) &&
347 (*(n
- 1) != 'D' || !frontv(*(n
+ 1))))
348 *Metaph
++ = (frontv(*(n
+ 1)) &&
349 *(n
+ 2) != 'G') ? (char) 'G' : (char) 'K';
350 else if (*(n
+ 1) == 'H' && !noghf(*(n
- 3)) &&
357 * H if before a vowel and not after
358 * C, G, P, S, T else dropped
360 if (!varson(*(n
- 1)) && (!vowel(*(n
- 1)) ||
367 * dropped if after C else K
375 * F if before H, else P
377 *Metaph
++ = *(n
+ 1) == 'H' ?
378 (char) 'F' : (char) 'P';
390 * X in -SH-, -SIO- or -SIA- else S
392 *Metaph
++ = (*(n
+ 1) == 'H' ||
393 (*(n
+ 1) == 'I' && (*(n
+ 2) == 'O' ||
395 ? (char) 'X' : (char) 'S';
400 * X in -TIA- or -TIO- else 0 (zero)
401 * before H else dropped if in -TCH-
404 if (*(n
+ 1) == 'I' && (*(n
+ 2) == 'O' ||
407 else if (*(n
+ 1) == 'H')
409 else if (*(n
+ 1) != 'C' || *(n
+ 2) != 'H')
422 * W after a vowel, else dropped
427 * Y unless followed by a vowel
440 *Metaph
++ = 'K'; /* Insert K, then S */
455 *Metaph
= 0; /* Null terminate */
456 return( ch_strdup( buf
) );
459 #endif /* SLAPD_METAPHONE */