4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
11 *************************************************************************
12 ** $Id: icu.c,v 1.7 2007/12/13 21:54:11 drh Exp $
14 ** This file implements an integration between the ICU library
15 ** ("International Components for Unicode", an open-source library
16 ** for handling unicode data) and SQLite. The integration uses
17 ** ICU to provide the following to SQLite:
19 ** * An implementation of the SQL regexp() function (and hence REGEXP
20 ** operator) using the ICU uregex_XX() APIs.
22 ** * Implementations of the SQL scalar upper() and lower() functions
25 ** * Integration of ICU and SQLite collation sequences.
27 ** * An implementation of the LIKE operator that uses ICU to
28 ** provide case-independent matching.
31 #if !defined(SQLITE_CORE) \
32 || defined(SQLITE_ENABLE_ICU) \
33 || defined(SQLITE_ENABLE_ICU_COLLATIONS)
35 /* Include ICU headers */
36 #include <unicode/utypes.h>
37 #include <unicode/uregex.h>
38 #include <unicode/ustring.h>
39 #include <unicode/ucol.h>
44 #include "sqlite3ext.h"
45 SQLITE_EXTENSION_INIT1
51 ** This function is called when an ICU function called from within
52 ** the implementation of an SQL scalar function returns an error.
54 ** The scalar function context passed as the first argument is
55 ** loaded with an error message based on the following two args.
57 static void icuFunctionError(
58 sqlite3_context
*pCtx
, /* SQLite scalar function context */
59 const char *zName
, /* Name of ICU function that failed */
60 UErrorCode e
/* Error code returned by ICU function */
63 sqlite3_snprintf(128, zBuf
, "ICU error: %s(): %s", zName
, u_errorName(e
));
65 sqlite3_result_error(pCtx
, zBuf
, -1);
68 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU)
71 ** Maximum length (in bytes) of the pattern in a LIKE or GLOB
74 #ifndef SQLITE_MAX_LIKE_PATTERN_LENGTH
75 # define SQLITE_MAX_LIKE_PATTERN_LENGTH 50000
79 ** Version of sqlite3_free() that is always a function, never a macro.
81 static void xFree(void *p
){
86 ** This lookup table is used to help decode the first byte of
87 ** a multi-byte UTF8 character. It is copied here from SQLite source
90 static const unsigned char icuUtf8Trans1
[] = {
91 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
92 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
93 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
94 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
95 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
96 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
97 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
98 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
101 #define SQLITE_ICU_READ_UTF8(zIn, c) \
104 c = icuUtf8Trans1[c-0xc0]; \
105 while( (*zIn & 0xc0)==0x80 ){ \
106 c = (c<<6) + (0x3f & *(zIn++)); \
110 #define SQLITE_ICU_SKIP_UTF8(zIn) \
112 if( *(zIn++)>=0xc0 ){ \
113 while( (*zIn & 0xc0)==0x80 ){zIn++;} \
118 ** Compare two UTF-8 strings for equality where the first string is
119 ** a "LIKE" expression. Return true (1) if they are the same and
120 ** false (0) if they are different.
122 static int icuLikeCompare(
123 const uint8_t *zPattern
, /* LIKE pattern */
124 const uint8_t *zString
, /* The UTF-8 string to compare against */
125 const UChar32 uEsc
/* The escape character */
127 static const uint32_t MATCH_ONE
= (uint32_t)'_';
128 static const uint32_t MATCH_ALL
= (uint32_t)'%';
130 int prevEscape
= 0; /* True if the previous character was uEsc */
134 /* Read (and consume) the next character from the input pattern. */
136 SQLITE_ICU_READ_UTF8(zPattern
, uPattern
);
137 if( uPattern
==0 ) break;
139 /* There are now 4 possibilities:
141 ** 1. uPattern is an unescaped match-all character "%",
142 ** 2. uPattern is an unescaped match-one character "_",
143 ** 3. uPattern is an unescaped escape character, or
144 ** 4. uPattern is to be handled as an ordinary character
146 if( uPattern
==MATCH_ALL
&& !prevEscape
&& uPattern
!=(uint32_t)uEsc
){
150 /* Skip any MATCH_ALL or MATCH_ONE characters that follow a
151 ** MATCH_ALL. For each MATCH_ONE, skip one character in the
154 while( (c
=*zPattern
) == MATCH_ALL
|| c
== MATCH_ONE
){
156 if( *zString
==0 ) return 0;
157 SQLITE_ICU_SKIP_UTF8(zString
);
162 if( *zPattern
==0 ) return 1;
165 if( icuLikeCompare(zPattern
, zString
, uEsc
) ){
168 SQLITE_ICU_SKIP_UTF8(zString
);
172 }else if( uPattern
==MATCH_ONE
&& !prevEscape
&& uPattern
!=(uint32_t)uEsc
){
174 if( *zString
==0 ) return 0;
175 SQLITE_ICU_SKIP_UTF8(zString
);
177 }else if( uPattern
==(uint32_t)uEsc
&& !prevEscape
){
184 SQLITE_ICU_READ_UTF8(zString
, uString
);
185 uString
= (uint32_t)u_foldCase((UChar32
)uString
, U_FOLD_CASE_DEFAULT
);
186 uPattern
= (uint32_t)u_foldCase((UChar32
)uPattern
, U_FOLD_CASE_DEFAULT
);
187 if( uString
!=uPattern
){
198 ** Implementation of the like() SQL function. This function implements
199 ** the built-in LIKE operator. The first argument to the function is the
200 ** pattern and the second argument is the string. So, the SQL statements:
204 ** is implemented as like(B, A). If there is an escape character E,
208 ** is mapped to like(B, A, E).
210 static void icuLikeFunc(
211 sqlite3_context
*context
,
215 const unsigned char *zA
= sqlite3_value_text(argv
[0]);
216 const unsigned char *zB
= sqlite3_value_text(argv
[1]);
219 /* Limit the length of the LIKE or GLOB pattern to avoid problems
220 ** of deep recursion and N*N behavior in patternCompare().
222 if( sqlite3_value_bytes(argv
[0])>SQLITE_MAX_LIKE_PATTERN_LENGTH
){
223 sqlite3_result_error(context
, "LIKE or GLOB pattern too complex", -1);
229 /* The escape character string must consist of a single UTF-8 character.
230 ** Otherwise, return an error.
232 int nE
= sqlite3_value_bytes(argv
[2]);
233 const unsigned char *zE
= sqlite3_value_text(argv
[2]);
236 U8_NEXT(zE
, i
, nE
, uEsc
);
238 sqlite3_result_error(context
,
239 "ESCAPE expression must be a single character", -1);
245 sqlite3_result_int(context
, icuLikeCompare(zA
, zB
, uEsc
));
250 ** Function to delete compiled regexp objects. Registered as
251 ** a destructor function with sqlite3_set_auxdata().
253 static void icuRegexpDelete(void *p
){
254 URegularExpression
*pExpr
= (URegularExpression
*)p
;
259 ** Implementation of SQLite REGEXP operator. This scalar function takes
260 ** two arguments. The first is a regular expression pattern to compile;
261 ** the second is a string to match against that pattern. If either
262 ** argument is an SQL NULL, then NULL is returned. Otherwise, the result
263 ** is 1 if the string matches the pattern, or 0 otherwise.
265 ** SQLite maps the regexp() function to the regexp() operator such
266 ** that the following two are equivalent:
268 ** zString REGEXP zPattern
269 ** regexp(zPattern, zString)
271 ** Uses the following ICU regexp APIs:
277 static void icuRegexpFunc(sqlite3_context
*p
, int nArg
, sqlite3_value
**apArg
){
278 UErrorCode status
= U_ZERO_ERROR
;
279 URegularExpression
*pExpr
;
281 const UChar
*zString
= sqlite3_value_text16(apArg
[1]);
283 (void)nArg
; /* Unused parameter */
285 /* If the left hand side of the regexp operator is NULL,
286 ** then the result is also NULL.
292 pExpr
= sqlite3_get_auxdata(p
, 0);
294 const UChar
*zPattern
= sqlite3_value_text16(apArg
[0]);
298 pExpr
= uregex_open(zPattern
, -1, 0, 0, &status
);
300 if( U_SUCCESS(status
) ){
301 sqlite3_set_auxdata(p
, 0, pExpr
, icuRegexpDelete
);
302 pExpr
= sqlite3_get_auxdata(p
, 0);
305 icuFunctionError(p
, "uregex_open", status
);
310 /* Configure the text that the regular expression operates on. */
311 uregex_setText(pExpr
, zString
, -1, &status
);
312 if( !U_SUCCESS(status
) ){
313 icuFunctionError(p
, "uregex_setText", status
);
317 /* Attempt the match */
318 res
= uregex_matches(pExpr
, 0, &status
);
319 if( !U_SUCCESS(status
) ){
320 icuFunctionError(p
, "uregex_matches", status
);
324 /* Set the text that the regular expression operates on to a NULL
325 ** pointer. This is not really necessary, but it is tidier than
326 ** leaving the regular expression object configured with an invalid
327 ** pointer after this function returns.
329 uregex_setText(pExpr
, 0, 0, &status
);
332 sqlite3_result_int(p
, res
? 1 : 0);
336 ** Implementations of scalar functions for case mapping - upper() and
337 ** lower(). Function upper() converts its input to upper-case (ABC).
338 ** Function lower() converts to lower-case (abc).
340 ** ICU provides two types of case mapping, "general" case mapping and
341 ** "language specific". Refer to ICU documentation for the differences
344 ** To utilise "general" case mapping, the upper() or lower() scalar
345 ** functions are invoked with one argument:
347 ** upper('abc') -> 'ABC'
348 ** lower('ABC') -> 'abc'
350 ** To access ICU "language specific" case mapping, upper() or lower()
351 ** should be invoked with two arguments. The second argument is the name
352 ** of the locale to use. Passing an empty string ("") or SQL NULL value
353 ** as the second argument is the same as invoking the 1 argument version
354 ** of upper() or lower().
356 ** lower('I', 'en_us') -> 'i'
357 ** lower('I', 'tr_tr') -> '\u131' (small dotless i)
359 ** http://www.icu-project.org/userguide/posix.html#case_mappings
361 static void icuCaseFunc16(sqlite3_context
*p
, int nArg
, sqlite3_value
**apArg
){
362 const UChar
*zInput
; /* Pointer to input string */
363 UChar
*zOutput
= 0; /* Pointer to output buffer */
364 int nInput
; /* Size of utf-16 input string in bytes */
365 int nOut
; /* Size of output buffer in bytes */
367 int bToUpper
; /* True for toupper(), false for tolower() */
369 const char *zLocale
= 0;
371 assert(nArg
==1 || nArg
==2);
372 bToUpper
= (sqlite3_user_data(p
)!=0);
374 zLocale
= (const char *)sqlite3_value_text(apArg
[1]);
377 zInput
= sqlite3_value_text16(apArg
[0]);
381 nOut
= nInput
= sqlite3_value_bytes16(apArg
[0]);
383 sqlite3_result_text16(p
, "", 0, SQLITE_STATIC
);
387 for(cnt
=0; cnt
<2; cnt
++){
388 UChar
*zNew
= sqlite3_realloc(zOutput
, nOut
);
390 sqlite3_free(zOutput
);
391 sqlite3_result_error_nomem(p
);
395 status
= U_ZERO_ERROR
;
397 nOut
= 2*u_strToUpper(zOutput
,nOut
/2,zInput
,nInput
/2,zLocale
,&status
);
399 nOut
= 2*u_strToLower(zOutput
,nOut
/2,zInput
,nInput
/2,zLocale
,&status
);
402 if( U_SUCCESS(status
) ){
403 sqlite3_result_text16(p
, zOutput
, nOut
, xFree
);
404 }else if( status
==U_BUFFER_OVERFLOW_ERROR
){
408 icuFunctionError(p
, bToUpper
? "u_strToUpper" : "u_strToLower", status
);
412 assert( 0 ); /* Unreachable */
415 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU) */
418 ** Collation sequence destructor function. The pCtx argument points to
419 ** a UCollator structure previously allocated using ucol_open().
421 static void icuCollationDel(void *pCtx
){
422 UCollator
*p
= (UCollator
*)pCtx
;
427 ** Collation sequence comparison function. The pCtx argument points to
428 ** a UCollator structure previously allocated using ucol_open().
430 static int icuCollationColl(
437 UCollationResult res
;
438 UCollator
*p
= (UCollator
*)pCtx
;
439 res
= ucol_strcoll(p
, (UChar
*)zLeft
, nLeft
/2, (UChar
*)zRight
, nRight
/2);
441 case UCOL_LESS
: return -1;
442 case UCOL_GREATER
: return +1;
443 case UCOL_EQUAL
: return 0;
445 assert(!"Unexpected return value from ucol_strcoll()");
450 ** Implementation of the scalar function icu_load_collation().
452 ** This scalar function is used to add ICU collation based collation
453 ** types to an SQLite database connection. It is intended to be called
456 ** SELECT icu_load_collation(<locale>, <collation-name>);
458 ** Where <locale> is a string containing an ICU locale identifier (e.g.
459 ** "en_AU", "tr_TR" etc.) and <collation-name> is the name of the
460 ** collation sequence to create.
462 static void icuLoadCollation(
465 sqlite3_value
**apArg
467 sqlite3
*db
= (sqlite3
*)sqlite3_user_data(p
);
468 UErrorCode status
= U_ZERO_ERROR
;
469 const char *zLocale
; /* Locale identifier - (eg. "jp_JP") */
470 const char *zName
; /* SQL Collation sequence name (eg. "japanese") */
471 UCollator
*pUCollator
; /* ICU library collation object */
472 int rc
; /* Return code from sqlite3_create_collation_x() */
475 (void)nArg
; /* Unused parameter */
476 zLocale
= (const char *)sqlite3_value_text(apArg
[0]);
477 zName
= (const char *)sqlite3_value_text(apArg
[1]);
479 if( !zLocale
|| !zName
){
483 pUCollator
= ucol_open(zLocale
, &status
);
484 if( !U_SUCCESS(status
) ){
485 icuFunctionError(p
, "ucol_open", status
);
490 rc
= sqlite3_create_collation_v2(db
, zName
, SQLITE_UTF16
, (void *)pUCollator
,
491 icuCollationColl
, icuCollationDel
494 ucol_close(pUCollator
);
495 sqlite3_result_error(p
, "Error registering collation function", -1);
500 ** Register the ICU extension functions with database db.
502 int sqlite3IcuInit(sqlite3
*db
){
503 # define SQLITEICU_EXTRAFLAGS (SQLITE_DETERMINISTIC|SQLITE_INNOCUOUS)
504 static const struct IcuScalar
{
505 const char *zName
; /* Function name */
506 unsigned char nArg
; /* Number of arguments */
507 unsigned int enc
; /* Optimal text encoding */
508 unsigned char iContext
; /* sqlite3_user_data() context */
509 void (*xFunc
)(sqlite3_context
*,int,sqlite3_value
**);
511 {"icu_load_collation",2,SQLITE_UTF8
|SQLITE_DIRECTONLY
,1, icuLoadCollation
},
512 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU)
513 {"regexp", 2, SQLITE_ANY
|SQLITEICU_EXTRAFLAGS
, 0, icuRegexpFunc
},
514 {"lower", 1, SQLITE_UTF16
|SQLITEICU_EXTRAFLAGS
, 0, icuCaseFunc16
},
515 {"lower", 2, SQLITE_UTF16
|SQLITEICU_EXTRAFLAGS
, 0, icuCaseFunc16
},
516 {"upper", 1, SQLITE_UTF16
|SQLITEICU_EXTRAFLAGS
, 1, icuCaseFunc16
},
517 {"upper", 2, SQLITE_UTF16
|SQLITEICU_EXTRAFLAGS
, 1, icuCaseFunc16
},
518 {"lower", 1, SQLITE_UTF8
|SQLITEICU_EXTRAFLAGS
, 0, icuCaseFunc16
},
519 {"lower", 2, SQLITE_UTF8
|SQLITEICU_EXTRAFLAGS
, 0, icuCaseFunc16
},
520 {"upper", 1, SQLITE_UTF8
|SQLITEICU_EXTRAFLAGS
, 1, icuCaseFunc16
},
521 {"upper", 2, SQLITE_UTF8
|SQLITEICU_EXTRAFLAGS
, 1, icuCaseFunc16
},
522 {"like", 2, SQLITE_UTF8
|SQLITEICU_EXTRAFLAGS
, 0, icuLikeFunc
},
523 {"like", 3, SQLITE_UTF8
|SQLITEICU_EXTRAFLAGS
, 0, icuLikeFunc
},
524 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU) */
529 for(i
=0; rc
==SQLITE_OK
&& i
<(int)(sizeof(scalars
)/sizeof(scalars
[0])); i
++){
530 const struct IcuScalar
*p
= &scalars
[i
];
531 rc
= sqlite3_create_function(
532 db
, p
->zName
, p
->nArg
, p
->enc
,
533 p
->iContext
? (void*)db
: (void*)0,
543 __declspec(dllexport
)
545 int sqlite3_icu_init(
548 const sqlite3_api_routines
*pApi
550 SQLITE_EXTENSION_INIT2(pApi
)
551 return sqlite3IcuInit(db
);