4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
11 ******************************************************************************
13 ** The code in this file implements a compact but reasonably
14 ** efficient regular-expression matcher for posix extended regular
15 ** expressions against UTF8 text.
17 ** This file is an SQLite extension. It registers a single function
18 ** named "regexp(A,B)" where A is the regular expression and B is the
19 ** string to be matched. By registering this function, SQLite will also
20 ** then implement the "B regexp A" operator. Note that with the function
21 ** the regular expression comes first, but with the operator it comes
24 ** The following regular expression syntax is supported:
26 ** X* zero or more occurrences of X
27 ** X+ one or more occurrences of X
28 ** X? zero or one occurrences of X
29 ** X{p,q} between p and q occurrences of X
32 ** ^X X occurring at the beginning of the string
33 ** X$ X occurring at the end of the string
34 ** . Match any single character
35 ** \c Character c where c is one of \{}()[]|*+?.
36 ** \c C-language escapes for c in afnrtv. ex: \t or \n
37 ** \uXXXX Where XXXX is exactly 4 hex digits, unicode value XXXX
38 ** \xXX Where XX is exactly 2 hex digits, unicode value XX
39 ** [abc] Any single character from the set abc
40 ** [^abc] Any single character not in the set abc
41 ** [a-z] Any single character in the range a-z
42 ** [^a-z] Any single character not in the range a-z
44 ** \w Word character. [A-Za-z0-9_]
45 ** \W Non-word character
48 ** \s Whitespace character
49 ** \S Non-whitespace character
51 ** A nondeterministic finite automaton (NFA) is used for matching, so the
52 ** performance is bounded by O(N*M) where N is the size of the regular
53 ** expression and M is the size of the input string. The matcher never
54 ** exhibits exponential behavior. Note that the X{p,q} operator expands
55 ** to p copies of X followed by q-p copies of X? and that the size of the
56 ** regular expression in the O(N*M) performance bound is computed after
61 #include "sqlite3ext.h"
62 SQLITE_EXTENSION_INIT1
65 ** The following #defines change the names of some functions implemented in
66 ** this file to prevent name collisions with C-library functions of the
69 #define re_match sqlite3re_match
70 #define re_compile sqlite3re_compile
71 #define re_free sqlite3re_free
73 /* The end-of-input character */
74 #define RE_EOF 0 /* End of input */
76 /* The NFA is implemented as sequence of opcodes taken from the following
77 ** set. Each opcode has a single integer argument.
79 #define RE_OP_MATCH 1 /* Match the one character in the argument */
80 #define RE_OP_ANY 2 /* Match any one character. (Implements ".") */
81 #define RE_OP_ANYSTAR 3 /* Special optimized version of .* */
82 #define RE_OP_FORK 4 /* Continue to both next and opcode at iArg */
83 #define RE_OP_GOTO 5 /* Jump to opcode at iArg */
84 #define RE_OP_ACCEPT 6 /* Halt and indicate a successful match */
85 #define RE_OP_CC_INC 7 /* Beginning of a [...] character class */
86 #define RE_OP_CC_EXC 8 /* Beginning of a [^...] character class */
87 #define RE_OP_CC_VALUE 9 /* Single value in a character class */
88 #define RE_OP_CC_RANGE 10 /* Range of values in a character class */
89 #define RE_OP_WORD 11 /* Perl word character [A-Za-z0-9_] */
90 #define RE_OP_NOTWORD 12 /* Not a perl word character */
91 #define RE_OP_DIGIT 13 /* digit: [0-9] */
92 #define RE_OP_NOTDIGIT 14 /* Not a digit */
93 #define RE_OP_SPACE 15 /* space: [ \t\n\r\v\f] */
94 #define RE_OP_NOTSPACE 16 /* Not a space */
95 #define RE_OP_BOUNDARY 17 /* Boundary between word and non-word */
/* Each opcode is a "state" in the NFA.  A state is identified by the
** index of its opcode.
*/
typedef unsigned short ReStateNumber;

/* Because this is an NFA and not a DFA, multiple states can be active at
** once.  An instance of the following object records all active states in
** the NFA.  The implementation is optimized for the common case where the
** number of active states is small.
*/
typedef struct ReStateSet {
  unsigned nState;            /* Number of current states */
  ReStateNumber *aState;      /* Current states */
} ReStateSet;
/* An input string read one character at a time.
**
** z[] holds the raw bytes, i is the offset of the next byte to be read
** and mx is the offset at which end-of-input is reported.
*/
typedef struct ReInput ReInput;
struct ReInput {
  const unsigned char *z;  /* All text */
  int i;                   /* Next byte to read */
  int mx;                  /* EOF when i>=mx */
};
119 /* A compiled NFA (or an NFA that is in the process of being compiled) is
120 ** an instance of the following object.
122 typedef struct ReCompiled ReCompiled
;
124 ReInput sIn
; /* Regular expression text */
125 const char *zErr
; /* Error message to return */
126 char *aOp
; /* Operators for the virtual machine */
127 int *aArg
; /* Arguments to each operator */
128 unsigned (*xNextChar
)(ReInput
*); /* Next character function */
129 unsigned char zInit
[12]; /* Initial text to match */
130 int nInit
; /* Number of characters in zInit */
131 unsigned nState
; /* Number of entries in aOp[] and aArg[] */
132 unsigned nAlloc
; /* Slots allocated for aOp[] and aArg[] */
135 /* Add a state to the given state set if it is not already there */
136 static void re_add_state(ReStateSet
*pSet
, int newState
){
138 for(i
=0; i
<pSet
->nState
; i
++) if( pSet
->aState
[i
]==newState
) return;
139 pSet
->aState
[pSet
->nState
++] = newState
;
142 /* Extract the next unicode character from *pzIn and return it. Advance
143 ** *pzIn to the first byte past the end of the character returned. To
144 ** be clear: this routine converts utf8 to unicode. This routine is
145 ** optimized for the common case where the next character is a single byte.
147 static unsigned re_next_char(ReInput
*p
){
149 if( p
->i
>=p
->mx
) return 0;
152 if( (c
&0xe0)==0xc0 && p
->i
<p
->mx
&& (p
->z
[p
->i
]&0xc0)==0x80 ){
153 c
= (c
&0x1f)<<6 | (p
->z
[p
->i
++]&0x3f);
154 if( c
<0x80 ) c
= 0xfffd;
155 }else if( (c
&0xf0)==0xe0 && p
->i
+1<p
->mx
&& (p
->z
[p
->i
]&0xc0)==0x80
156 && (p
->z
[p
->i
+1]&0xc0)==0x80 ){
157 c
= (c
&0x0f)<<12 | ((p
->z
[p
->i
]&0x3f)<<6) | (p
->z
[p
->i
+1]&0x3f);
159 if( c
<=0x3ff || (c
>=0xd800 && c
<=0xdfff) ) c
= 0xfffd;
160 }else if( (c
&0xf8)==0xf0 && p
->i
+3<p
->mx
&& (p
->z
[p
->i
]&0xc0)==0x80
161 && (p
->z
[p
->i
+1]&0xc0)==0x80 && (p
->z
[p
->i
+2]&0xc0)==0x80 ){
162 c
= (c
&0x07)<<18 | ((p
->z
[p
->i
]&0x3f)<<12) | ((p
->z
[p
->i
+1]&0x3f)<<6)
163 | (p
->z
[p
->i
+2]&0x3f);
165 if( c
<=0xffff || c
>0x10ffff ) c
= 0xfffd;
172 static unsigned re_next_char_nocase(ReInput
*p
){
173 unsigned c
= re_next_char(p
);
174 if( c
>='A' && c
<='Z' ) c
+= 'a' - 'A';
/* Return true if c is a perl "word" character:  [A-Za-z0-9_] */
static int re_word_char(int c){
  return (c>='0' && c<='9') || (c>='a' && c<='z')
      || (c>='A' && c<='Z') || c=='_';
}
/* Return true if c is a "digit" character:  [0-9] */
static int re_digit_char(int c){
  return (c>='0' && c<='9');
}
/* Return true if c is a perl "space" character:  [ \t\r\n\v\f] */
static int re_space_char(int c){
  return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f';
}
194 /* Run a compiled regular expression on the zero-terminated input
195 ** string zIn[]. Return true on a match and false if there is no match.
197 static int re_match(ReCompiled
*pRe
, const unsigned char *zIn
, int nIn
){
198 ReStateSet aStateSet
[2], *pThis
, *pNext
;
199 ReStateNumber aSpace
[100];
200 ReStateNumber
*pToFree
;
202 unsigned int iSwap
= 0;
210 in
.mx
= nIn
>=0 ? nIn
: (int)strlen((char const*)zIn
);
212 /* Look for the initial prefix match, if there is one. */
214 unsigned char x
= pRe
->zInit
[0];
215 while( in
.i
+pRe
->nInit
<=in
.mx
217 strncmp((const char*)zIn
+in
.i
, (const char*)pRe
->zInit
, pRe
->nInit
)!=0)
221 if( in
.i
+pRe
->nInit
>in
.mx
) return 0;
224 if( pRe
->nState
<=(sizeof(aSpace
)/(sizeof(aSpace
[0])*2)) ){
226 aStateSet
[0].aState
= aSpace
;
228 pToFree
= sqlite3_malloc( sizeof(ReStateNumber
)*2*pRe
->nState
);
229 if( pToFree
==0 ) return -1;
230 aStateSet
[0].aState
= pToFree
;
232 aStateSet
[1].aState
= &aStateSet
[0].aState
[pRe
->nState
];
233 pNext
= &aStateSet
[1];
235 re_add_state(pNext
, 0);
236 while( c
!=RE_EOF
&& pNext
->nState
>0 ){
238 c
= pRe
->xNextChar(&in
);
240 pNext
= &aStateSet
[iSwap
];
243 for(i
=0; i
<pThis
->nState
; i
++){
244 int x
= pThis
->aState
[i
];
245 switch( pRe
->aOp
[x
] ){
247 if( pRe
->aArg
[x
]==c
) re_add_state(pNext
, x
+1);
251 re_add_state(pNext
, x
+1);
255 if( re_word_char(c
) ) re_add_state(pNext
, x
+1);
258 case RE_OP_NOTWORD
: {
259 if( !re_word_char(c
) ) re_add_state(pNext
, x
+1);
263 if( re_digit_char(c
) ) re_add_state(pNext
, x
+1);
266 case RE_OP_NOTDIGIT
: {
267 if( !re_digit_char(c
) ) re_add_state(pNext
, x
+1);
271 if( re_space_char(c
) ) re_add_state(pNext
, x
+1);
274 case RE_OP_NOTSPACE
: {
275 if( !re_space_char(c
) ) re_add_state(pNext
, x
+1);
278 case RE_OP_BOUNDARY
: {
279 if( re_word_char(c
)!=re_word_char(cPrev
) ) re_add_state(pThis
, x
+1);
282 case RE_OP_ANYSTAR
: {
283 re_add_state(pNext
, x
);
284 re_add_state(pThis
, x
+1);
288 re_add_state(pThis
, x
+pRe
->aArg
[x
]);
289 re_add_state(pThis
, x
+1);
293 re_add_state(pThis
, x
+pRe
->aArg
[x
]);
303 int n
= pRe
->aArg
[x
];
305 for(j
=1; j
>0 && j
<n
; j
++){
306 if( pRe
->aOp
[x
+j
]==RE_OP_CC_VALUE
){
307 if( pRe
->aArg
[x
+j
]==c
){
312 if( pRe
->aArg
[x
+j
]<=c
&& pRe
->aArg
[x
+j
+1]>=c
){
320 if( pRe
->aOp
[x
]==RE_OP_CC_EXC
) hit
= !hit
;
321 if( hit
) re_add_state(pNext
, x
+n
);
327 for(i
=0; i
<pNext
->nState
; i
++){
328 if( pRe
->aOp
[pNext
->aState
[i
]]==RE_OP_ACCEPT
){ rc
= 1; break; }
331 sqlite3_free(pToFree
);
335 /* Resize the opcode and argument arrays for an RE under construction.
337 static int re_resize(ReCompiled
*p
, int N
){
340 aOp
= sqlite3_realloc(p
->aOp
, N
*sizeof(p
->aOp
[0]));
341 if( aOp
==0 ) return 1;
343 aArg
= sqlite3_realloc(p
->aArg
, N
*sizeof(p
->aArg
[0]));
344 if( aArg
==0 ) return 1;
350 /* Insert a new opcode and argument into an RE under construction. The
351 ** insertion point is just prior to existing opcode iBefore.
353 static int re_insert(ReCompiled
*p
, int iBefore
, int op
, int arg
){
355 if( p
->nAlloc
<=p
->nState
&& re_resize(p
, p
->nAlloc
*2) ) return 0;
356 for(i
=p
->nState
; i
>iBefore
; i
--){
357 p
->aOp
[i
] = p
->aOp
[i
-1];
358 p
->aArg
[i
] = p
->aArg
[i
-1];
361 p
->aOp
[iBefore
] = op
;
362 p
->aArg
[iBefore
] = arg
;
366 /* Append a new opcode and argument to the end of the RE under construction.
368 static int re_append(ReCompiled
*p
, int op
, int arg
){
369 return re_insert(p
, p
->nState
, op
, arg
);
372 /* Make a copy of N opcodes starting at iStart onto the end of the RE
373 ** under construction.
375 static void re_copy(ReCompiled
*p
, int iStart
, int N
){
376 if( p
->nState
+N
>=p
->nAlloc
&& re_resize(p
, p
->nAlloc
*2+N
) ) return;
377 memcpy(&p
->aOp
[p
->nState
], &p
->aOp
[iStart
], N
*sizeof(p
->aOp
[0]));
378 memcpy(&p
->aArg
[p
->nState
], &p
->aArg
[iStart
], N
*sizeof(p
->aArg
[0]));
/* Return true if c is a hexadecimal digit character:  [0-9a-fA-F]
** If c is a hex digit, also set *pV = (*pV)*16 + valueof(c).  If
** c is not a hex digit *pV is unchanged.
*/
static int re_hex(int c, int *pV){
  if( c>='0' && c<='9' ){
    c -= '0';
  }else if( c>='a' && c<='f' ){
    c -= 'a' - 10;
  }else if( c>='A' && c<='F' ){
    c -= 'A' - 10;
  }else{
    return 0;
  }
  *pV = (*pV)*16 + (c & 0xff);
  return 1;
}
400 /* A backslash character has been seen, read the next character and
401 ** return its interpretation.
403 static unsigned re_esc_char(ReCompiled
*p
){
404 static const char zEsc
[] = "afnrtv\\()*.+?[$^{|}]";
405 static const char zTrans
[] = "\a\f\n\r\t\v";
408 if( p
->sIn
.i
>=p
->sIn
.mx
) return 0;
409 c
= p
->sIn
.z
[p
->sIn
.i
];
410 if( c
=='u' && p
->sIn
.i
+4<p
->sIn
.mx
){
411 const unsigned char *zIn
= p
->sIn
.z
+ p
->sIn
.i
;
412 if( re_hex(zIn
[1],&v
)
421 if( c
=='x' && p
->sIn
.i
+2<p
->sIn
.mx
){
422 const unsigned char *zIn
= p
->sIn
.z
+ p
->sIn
.i
;
423 if( re_hex(zIn
[1],&v
)
430 for(i
=0; zEsc
[i
] && zEsc
[i
]!=c
; i
++){}
432 if( i
<6 ) c
= zTrans
[i
];
435 p
->zErr
= "unknown \\ escape";
440 /* Forward declaration */
441 static const char *re_subcompile_string(ReCompiled
*);
443 /* Peek at the next byte of input */
444 static unsigned char rePeek(ReCompiled
*p
){
445 return p
->sIn
.i
<p
->sIn
.mx
? p
->sIn
.z
[p
->sIn
.i
] : 0;
448 /* Compile RE text into a sequence of opcodes. Continue up to the
449 ** first unmatched ")" character, then return. If an error is found,
450 ** return a pointer to the error message string.
452 static const char *re_subcompile_re(ReCompiled
*p
){
454 int iStart
, iEnd
, iGoto
;
456 zErr
= re_subcompile_string(p
);
457 if( zErr
) return zErr
;
458 while( rePeek(p
)=='|' ){
460 re_insert(p
, iStart
, RE_OP_FORK
, iEnd
+ 2 - iStart
);
461 iGoto
= re_append(p
, RE_OP_GOTO
, 0);
463 zErr
= re_subcompile_string(p
);
464 if( zErr
) return zErr
;
465 p
->aArg
[iGoto
] = p
->nState
- iGoto
;
470 /* Compile an element of regular expression text (anything that can be
471 ** an operand to the "|" operator). Return NULL on success or a pointer
472 ** to the error message if there is a problem.
474 static const char *re_subcompile_string(ReCompiled
*p
){
479 while( (c
= p
->xNextChar(&p
->sIn
))!=0 ){
489 zErr
= re_subcompile_re(p
);
490 if( zErr
) return zErr
;
491 if( rePeek(p
)!=')' ) return "unmatched '('";
496 if( rePeek(p
)=='*' ){
497 re_append(p
, RE_OP_ANYSTAR
, 0);
500 re_append(p
, RE_OP_ANY
, 0);
505 if( iPrev
<0 ) return "'*' without operand";
506 re_insert(p
, iPrev
, RE_OP_GOTO
, p
->nState
- iPrev
+ 1);
507 re_append(p
, RE_OP_FORK
, iPrev
- p
->nState
+ 1);
511 if( iPrev
<0 ) return "'+' without operand";
512 re_append(p
, RE_OP_FORK
, iPrev
- p
->nState
);
516 if( iPrev
<0 ) return "'?' without operand";
517 re_insert(p
, iPrev
, RE_OP_FORK
, p
->nState
- iPrev
+1);
523 if( iPrev
<0 ) return "'{m,n}' without operand";
524 while( (c
=rePeek(p
))>='0' && c
<='9' ){ m
= m
*10 + c
- '0'; p
->sIn
.i
++; }
529 while( (c
=rePeek(p
))>='0' && c
<='9' ){ n
= n
*10 + c
-'0'; p
->sIn
.i
++; }
531 if( c
!='}' ) return "unmatched '{'";
532 if( n
>0 && n
<m
) return "n less than m in '{m,n}'";
534 sz
= p
->nState
- iPrev
;
536 if( n
==0 ) return "both m and n are zero in '{m,n}'";
537 re_insert(p
, iPrev
, RE_OP_FORK
, sz
+1);
540 for(j
=1; j
<m
; j
++) re_copy(p
, iPrev
, sz
);
543 re_append(p
, RE_OP_FORK
, sz
+1);
544 re_copy(p
, iPrev
, sz
);
547 re_append(p
, RE_OP_FORK
, -sz
);
552 int iFirst
= p
->nState
;
553 if( rePeek(p
)=='^' ){
554 re_append(p
, RE_OP_CC_EXC
, 0);
557 re_append(p
, RE_OP_CC_INC
, 0);
559 while( (c
= p
->xNextChar(&p
->sIn
))!=0 ){
560 if( c
=='[' && rePeek(p
)==':' ){
561 return "POSIX character classes not supported";
563 if( c
=='\\' ) c
= re_esc_char(p
);
564 if( rePeek(p
)=='-' ){
565 re_append(p
, RE_OP_CC_RANGE
, c
);
567 c
= p
->xNextChar(&p
->sIn
);
568 if( c
=='\\' ) c
= re_esc_char(p
);
569 re_append(p
, RE_OP_CC_RANGE
, c
);
571 re_append(p
, RE_OP_CC_VALUE
, c
);
573 if( rePeek(p
)==']' ){ p
->sIn
.i
++; break; }
575 if( c
==0 ) return "unclosed '['";
576 p
->aArg
[iFirst
] = p
->nState
- iFirst
;
582 case 'b': specialOp
= RE_OP_BOUNDARY
; break;
583 case 'd': specialOp
= RE_OP_DIGIT
; break;
584 case 'D': specialOp
= RE_OP_NOTDIGIT
; break;
585 case 's': specialOp
= RE_OP_SPACE
; break;
586 case 'S': specialOp
= RE_OP_NOTSPACE
; break;
587 case 'w': specialOp
= RE_OP_WORD
; break;
588 case 'W': specialOp
= RE_OP_NOTWORD
; break;
592 re_append(p
, specialOp
, 0);
595 re_append(p
, RE_OP_MATCH
, c
);
600 re_append(p
, RE_OP_MATCH
, c
);
609 /* Free and reclaim all the memory used by a previously compiled
610 ** regular expression. Applications should invoke this routine once
611 ** for every call to re_compile() to avoid memory leaks.
613 void re_free(ReCompiled
*pRe
){
615 sqlite3_free(pRe
->aOp
);
616 sqlite3_free(pRe
->aArg
);
622 ** Compile a textual regular expression in zIn[] into a compiled regular
623 ** expression suitable for us by re_match() and return a pointer to the
624 ** compiled regular expression in *ppRe. Return NULL on success or an
625 ** error message if something goes wrong.
627 const char *re_compile(ReCompiled
**ppRe
, const char *zIn
, int noCase
){
633 pRe
= sqlite3_malloc( sizeof(*pRe
) );
635 return "out of memory";
637 memset(pRe
, 0, sizeof(*pRe
));
638 pRe
->xNextChar
= noCase
? re_next_char_nocase
: re_next_char
;
639 if( re_resize(pRe
, 30) ){
641 return "out of memory";
646 re_append(pRe
, RE_OP_ANYSTAR
, 0);
648 pRe
->sIn
.z
= (unsigned char*)zIn
;
650 pRe
->sIn
.mx
= (int)strlen(zIn
);
651 zErr
= re_subcompile_re(pRe
);
656 if( rePeek(pRe
)=='$' && pRe
->sIn
.i
+1>=pRe
->sIn
.mx
){
657 re_append(pRe
, RE_OP_MATCH
, RE_EOF
);
658 re_append(pRe
, RE_OP_ACCEPT
, 0);
660 }else if( pRe
->sIn
.i
>=pRe
->sIn
.mx
){
661 re_append(pRe
, RE_OP_ACCEPT
, 0);
665 return "unrecognized character";
668 /* The following is a performance optimization. If the regex begins with
669 ** ".*" (if the input regex lacks an initial "^") and afterwards there are
670 ** one or more matching characters, enter those matching characters into
671 ** zInit[]. The re_match() routine can then search ahead in the input
672 ** string looking for the initial match without having to run the whole
673 ** regex engine over the string. Do not worry able trying to match
674 ** unicode characters beyond plane 0 - those are very rare and this is
675 ** just an optimization. */
676 if( pRe
->aOp
[0]==RE_OP_ANYSTAR
){
677 for(j
=0, i
=1; j
<sizeof(pRe
->zInit
)-2 && pRe
->aOp
[i
]==RE_OP_MATCH
; i
++){
678 unsigned x
= pRe
->aArg
[i
];
681 }else if( x
<=0xfff ){
682 pRe
->zInit
[j
++] = 0xc0 | (x
>>6);
683 pRe
->zInit
[j
++] = 0x80 | (x
&0x3f);
684 }else if( x
<=0xffff ){
685 pRe
->zInit
[j
++] = 0xd0 | (x
>>12);
686 pRe
->zInit
[j
++] = 0x80 | ((x
>>6)&0x3f);
687 pRe
->zInit
[j
++] = 0x80 | (x
&0x3f);
692 if( j
>0 && pRe
->zInit
[j
-1]==0 ) j
--;
699 ** Implementation of the regexp() SQL function. This function implements
700 ** the build-in REGEXP operator. The first argument to the function is the
701 ** pattern and the second argument is the string. So, the SQL statements:
705 ** is implemented as regexp(B,A).
707 static void re_sql_func(
708 sqlite3_context
*context
,
712 ReCompiled
*pRe
; /* Compiled regular expression */
713 const char *zPattern
; /* The regular expression */
714 const unsigned char *zStr
;/* String being searched */
715 const char *zErr
; /* Compile error message */
716 int setAux
= 0; /* True to invoke sqlite3_set_auxdata() */
718 pRe
= sqlite3_get_auxdata(context
, 0);
720 zPattern
= (const char*)sqlite3_value_text(argv
[0]);
721 if( zPattern
==0 ) return;
722 zErr
= re_compile(&pRe
, zPattern
, 0);
725 sqlite3_result_error(context
, zErr
, -1);
729 sqlite3_result_error_nomem(context
);
734 zStr
= (const unsigned char*)sqlite3_value_text(argv
[1]);
736 sqlite3_result_int(context
, re_match(pRe
, zStr
, -1));
739 sqlite3_set_auxdata(context
, 0, pRe
, (void(*)(void*))re_free
);
744 ** Invoke this routine to register the regexp() function with the
745 ** SQLite database connection.
748 __declspec(dllexport
)
750 int sqlite3_regexp_init(
753 const sqlite3_api_routines
*pApi
756 SQLITE_EXTENSION_INIT2(pApi
);
757 rc
= sqlite3_create_function(db
, "regexp", 2, SQLITE_UTF8
, 0,