4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
11 ******************************************************************************
13 ** The code in this file implements a compact but reasonably
14 ** efficient regular-expression matcher for posix extended regular
15 ** expressions against UTF8 text.
17 ** This file is an SQLite extension. It registers a single function
18 ** named "regexp(A,B)" where A is the regular expression and B is the
19 ** string to be matched. By registering this function, SQLite will also
20 ** then implement the "B regexp A" operator. Note that with the function
21 ** the regular expression comes first, but with the operator it comes second.
24 ** The following regular expression syntax is supported:
26 ** X* zero or more occurrences of X
27 ** X+ one or more occurrences of X
28 ** X? zero or one occurrences of X
29 ** X{p,q} between p and q occurrences of X
32 ** ^X X occurring at the beginning of the string
33 ** X$ X occurring at the end of the string
34 ** . Match any single character
35 ** \c Character c where c is one of \{}()[]|*+?.
36 ** \c C-language escapes for c in afnrtv. ex: \t or \n
37 ** \uXXXX Where XXXX is exactly 4 hex digits, unicode value XXXX
38 ** \xXX Where XX is exactly 2 hex digits, unicode value XX
39 ** [abc] Any single character from the set abc
40 ** [^abc] Any single character not in the set abc
41 ** [a-z] Any single character in the range a-z
42 ** [^a-z] Any single character not in the range a-z
44 ** \w Word character. [A-Za-z0-9_]
45 ** \W Non-word character
48 ** \s Whitespace character
49 ** \S Non-whitespace character
51 ** A nondeterministic finite automaton (NFA) is used for matching, so the
52 ** performance is bounded by O(N*M) where N is the size of the regular
53 ** expression and M is the size of the input string. The matcher never
54 ** exhibits exponential behavior. Note that the X{p,q} operator expands
55 ** to p copies of X followed by q-p copies of X? and that the size of the
56 ** regular expression in the O(N*M) performance bound is computed after this expansion.
61 #include "sqlite3ext.h"
62 SQLITE_EXTENSION_INIT1
65 ** The following #defines change the names of some functions implemented in
66 ** this file to prevent name collisions with C-library functions of the same name.
69 #define re_match sqlite3re_match
70 #define re_compile sqlite3re_compile
71 #define re_free sqlite3re_free
73 /* The end-of-input character */
74 #define RE_EOF 0 /* End of input */
75 #define RE_START 0xfffffff /* Start of input - larger than any UTF-8 code point */
77 /* The NFA is implemented as a sequence of opcodes taken from the following
78 ** set. Each opcode has a single integer argument.
80 #define RE_OP_MATCH 1 /* Match the one character in the argument */
81 #define RE_OP_ANY 2 /* Match any one character. (Implements ".") */
82 #define RE_OP_ANYSTAR 3 /* Special optimized version of .* */
83 #define RE_OP_FORK 4 /* Continue to both next and opcode at iArg */
84 #define RE_OP_GOTO 5 /* Jump to opcode at iArg */
85 #define RE_OP_ACCEPT 6 /* Halt and indicate a successful match */
86 #define RE_OP_CC_INC 7 /* Beginning of a [...] character class */
87 #define RE_OP_CC_EXC 8 /* Beginning of a [^...] character class */
88 #define RE_OP_CC_VALUE 9 /* Single value in a character class */
89 #define RE_OP_CC_RANGE 10 /* Range of values in a character class */
90 #define RE_OP_WORD 11 /* Perl word character [A-Za-z0-9_] */
91 #define RE_OP_NOTWORD 12 /* Not a perl word character */
92 #define RE_OP_DIGIT 13 /* digit: [0-9] */
93 #define RE_OP_NOTDIGIT 14 /* Not a digit */
94 #define RE_OP_SPACE 15 /* space: [ \t\n\r\v\f] */
95 #define RE_OP_NOTSPACE 16 /* Not a space */
96 #define RE_OP_BOUNDARY 17 /* Boundary between word and non-word */
97 #define RE_OP_ATSTART 18 /* Currently at the start of the string */
99 #if defined(SQLITE_DEBUG)
100 /* Opcode names used for symbolic debugging */
101 static const char *ReOpName
[] = {
122 #endif /* SQLITE_DEBUG */
125 /* Each opcode is a "state" in the NFA */
126 typedef unsigned short ReStateNumber
;
128 /* Because this is an NFA and not a DFA, multiple states can be active at
129 ** once. An instance of the following object records all active states in
130 ** the NFA. The implementation is optimized for the common case where the
131 ** number of active states is small.
133 typedef struct ReStateSet
{
134 unsigned nState
; /* Number of current states */
135 ReStateNumber
*aState
; /* Current states */
138 /* An input string read one character at a time.
140 typedef struct ReInput ReInput
;
142 const unsigned char *z
; /* All text */
143 int i
; /* Next byte to read */
144 int mx
; /* EOF when i>=mx */
147 /* A compiled NFA (or an NFA that is in the process of being compiled) is
148 ** an instance of the following object.
150 typedef struct ReCompiled ReCompiled
;
152 ReInput sIn
; /* Regular expression text */
153 const char *zErr
; /* Error message to return */
154 char *aOp
; /* Operators for the virtual machine */
155 int *aArg
; /* Arguments to each operator */
156 unsigned (*xNextChar
)(ReInput
*); /* Next character function */
157 unsigned char zInit
[12]; /* Initial text to match */
158 int nInit
; /* Number of bytes in zInit */
159 unsigned nState
; /* Number of entries in aOp[] and aArg[] */
160 unsigned nAlloc
; /* Slots allocated for aOp[] and aArg[] */
163 /* Add a state to the given state set if it is not already there */
164 static void re_add_state(ReStateSet
*pSet
, int newState
){
166 for(i
=0; i
<pSet
->nState
; i
++) if( pSet
->aState
[i
]==newState
) return;
167 pSet
->aState
[pSet
->nState
++] = (ReStateNumber
)newState
;
170 /* Extract the next unicode character from *pzIn and return it. Advance
171 ** *pzIn to the first byte past the end of the character returned. To
172 ** be clear: this routine converts utf8 to unicode. This routine is
173 ** optimized for the common case where the next character is a single byte.
175 static unsigned re_next_char(ReInput
*p
){
177 if( p
->i
>=p
->mx
) return 0;
180 if( (c
&0xe0)==0xc0 && p
->i
<p
->mx
&& (p
->z
[p
->i
]&0xc0)==0x80 ){
181 c
= (c
&0x1f)<<6 | (p
->z
[p
->i
++]&0x3f);
182 if( c
<0x80 ) c
= 0xfffd;
183 }else if( (c
&0xf0)==0xe0 && p
->i
+1<p
->mx
&& (p
->z
[p
->i
]&0xc0)==0x80
184 && (p
->z
[p
->i
+1]&0xc0)==0x80 ){
185 c
= (c
&0x0f)<<12 | ((p
->z
[p
->i
]&0x3f)<<6) | (p
->z
[p
->i
+1]&0x3f);
187 if( c
<=0x7ff || (c
>=0xd800 && c
<=0xdfff) ) c
= 0xfffd;
188 }else if( (c
&0xf8)==0xf0 && p
->i
+2<p
->mx
&& (p
->z
[p
->i
]&0xc0)==0x80
189 && (p
->z
[p
->i
+1]&0xc0)==0x80 && (p
->z
[p
->i
+2]&0xc0)==0x80 ){
190 c
= (c
&0x07)<<18 | ((p
->z
[p
->i
]&0x3f)<<12) | ((p
->z
[p
->i
+1]&0x3f)<<6)
191 | (p
->z
[p
->i
+2]&0x3f);
193 if( c
<=0xffff || c
>0x10ffff ) c
= 0xfffd;
200 static unsigned re_next_char_nocase(ReInput
*p
){
201 unsigned c
= re_next_char(p
);
202 if( c
>='A' && c
<='Z' ) c
+= 'a' - 'A';
/*
** Return true (non-zero) if c is a perl "word" character: [A-Za-z0-9_].
**
** Only the ASCII letters, the ASCII digits and the underscore qualify.
** Every other value, including 0 and anything at or above 0x80, yields 0.
*/
static int re_word_char(int c){
  return (c>='0' && c<='9') || (c>='a' && c<='z')
      || (c>='A' && c<='Z') || c=='_';
}
/*
** Return true (non-zero) if c is a "digit" character: [0-9].
**
** Only the ten ASCII digits qualify; every other value yields 0.
*/
static int re_digit_char(int c){
  return (c>='0' && c<='9');
}
/*
** Return true (non-zero) if c is a perl "space" character: [ \t\r\n\v\f].
**
** Exactly six ASCII characters qualify: space, horizontal tab, newline,
** carriage return, vertical tab and form feed.  Every other value yields 0.
*/
static int re_space_char(int c){
  return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f';
}
222 /* Run a compiled regular expression on the zero-terminated input
223 ** string zIn[]. Return true on a match and false if there is no match.
225 static int re_match(ReCompiled
*pRe
, const unsigned char *zIn
, int nIn
){
226 ReStateSet aStateSet
[2], *pThis
, *pNext
;
227 ReStateNumber aSpace
[100];
228 ReStateNumber
*pToFree
;
230 unsigned int iSwap
= 0;
238 in
.mx
= nIn
>=0 ? nIn
: (int)strlen((char const*)zIn
);
240 /* Look for the initial prefix match, if there is one. */
242 unsigned char x
= pRe
->zInit
[0];
243 while( in
.i
+pRe
->nInit
<=in
.mx
245 strncmp((const char*)zIn
+in
.i
, (const char*)pRe
->zInit
, pRe
->nInit
)!=0)
249 if( in
.i
+pRe
->nInit
>in
.mx
) return 0;
253 if( pRe
->nState
<=(sizeof(aSpace
)/(sizeof(aSpace
[0])*2)) ){
255 aStateSet
[0].aState
= aSpace
;
257 pToFree
= sqlite3_malloc64( sizeof(ReStateNumber
)*2*pRe
->nState
);
258 if( pToFree
==0 ) return -1;
259 aStateSet
[0].aState
= pToFree
;
261 aStateSet
[1].aState
= &aStateSet
[0].aState
[pRe
->nState
];
262 pNext
= &aStateSet
[1];
264 re_add_state(pNext
, 0);
265 while( c
!=RE_EOF
&& pNext
->nState
>0 ){
267 c
= pRe
->xNextChar(&in
);
269 pNext
= &aStateSet
[iSwap
];
272 for(i
=0; i
<pThis
->nState
; i
++){
273 int x
= pThis
->aState
[i
];
274 switch( pRe
->aOp
[x
] ){
276 if( pRe
->aArg
[x
]==c
) re_add_state(pNext
, x
+1);
279 case RE_OP_ATSTART
: {
280 if( cPrev
==RE_START
) re_add_state(pThis
, x
+1);
284 if( c
!=0 ) re_add_state(pNext
, x
+1);
288 if( re_word_char(c
) ) re_add_state(pNext
, x
+1);
291 case RE_OP_NOTWORD
: {
292 if( !re_word_char(c
) && c
!=0 ) re_add_state(pNext
, x
+1);
296 if( re_digit_char(c
) ) re_add_state(pNext
, x
+1);
299 case RE_OP_NOTDIGIT
: {
300 if( !re_digit_char(c
) && c
!=0 ) re_add_state(pNext
, x
+1);
304 if( re_space_char(c
) ) re_add_state(pNext
, x
+1);
307 case RE_OP_NOTSPACE
: {
308 if( !re_space_char(c
) && c
!=0 ) re_add_state(pNext
, x
+1);
311 case RE_OP_BOUNDARY
: {
312 if( re_word_char(c
)!=re_word_char(cPrev
) ) re_add_state(pThis
, x
+1);
315 case RE_OP_ANYSTAR
: {
316 re_add_state(pNext
, x
);
317 re_add_state(pThis
, x
+1);
321 re_add_state(pThis
, x
+pRe
->aArg
[x
]);
322 re_add_state(pThis
, x
+1);
326 re_add_state(pThis
, x
+pRe
->aArg
[x
]);
335 /* fall-through */ goto re_op_cc_inc
;
337 case RE_OP_CC_INC
: re_op_cc_inc
: {
339 int n
= pRe
->aArg
[x
];
341 for(j
=1; j
>0 && j
<n
; j
++){
342 if( pRe
->aOp
[x
+j
]==RE_OP_CC_VALUE
){
343 if( pRe
->aArg
[x
+j
]==c
){
348 if( pRe
->aArg
[x
+j
]<=c
&& pRe
->aArg
[x
+j
+1]>=c
){
356 if( pRe
->aOp
[x
]==RE_OP_CC_EXC
) hit
= !hit
;
357 if( hit
) re_add_state(pNext
, x
+n
);
363 for(i
=0; i
<pNext
->nState
; i
++){
364 int x
= pNext
->aState
[i
];
365 while( pRe
->aOp
[x
]==RE_OP_GOTO
) x
+= pRe
->aArg
[x
];
366 if( pRe
->aOp
[x
]==RE_OP_ACCEPT
){ rc
= 1; break; }
369 sqlite3_free(pToFree
);
373 /* Resize the opcode and argument arrays for an RE under construction.
375 static int re_resize(ReCompiled
*p
, int N
){
378 aOp
= sqlite3_realloc64(p
->aOp
, N
*sizeof(p
->aOp
[0]));
379 if( aOp
==0 ) return 1;
381 aArg
= sqlite3_realloc64(p
->aArg
, N
*sizeof(p
->aArg
[0]));
382 if( aArg
==0 ) return 1;
388 /* Insert a new opcode and argument into an RE under construction. The
389 ** insertion point is just prior to existing opcode iBefore.
391 static int re_insert(ReCompiled
*p
, int iBefore
, int op
, int arg
){
393 if( p
->nAlloc
<=p
->nState
&& re_resize(p
, p
->nAlloc
*2) ) return 0;
394 for(i
=p
->nState
; i
>iBefore
; i
--){
395 p
->aOp
[i
] = p
->aOp
[i
-1];
396 p
->aArg
[i
] = p
->aArg
[i
-1];
399 p
->aOp
[iBefore
] = (char)op
;
400 p
->aArg
[iBefore
] = arg
;
404 /* Append a new opcode and argument to the end of the RE under construction.
406 static int re_append(ReCompiled
*p
, int op
, int arg
){
407 return re_insert(p
, p
->nState
, op
, arg
);
410 /* Make a copy of N opcodes starting at iStart onto the end of the RE
411 ** under construction.
413 static void re_copy(ReCompiled
*p
, int iStart
, int N
){
414 if( p
->nState
+N
>=p
->nAlloc
&& re_resize(p
, p
->nAlloc
*2+N
) ) return;
415 memcpy(&p
->aOp
[p
->nState
], &p
->aOp
[iStart
], N
*sizeof(p
->aOp
[0]));
416 memcpy(&p
->aArg
[p
->nState
], &p
->aArg
[iStart
], N
*sizeof(p
->aArg
[0]));
420 /* Return true if c is a hexadecimal digit character: [0-9a-fA-F]
421 ** If c is a hex digit, also set *pV = (*pV)*16 + valueof(c). If
422 ** c is not a hex digit *pV is unchanged.
424 static int re_hex(int c
, int *pV
){
425 if( c
>='0' && c
<='9' ){
427 }else if( c
>='a' && c
<='f' ){
429 }else if( c
>='A' && c
<='F' ){
434 *pV
= (*pV
)*16 + (c
& 0xff);
438 /* A backslash character has been seen, read the next character and
439 ** return its interpretation.
441 static unsigned re_esc_char(ReCompiled
*p
){
442 static const char zEsc
[] = "afnrtv\\()*.+?[$^{|}]";
443 static const char zTrans
[] = "\a\f\n\r\t\v";
446 if( p
->sIn
.i
>=p
->sIn
.mx
) return 0;
447 c
= p
->sIn
.z
[p
->sIn
.i
];
448 if( c
=='u' && p
->sIn
.i
+4<p
->sIn
.mx
){
449 const unsigned char *zIn
= p
->sIn
.z
+ p
->sIn
.i
;
450 if( re_hex(zIn
[1],&v
)
459 if( c
=='x' && p
->sIn
.i
+2<p
->sIn
.mx
){
460 const unsigned char *zIn
= p
->sIn
.z
+ p
->sIn
.i
;
461 if( re_hex(zIn
[1],&v
)
468 for(i
=0; zEsc
[i
] && zEsc
[i
]!=c
; i
++){}
470 if( i
<6 ) c
= zTrans
[i
];
473 p
->zErr
= "unknown \\ escape";
478 /* Forward declaration */
479 static const char *re_subcompile_string(ReCompiled
*);
481 /* Peek at the next byte of input */
482 static unsigned char rePeek(ReCompiled
*p
){
483 return p
->sIn
.i
<p
->sIn
.mx
? p
->sIn
.z
[p
->sIn
.i
] : 0;
486 /* Compile RE text into a sequence of opcodes. Continue up to the
487 ** first unmatched ")" character, then return. If an error is found,
488 ** return a pointer to the error message string.
490 static const char *re_subcompile_re(ReCompiled
*p
){
492 int iStart
, iEnd
, iGoto
;
494 zErr
= re_subcompile_string(p
);
495 if( zErr
) return zErr
;
496 while( rePeek(p
)=='|' ){
498 re_insert(p
, iStart
, RE_OP_FORK
, iEnd
+ 2 - iStart
);
499 iGoto
= re_append(p
, RE_OP_GOTO
, 0);
501 zErr
= re_subcompile_string(p
);
502 if( zErr
) return zErr
;
503 p
->aArg
[iGoto
] = p
->nState
- iGoto
;
508 /* Compile an element of regular expression text (anything that can be
509 ** an operand to the "|" operator). Return NULL on success or a pointer
510 ** to the error message if there is a problem.
512 static const char *re_subcompile_string(ReCompiled
*p
){
517 while( (c
= p
->xNextChar(&p
->sIn
))!=0 ){
526 zErr
= re_subcompile_re(p
);
527 if( zErr
) return zErr
;
528 if( rePeek(p
)!=')' ) return "unmatched '('";
533 if( rePeek(p
)=='*' ){
534 re_append(p
, RE_OP_ANYSTAR
, 0);
537 re_append(p
, RE_OP_ANY
, 0);
542 if( iPrev
<0 ) return "'*' without operand";
543 re_insert(p
, iPrev
, RE_OP_GOTO
, p
->nState
- iPrev
+ 1);
544 re_append(p
, RE_OP_FORK
, iPrev
- p
->nState
+ 1);
548 if( iPrev
<0 ) return "'+' without operand";
549 re_append(p
, RE_OP_FORK
, iPrev
- p
->nState
);
553 if( iPrev
<0 ) return "'?' without operand";
554 re_insert(p
, iPrev
, RE_OP_FORK
, p
->nState
- iPrev
+1);
558 re_append(p
, RE_OP_MATCH
, RE_EOF
);
562 re_append(p
, RE_OP_ATSTART
, 0);
568 if( iPrev
<0 ) return "'{m,n}' without operand";
569 while( (c
=rePeek(p
))>='0' && c
<='9' ){ m
= m
*10 + c
- '0'; p
->sIn
.i
++; }
574 while( (c
=rePeek(p
))>='0' && c
<='9' ){ n
= n
*10 + c
-'0'; p
->sIn
.i
++; }
576 if( c
!='}' ) return "unmatched '{'";
577 if( n
>0 && n
<m
) return "n less than m in '{m,n}'";
579 sz
= p
->nState
- iPrev
;
581 if( n
==0 ) return "both m and n are zero in '{m,n}'";
582 re_insert(p
, iPrev
, RE_OP_FORK
, sz
+1);
586 for(j
=1; j
<m
; j
++) re_copy(p
, iPrev
, sz
);
589 re_append(p
, RE_OP_FORK
, sz
+1);
590 re_copy(p
, iPrev
, sz
);
593 re_append(p
, RE_OP_FORK
, -sz
);
598 unsigned int iFirst
= p
->nState
;
599 if( rePeek(p
)=='^' ){
600 re_append(p
, RE_OP_CC_EXC
, 0);
603 re_append(p
, RE_OP_CC_INC
, 0);
605 while( (c
= p
->xNextChar(&p
->sIn
))!=0 ){
606 if( c
=='[' && rePeek(p
)==':' ){
607 return "POSIX character classes not supported";
609 if( c
=='\\' ) c
= re_esc_char(p
);
610 if( rePeek(p
)=='-' ){
611 re_append(p
, RE_OP_CC_RANGE
, c
);
613 c
= p
->xNextChar(&p
->sIn
);
614 if( c
=='\\' ) c
= re_esc_char(p
);
615 re_append(p
, RE_OP_CC_RANGE
, c
);
617 re_append(p
, RE_OP_CC_VALUE
, c
);
619 if( rePeek(p
)==']' ){ p
->sIn
.i
++; break; }
621 if( c
==0 ) return "unclosed '['";
622 if( p
->nState
>iFirst
) p
->aArg
[iFirst
] = p
->nState
- iFirst
;
628 case 'b': specialOp
= RE_OP_BOUNDARY
; break;
629 case 'd': specialOp
= RE_OP_DIGIT
; break;
630 case 'D': specialOp
= RE_OP_NOTDIGIT
; break;
631 case 's': specialOp
= RE_OP_SPACE
; break;
632 case 'S': specialOp
= RE_OP_NOTSPACE
; break;
633 case 'w': specialOp
= RE_OP_WORD
; break;
634 case 'W': specialOp
= RE_OP_NOTWORD
; break;
638 re_append(p
, specialOp
, 0);
641 re_append(p
, RE_OP_MATCH
, c
);
646 re_append(p
, RE_OP_MATCH
, c
);
655 /* Free and reclaim all the memory used by a previously compiled
656 ** regular expression. Applications should invoke this routine once
657 ** for every call to re_compile() to avoid memory leaks.
659 static void re_free(ReCompiled
*pRe
){
661 sqlite3_free(pRe
->aOp
);
662 sqlite3_free(pRe
->aArg
);
668 ** Compile a textual regular expression in zIn[] into a compiled regular
669 ** expression suitable for use by re_match() and return a pointer to the
670 ** compiled regular expression in *ppRe. Return NULL on success or an
671 ** error message if something goes wrong.
673 static const char *re_compile(ReCompiled
**ppRe
, const char *zIn
, int noCase
){
679 pRe
= sqlite3_malloc( sizeof(*pRe
) );
681 return "out of memory";
683 memset(pRe
, 0, sizeof(*pRe
));
684 pRe
->xNextChar
= noCase
? re_next_char_nocase
: re_next_char
;
685 if( re_resize(pRe
, 30) ){
687 return "out of memory";
692 re_append(pRe
, RE_OP_ANYSTAR
, 0);
694 pRe
->sIn
.z
= (unsigned char*)zIn
;
696 pRe
->sIn
.mx
= (int)strlen(zIn
);
697 zErr
= re_subcompile_re(pRe
);
702 if( pRe
->sIn
.i
>=pRe
->sIn
.mx
){
703 re_append(pRe
, RE_OP_ACCEPT
, 0);
707 return "unrecognized character";
710 /* The following is a performance optimization. If the regex begins with
711 ** ".*" (if the input regex lacks an initial "^") and afterwards there are
712 ** one or more matching characters, enter those matching characters into
713 ** zInit[]. The re_match() routine can then search ahead in the input
714 ** string looking for the initial match without having to run the whole
715 ** regex engine over the string. Do not worry about trying to match
716 ** unicode characters beyond plane 0 - those are very rare and this is
717 ** just an optimization. */
718 if( pRe
->aOp
[0]==RE_OP_ANYSTAR
&& !noCase
){
719 for(j
=0, i
=1; j
<(int)sizeof(pRe
->zInit
)-2 && pRe
->aOp
[i
]==RE_OP_MATCH
; i
++){
720 unsigned x
= pRe
->aArg
[i
];
722 pRe
->zInit
[j
++] = (unsigned char)x
;
723 }else if( x
<=0x7ff ){
724 pRe
->zInit
[j
++] = (unsigned char)(0xc0 | (x
>>6));
725 pRe
->zInit
[j
++] = 0x80 | (x
&0x3f);
726 }else if( x
<=0xffff ){
727 pRe
->zInit
[j
++] = (unsigned char)(0xe0 | (x
>>12));
728 pRe
->zInit
[j
++] = 0x80 | ((x
>>6)&0x3f);
729 pRe
->zInit
[j
++] = 0x80 | (x
&0x3f);
734 if( j
>0 && pRe
->zInit
[j
-1]==0 ) j
--;
741 ** Implementation of the regexp() SQL function. This function implements
742 ** the built-in REGEXP operator. The first argument to the function is the
743 ** pattern and the second argument is the string. So, the SQL statements:
747 ** is implemented as regexp(B,A).
749 static void re_sql_func(
750 sqlite3_context
*context
,
754 ReCompiled
*pRe
; /* Compiled regular expression */
755 const char *zPattern
; /* The regular expression */
756 const unsigned char *zStr
;/* String being searched */
757 const char *zErr
; /* Compile error message */
758 int setAux
= 0; /* True to invoke sqlite3_set_auxdata() */
760 (void)argc
; /* Unused */
761 pRe
= sqlite3_get_auxdata(context
, 0);
763 zPattern
= (const char*)sqlite3_value_text(argv
[0]);
764 if( zPattern
==0 ) return;
765 zErr
= re_compile(&pRe
, zPattern
, sqlite3_user_data(context
)!=0);
768 sqlite3_result_error(context
, zErr
, -1);
772 sqlite3_result_error_nomem(context
);
777 zStr
= (const unsigned char*)sqlite3_value_text(argv
[1]);
779 sqlite3_result_int(context
, re_match(pRe
, zStr
, -1));
782 sqlite3_set_auxdata(context
, 0, pRe
, (void(*)(void*))re_free
);
786 #if defined(SQLITE_DEBUG)
788 ** This function is used for testing and debugging only. It is only available
789 ** if the SQLITE_DEBUG compile-time option is used.
791 ** Compile a regular expression and then convert the compiled expression into
792 ** text and return that text.
794 static void re_bytecode_func(
795 sqlite3_context
*context
,
799 const char *zPattern
;
808 zPattern
= (const char*)sqlite3_value_text(argv
[0]);
809 if( zPattern
==0 ) return;
810 zErr
= re_compile(&pRe
, zPattern
, sqlite3_user_data(context
)!=0);
813 sqlite3_result_error(context
, zErr
, -1);
817 sqlite3_result_error_nomem(context
);
820 pStr
= sqlite3_str_new(0);
821 if( pStr
==0 ) goto re_bytecode_func_err
;
823 sqlite3_str_appendf(pStr
, "INIT ");
824 for(i
=0; i
<pRe
->nInit
; i
++){
825 sqlite3_str_appendf(pStr
, "%02x", pRe
->zInit
[i
]);
827 sqlite3_str_appendf(pStr
, "\n");
829 for(i
=0; (unsigned)i
<pRe
->nState
; i
++){
830 sqlite3_str_appendf(pStr
, "%-8s %4d\n",
831 ReOpName
[(unsigned char)pRe
->aOp
[i
]], pRe
->aArg
[i
]);
833 n
= sqlite3_str_length(pStr
);
834 z
= sqlite3_str_finish(pStr
);
838 sqlite3_result_text(context
, z
, n
-1, sqlite3_free
);
841 re_bytecode_func_err
:
845 #endif /* SQLITE_DEBUG */
849 ** Invoke this routine to register the regexp() function with the
850 ** SQLite database connection.
853 __declspec(dllexport
)
855 int sqlite3_regexp_init(
858 const sqlite3_api_routines
*pApi
861 SQLITE_EXTENSION_INIT2(pApi
);
862 (void)pzErrMsg
; /* Unused */
863 rc
= sqlite3_create_function(db
, "regexp", 2,
864 SQLITE_UTF8
|SQLITE_INNOCUOUS
|SQLITE_DETERMINISTIC
,
865 0, re_sql_func
, 0, 0);
867 /* The regexpi(PATTERN,STRING) function is a case-insensitive version
868 ** of regexp(PATTERN,STRING). */
869 rc
= sqlite3_create_function(db
, "regexpi", 2,
870 SQLITE_UTF8
|SQLITE_INNOCUOUS
|SQLITE_DETERMINISTIC
,
871 (void*)db
, re_sql_func
, 0, 0);
872 #if defined(SQLITE_DEBUG)
874 rc
= sqlite3_create_function(db
, "regexp_bytecode", 1,
875 SQLITE_UTF8
|SQLITE_INNOCUOUS
|SQLITE_DETERMINISTIC
,
876 0, re_bytecode_func
, 0, 0);
878 #endif /* SQLITE_DEBUG */