4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
11 ******************************************************************************
13 ** This file is not part of the production FTS code. It is only used for
14 ** testing. It contains a Tcl command that can be used to test if a document
15 ** matches an FTS NEAR expression.
17 ** As of March 2012, it also contains a version 1 tokenizer used for testing
18 ** that the sqlite3_tokenizer_module.xLanguage() method is invoked correctly.
21 #if defined(INCLUDE_SQLITE_TCL_H)
22 # include "sqlite_tcl.h"
25 # ifndef SQLITE_TCLAPI
26 # define SQLITE_TCLAPI
32 #if defined(SQLITE_TEST)
33 #if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)
35 /* Required so that the "ifdef SQLITE_ENABLE_FTS3" below works */
/* Maximum number of tokens in one phrase: it sizes NearPhrase.aToken[] and
** is the limit behind the "Too many tokens in phrase" error. */
38 #define NM_MAX_TOKEN 12
/* Forward typedefs for the three structures used by the NEAR matcher. */
40 typedef struct NearPhrase NearPhrase
;
41 typedef struct NearDocument NearDocument
;
42 typedef struct NearToken NearToken
;
/* A tokenized document: aToken[] holds nToken entries. */
45 int nToken
; /* Number of tokens in aToken[] */
46 NearToken
*aToken
; /* Token array */
/* A single token, stored as a pointer/length pair (both are filled in by
** Tcl_GetStringFromObj() in fts3_near_match_cmd). */
50 int n
; /* Length of token in bytes */
51 const char *z
; /* Pointer to token string */
/* One phrase of a NEAR expression. nNear is only assigned for phrases with
** index 1 and up; the array is zeroed with memset() before being filled. */
55 int nNear
; /* Preceding NEAR value */
56 int nToken
; /* Number of tokens in this phrase */
57 NearToken aToken
[NM_MAX_TOKEN
]; /* Array of tokens in this phrase */
/*
** Compare the tokens of phrase p, one for one, against the document tokens
** aToken[0], aToken[1], ... Any failed comparison returns 0 at once. Callers
** treat a non-zero result as "phrase matches at this position".
*/
60 static int nm_phrase_match(
66 for(ii
=0; ii
<p
->nToken
; ii
++){
67 NearToken
*pToken
= &p
->aToken
[ii
];
/* Phrase token ends in '*': prefix comparison. The document token must be
** at least as long as the text before the '*' and must begin with it. */
68 if( pToken
->n
>0 && pToken
->z
[pToken
->n
-1]=='*' ){
69 if( aToken
[ii
].n
<(pToken
->n
-1) ) return 0;
70 if( memcmp(aToken
[ii
].z
, pToken
->z
, pToken
->n
-1) ) return 0;
/* Exact comparison: same byte length and identical bytes. */
72 if( aToken
[ii
].n
!=pToken
->n
) return 0;
73 if( memcmp(aToken
[ii
].z
, pToken
->z
, pToken
->n
) ) return 0;
/*
** Phrase iPhrase has been found at document position iPos. Look for the
** neighbouring phrase aPhrase[iPhrase+iDir] within a window of positions
** around iPos, and recurse from every position where it matches. Returns 1
** as soon as the end of the phrase array is reached in the direction of
** travel, or as soon as a recursive call returns non-zero.
*/
80 static int nm_near_chain(
81 int iDir
, /* Direction to iterate through aPhrase[] */
82 NearDocument
*pDoc
, /* Document to match against */
83 int iPos
, /* Position at which iPhrase was found */
84 int nPhrase
, /* Size of phrase array */
85 NearPhrase
*aPhrase
, /* Phrase array */
86 int iPhrase
/* Index of phrase found */
96 assert( iDir
==1 || iDir
==-1 );
/* Last phrase already reached: nothing further to check. Otherwise use the
** NEAR value stored on the following phrase (presumably the iDir==1 case). */
99 if( (iPhrase
+1)==nPhrase
) return 1;
100 nNear
= aPhrase
[iPhrase
+1].nNear
;
/* First phrase already reached: nothing further to check. Otherwise use this
** phrase's own NEAR value (presumably the iDir==-1 case). */
102 if( iPhrase
==0 ) return 1;
103 nNear
= aPhrase
[iPhrase
].nNear
;
/* pPrev is the phrase just matched; p is the neighbour to search for. */
105 pPrev
= &aPhrase
[iPhrase
];
106 iPhrase2
= iPhrase
+iDir
;
107 p
= &aPhrase
[iPhrase2
];
/* Window of candidate start positions for p around iPos. */
109 iStart
= iPos
- nNear
- p
->nToken
;
110 iStop
= iPos
+ nNear
+ pPrev
->nToken
;
/* Clamp the window so that p, placed at any candidate position, lies
** entirely inside the document. */
112 if( iStart
<0 ) iStart
= 0;
113 if( iStop
> pDoc
->nToken
- p
->nToken
) iStop
= pDoc
->nToken
- p
->nToken
;
115 for(ii
=iStart
; ii
<=iStop
; ii
++){
116 if( nm_phrase_match(p
, &pDoc
->aToken
[ii
]) ){
/* p matches at ii: continue the chain from there in the same direction. */
117 if( nm_near_chain(iDir
, pDoc
, ii
, nPhrase
, aPhrase
, iPhrase2
) ) return 1;
/*
** Scan every document position at which phrase iPhrase could start. A
** position only qualifies when the phrase matches there and both the forward
** and the reverse NEAR chains succeed from it. The caller stores the result
** as the occurrence count for phrase iPhrase.
*/
124 static int nm_match_count(
125 NearDocument
*pDoc
, /* Document to match against */
126 int nPhrase
, /* Size of phrase array */
127 NearPhrase
*aPhrase
, /* Phrase array */
128 int iPhrase
/* Index of phrase to count matches for */
132 NearPhrase
*p
= &aPhrase
[iPhrase
];
/* The bound keeps the whole phrase inside the document. */
134 for(ii
=0; ii
<(pDoc
->nToken
+ 1 - p
->nToken
); ii
++){
135 if( nm_phrase_match(p
, &pDoc
->aToken
[ii
]) ){
136 /* Test forward NEAR chain (i>iPhrase) */
137 if( 0==nm_near_chain(1, pDoc
, ii
, nPhrase
, aPhrase
, iPhrase
) ) continue;
139 /* Test reverse NEAR chain (i<iPhrase) */
140 if( 0==nm_near_chain(-1, pDoc
, ii
, nPhrase
, aPhrase
, iPhrase
) ) continue;
142 /* This is a real match. Increment the counter. */
151 ** Tclcmd: fts3_near_match DOCUMENT EXPR ?OPTION VALUE?...
/*
** Implementation of the Tcl command "fts3_near_match".
**
** objv[1] is split as a Tcl list into the document tokens. objv[2] is split
** as a Tcl list into the expression: even-indexed elements are phrases (each
** itself a list of at most NM_MAX_TOKEN tokens) and the odd-indexed elements
** between them are integer NEAR values. Any further arguments are
** OPTION VALUE pairs; the only option in the table is "-phrasecountvar".
**
** The interpreter result is set to the boolean (nTotal>0); nTotal is
** presumably the sum of the per-phrase counts -- confirm. A list with one
** nm_match_count() value per phrase is built and written with
** Tcl_ObjSetVar2() to the variable named by the -phrasecountvar value.
** Both ckalloc()'d arrays are released with ckfree() before returning.
*/
153 static int SQLITE_TCLAPI
fts3_near_match_cmd(
154 ClientData clientData
,
157 Tcl_Obj
*CONST objv
[]
163 NearPhrase
*aPhrase
= 0;
164 NearDocument doc
= {0, 0};
165 Tcl_Obj
**apDocToken
;
167 Tcl_Obj
*pPhrasecount
= 0;
169 Tcl_Obj
**apExprToken
;
172 UNUSED_PARAMETER(clientData
);
174 /* Must have 3 or more arguments. */
/* An even objc is also rejected, so options always come in pairs. */
175 if( objc
<3 || (objc
%2)==0 ){
176 Tcl_WrongNumArgs(interp
, 1, objv
, "DOCUMENT EXPR ?OPTION VALUE?...");
/* Walk the OPTION VALUE pairs that follow DOCUMENT and EXPR. */
181 for(ii
=3; ii
<objc
; ii
+=2){
182 enum NM_enum
{ NM_PHRASECOUNTS
};
183 struct TestnmSubcmd
{
187 { "-phrasecountvar", NM_PHRASECOUNTS
},
191 if( Tcl_GetIndexFromObjStruct(
192 interp
, objv
[ii
], aOpt
, sizeof(aOpt
[0]), "option", 0, &iOpt
)
197 switch( aOpt
[iOpt
].eOpt
){
198 case NM_PHRASECOUNTS
:
199 pPhrasecount
= objv
[ii
+1];
/* Split the document into tokens. doc.aToken[] only borrows the string
** pointers returned by Tcl_GetStringFromObj(). */
204 rc
= Tcl_ListObjGetElements(interp
, objv
[1], &doc
.nToken
, &apDocToken
);
205 if( rc
!=TCL_OK
) goto near_match_out
;
206 doc
.aToken
= (NearToken
*)ckalloc(doc
.nToken
*sizeof(NearToken
));
207 for(ii
=0; ii
<doc
.nToken
; ii
++){
208 doc
.aToken
[ii
].z
= Tcl_GetStringFromObj(apDocToken
[ii
], &doc
.aToken
[ii
].n
);
/* Split the expression. Its elements alternate PHRASE NEAR PHRASE ..., so
** N phrases occupy 2N-1 elements. */
211 rc
= Tcl_ListObjGetElements(interp
, objv
[2], &nExprToken
, &apExprToken
);
212 if( rc
!=TCL_OK
) goto near_match_out
;
214 nPhrase
= (nExprToken
+ 1) / 2;
215 aPhrase
= (NearPhrase
*)ckalloc(nPhrase
* sizeof(NearPhrase
));
216 memset(aPhrase
, 0, nPhrase
* sizeof(NearPhrase
));
/* First pass: load the tokens of each phrase (even-indexed elements). */
217 for(ii
=0; ii
<nPhrase
; ii
++){
218 Tcl_Obj
*pPhrase
= apExprToken
[ii
*2];
223 rc
= Tcl_ListObjGetElements(interp
, pPhrase
, &nToken
, &apToken
);
224 if( rc
!=TCL_OK
) goto near_match_out
;
225 if( nToken
>NM_MAX_TOKEN
){
/* NOTE(review): Tcl_AppendResult() is variadic and documented to be
** terminated by (char*)NULL; the bare 0 here is an int. Consider (char*)0. */
226 Tcl_AppendResult(interp
, "Too many tokens in phrase", 0);
230 for(jj
=0; jj
<nToken
; jj
++){
231 NearToken
*pT
= &aPhrase
[ii
].aToken
[jj
];
232 pT
->z
= Tcl_GetStringFromObj(apToken
[jj
], &pT
->n
);
234 aPhrase
[ii
].nToken
= nToken
;
/* Second pass: the element before each phrase after the first is its NEAR
** value. */
236 for(ii
=1; ii
<nPhrase
; ii
++){
237 Tcl_Obj
*pNear
= apExprToken
[2*ii
-1];
239 rc
= Tcl_GetIntFromObj(interp
, pNear
, &nNear
);
240 if( rc
!=TCL_OK
) goto near_match_out
;
241 aPhrase
[ii
].nNear
= nNear
;
/* Build the list of per-phrase match counts. */
245 Tcl_IncrRefCount(pRet
);
246 for(ii
=0; ii
<nPhrase
; ii
++){
247 int nOcc
= nm_match_count(&doc
, nPhrase
, aPhrase
, ii
);
248 Tcl_ListObjAppendElement(interp
, pRet
, Tcl_NewIntObj(nOcc
));
252 Tcl_ObjSetVar2(interp
, pPhrasecount
, 0, pRet
, 0);
254 Tcl_DecrRefCount(pRet
);
255 Tcl_SetObjResult(interp
, Tcl_NewBooleanObj(nTotal
>0));
/* Release both arrays; only the arrays are owned here, not the strings. */
258 ckfree((char *)aPhrase
);
259 ckfree((char *)doc
.aToken
);
264 ** Tclcmd: fts3_configure_incr_load ?CHUNKSIZE THRESHOLD?
266 ** Normally, FTS uses hard-coded values to determine the minimum doclist
267 ** size eligible for incremental loading, and the size of the chunks loaded
268 ** when a doclist is incrementally loaded. This command allows the built-in
269 ** values to be overridden for testing purposes.
271 ** If present, the first argument is the chunksize in bytes to load doclists
272 ** in. The second argument is the minimum doclist size in bytes to use
273 ** incremental loading with.
275 ** Whether or not the arguments are present, this command returns a list of
276 ** two integers - the initial chunksize and threshold when the command is
277 ** invoked. This can be used to restore the default behavior after running
278 ** tests. For example:
280 ** # Override incr-load settings for testing:
281 ** set cfg [fts3_configure_incr_load $new_chunksize $new_threshold]
283 ** .... run tests ....
285 ** # Restore initial incr-load settings:
286 ** eval fts3_configure_incr_load $cfg
/*
** Implementation of the Tcl command "fts3_configure_incr_load". It accepts
** either no extra arguments or exactly two. The current values of
** test_fts3_node_chunksize and test_fts3_node_chunk_threshold are captured
** in a two-element list before the globals are overwritten from objv[1] and
** objv[2]; that list becomes the interpreter result.
*/
288 static int SQLITE_TCLAPI
fts3_configure_incr_load_cmd(
289 ClientData clientData
,
292 Tcl_Obj
*CONST objv
[]
294 #ifdef SQLITE_ENABLE_FTS3
295 extern int test_fts3_node_chunksize
;
296 extern int test_fts3_node_chunk_threshold
;
299 if( objc
!=1 && objc
!=3 ){
300 Tcl_WrongNumArgs(interp
, 1, objv
, "?CHUNKSIZE THRESHOLD?");
/* Snapshot the current settings first so the caller can restore them. */
305 Tcl_IncrRefCount(pRet
);
306 Tcl_ListObjAppendElement(
307 interp
, pRet
, Tcl_NewIntObj(test_fts3_node_chunksize
));
308 Tcl_ListObjAppendElement(
309 interp
, pRet
, Tcl_NewIntObj(test_fts3_node_chunk_threshold
));
/* Parse both integers; pRet is released when either conversion fails. */
314 if( Tcl_GetIntFromObj(interp
, objv
[1], &iArg1
)
315 || Tcl_GetIntFromObj(interp
, objv
[2], &iArg2
)
317 Tcl_DecrRefCount(pRet
);
320 test_fts3_node_chunksize
= iArg1
;
321 test_fts3_node_chunk_threshold
= iArg2
;
/* Hand the snapshot to the interpreter, then drop the local reference. */
324 Tcl_SetObjResult(interp
, pRet
);
325 Tcl_DecrRefCount(pRet
);
327 UNUSED_PARAMETER(clientData
);
331 #ifdef SQLITE_ENABLE_FTS3
332 /**************************************************************************
333 ** Beginning of test tokenizer code.
335 ** For language 0, this tokenizer is similar to the default 'simple'
336 ** tokenizer. For other languages L, the following:
338 ** * Odd numbered languages are case-sensitive. Even numbered
339 ** languages are not.
341 ** * Language ids 100 or greater are considered an error.
343 ** The implementation assumes that the input contains only ASCII characters
344 ** (i.e. those that may be encoded in UTF-8 using a single byte).
/* Tokenizer object for the test tokenizer. It begins with the generic
** sqlite3_tokenizer base, so a pointer to it is cast to and from
** sqlite3_tokenizer* by the create and destroy routines. */
346 typedef struct test_tokenizer
{
347 sqlite3_tokenizer base
;
/* Cursor for the test tokenizer. It begins with the generic
** sqlite3_tokenizer_cursor base, so a pointer to it is cast to and from
** sqlite3_tokenizer_cursor* by the cursor methods. */
350 typedef struct test_tokenizer_cursor
{
351 sqlite3_tokenizer_cursor base
;
352 const char *aInput
; /* Input being tokenized */
353 int nInput
; /* Size of the input in bytes */
354 int iInput
; /* Current offset in aInput */
355 int iToken
; /* Index of next token to be returned */
356 char *aBuffer
; /* Buffer containing current token */
357 int nBuffer
; /* Number of bytes allocated at aBuffer */
358 int iLangid
; /* Configured language id */
359 } test_tokenizer_cursor
;
/*
** Tokenizer constructor. argc and argv are ignored. Allocates a zeroed
** test_tokenizer with sqlite3_malloc() and stores it in *ppTokenizer, or
** returns SQLITE_NOMEM when the allocation fails.
*/
361 static int testTokenizerCreate(
362 int argc
, const char * const *argv
,
363 sqlite3_tokenizer
**ppTokenizer
365 test_tokenizer
*pNew
;
366 UNUSED_PARAMETER(argc
);
367 UNUSED_PARAMETER(argv
);
369 pNew
= sqlite3_malloc(sizeof(test_tokenizer
));
370 if( !pNew
) return SQLITE_NOMEM
;
371 memset(pNew
, 0, sizeof(test_tokenizer
));
373 *ppTokenizer
= (sqlite3_tokenizer
*)pNew
;
/*
** Tokenizer destructor. Casts pTokenizer back to the test_tokenizer type that
** testTokenizerCreate() stores in *ppTokenizer.
** NOTE(review): presumably releases p -- confirm against the full body.
*/
377 static int testTokenizerDestroy(sqlite3_tokenizer
*pTokenizer
){
378 test_tokenizer
*p
= (test_tokenizer
*)pTokenizer
;
/*
** Open a cursor over pInput. Allocates a zeroed test_tokenizer_cursor with
** sqlite3_malloc(), records the input pointer and its length, and stores the
** cursor in *ppCursor. The input is not copied, only referenced.
*/
383 static int testTokenizerOpen(
384 sqlite3_tokenizer
*pTokenizer
, /* The tokenizer */
385 const char *pInput
, int nBytes
, /* String to be tokenized */
386 sqlite3_tokenizer_cursor
**ppCursor
/* OUT: Tokenization cursor */
388 int rc
= SQLITE_OK
; /* Return code */
389 test_tokenizer_cursor
*pCsr
; /* New cursor object */
391 UNUSED_PARAMETER(pTokenizer
);
393 pCsr
= (test_tokenizer_cursor
*)sqlite3_malloc(sizeof(test_tokenizer_cursor
));
397 memset(pCsr
, 0, sizeof(test_tokenizer_cursor
));
398 pCsr
->aInput
= pInput
;
/* The length comes either from strlen() or from nBytes; presumably strlen()
** is used when nBytes is negative -- confirm. */
400 pCsr
->nInput
= (int)strlen(pInput
);
402 pCsr
->nInput
= nBytes
;
406 *ppCursor
= (sqlite3_tokenizer_cursor
*)pCsr
;
/*
** Close a cursor opened by testTokenizerOpen(). Frees the token buffer owned
** by the cursor.
** NOTE(review): presumably the cursor itself is freed as well -- confirm.
*/
410 static int testTokenizerClose(sqlite3_tokenizer_cursor
*pCursor
){
411 test_tokenizer_cursor
*pCsr
= (test_tokenizer_cursor
*)pCursor
;
412 sqlite3_free(pCsr
->aBuffer
);
/*
** Return 1 if c is an ASCII letter ('a'..'z' or 'A'..'Z') and 0 otherwise.
** testTokenizerNext() uses this both to skip separators and to find the end
** of a token, so letters are the only characters that can appear in a token.
*/
static int testIsTokenChar(char c){
  return (c>='a' && c<='z') || (c>='A' && c<='Z');
}
/*
** Return the lower-case form of c when c is an ASCII upper-case letter
** ('A'..'Z'). Any other character is returned unchanged.
*/
static int testTolower(char c){
  char ret = c;
  /* ('A'-'a') is negative, so subtracting it shifts into the lower-case range. */
  if( ret>='A' && ret<='Z' ) ret = ret - ('A'-'a');
  return ret;
}
/*
** Advance the cursor to the next token. Starting at offset iInput, skip every
** character for which testIsTokenChar() returns 0, then consume the following
** run of token characters. The token is copied into pCsr->aBuffer: verbatim
** when the low bit of iLangid is set (odd, case-sensitive languages),
** otherwise through testTolower(). The output parameters receive the buffer
** pointer, the token length in bytes, the start and end byte offsets within
** aInput, and pCsr->iToken.
*/
426 static int testTokenizerNext(
427 sqlite3_tokenizer_cursor
*pCursor
, /* Cursor returned by testTokenizerOpen */
428 const char **ppToken
, /* OUT: *ppToken is the token text */
429 int *pnBytes
, /* OUT: Number of bytes in token */
430 int *piStartOffset
, /* OUT: Starting offset of token */
431 int *piEndOffset
, /* OUT: Ending offset of token */
432 int *piPosition
/* OUT: Position integer of token */
434 test_tokenizer_cursor
*pCsr
= (test_tokenizer_cursor
*)pCursor
;
439 p
= &pCsr
->aInput
[pCsr
->iInput
];
440 pEnd
= &pCsr
->aInput
[pCsr
->nInput
];
442 /* Skip past any non-token characters (anything that is not an ASCII letter) */
444 while( p
<pEnd
&& testIsTokenChar(*p
)==0 ) p
++;
449 /* Advance to the end of the token */
450 const char *pToken
= p
;
451 sqlite3_int64 nToken
;
452 while( p
<pEnd
&& testIsTokenChar(*p
) ) p
++;
453 nToken
= (sqlite3_int64
)(p
-pToken
);
455 /* Copy the token into the buffer */
/* NOTE(review): no assignment to pCsr->nBuffer is visible after the buffer
** is replaced. If there is none, the buffer is freed and reallocated for
** every non-empty token -- confirm. */
456 if( nToken
>pCsr
->nBuffer
){
457 sqlite3_free(pCsr
->aBuffer
);
458 pCsr
->aBuffer
= sqlite3_malloc64(nToken
);
460 if( pCsr
->aBuffer
==0 ){
/* Odd language ids keep the original case; the other loop folds to lower. */
465 if( pCsr
->iLangid
& 0x00000001 ){
466 for(i
=0; i
<nToken
; i
++) pCsr
->aBuffer
[i
] = pToken
[i
];
468 for(i
=0; i
<nToken
; i
++) pCsr
->aBuffer
[i
] = (char)testTolower(pToken
[i
]);
/* Remember where the next call should resume scanning. */
471 pCsr
->iInput
= (int)(p
- pCsr
->aInput
);
473 *ppToken
= pCsr
->aBuffer
;
474 *pnBytes
= (int)nToken
;
475 *piStartOffset
= (int)(pToken
- pCsr
->aInput
);
476 *piEndOffset
= (int)(p
- pCsr
->aInput
);
477 *piPosition
= pCsr
->iToken
;
/*
** Record iLangid on the cursor. Values of 100 or more take the branch at the
** end; the tokenizer description in this file says such language ids are
** considered an error, so that is presumably the error path.
*/
484 static int testTokenizerLanguage(
485 sqlite3_tokenizer_cursor
*pCursor
,
489 test_tokenizer_cursor
*pCsr
= (test_tokenizer_cursor
*)pCursor
;
490 pCsr
->iLangid
= iLangid
;
491 if( pCsr
->iLangid
>=100 ){
/*
** Implementation of the Tcl command "fts3_test_tokenizer". It takes no
** arguments (the usage string is empty). The interpreter result is a Tcl
** byte-array object holding the raw bytes of a pointer to the static
** testTokenizerModule: sizeof(sqlite3_tokenizer_module*) bytes copied from
** &pPtr.
*/
498 static int SQLITE_TCLAPI
fts3_test_tokenizer_cmd(
499 ClientData clientData
,
502 Tcl_Obj
*CONST objv
[]
504 #ifdef SQLITE_ENABLE_FTS3
505 static const sqlite3_tokenizer_module testTokenizerModule
= {
508 testTokenizerDestroy
,
512 testTokenizerLanguage
514 const sqlite3_tokenizer_module
*pPtr
= &testTokenizerModule
;
516 Tcl_WrongNumArgs(interp
, 1, objv
, "");
/* The byte array carries the pointer value itself, not the module contents. */
519 Tcl_SetObjResult(interp
, Tcl_NewByteArrayObj(
520 (const unsigned char *)&pPtr
, sizeof(sqlite3_tokenizer_module
*)
523 UNUSED_PARAMETER(clientData
);
/*
** Implementation of the Tcl command "fts3_test_varint INTEGER". This is a
** round-trip check of the FTS varint routines. The wide integer w is encoded
** into aBuf with sqlite3Fts3PutVarint() and decoded again with
** sqlite3Fts3GetVarint(); a difference in value or byte count produces the
** message "error testing N". For 0 <= w <= 2147483647 the same buffer is also
** decoded with fts3GetVarint32() and compared in the same way.
*/
527 static int SQLITE_TCLAPI
fts3_test_varint_cmd(
528 ClientData clientData
,
531 Tcl_Obj
*CONST objv
[]
533 #ifdef SQLITE_ENABLE_FTS3
541 Tcl_WrongNumArgs(interp
, 1, objv
, "INTEGER");
545 rc
= Tcl_GetWideIntFromObj(interp
, objv
[1], &w
);
546 if( rc
!=TCL_OK
) return rc
;
/* 64-bit round trip: the value and the encoded length must both agree. */
548 nByte
= sqlite3Fts3PutVarint(aBuf
, w
);
549 nByte2
= sqlite3Fts3GetVarint(aBuf
, &w2
);
550 if( w
!=w2
|| nByte
!=nByte2
){
/* NOTE(review): zErr is obtained from sqlite3_mprintf() and no
** sqlite3_free(zErr) is visible in this block. Confirm it is released once
** Tcl_AppendResult() has taken its copy. */
551 char *zErr
= sqlite3_mprintf("error testing %lld", w
);
552 Tcl_ResetResult(interp
);
553 Tcl_AppendResult(interp
, zErr
, 0);
/* 32-bit decoder check, only for values in the range of a non-negative int. */
557 if( w
<=2147483647 && w
>=0 ){
559 nByte2
= fts3GetVarint32(aBuf
, &i
);
560 if( (int)w
!=i
|| nByte
!=nByte2
){
/* NOTE(review): same question about releasing zErr as above. */
561 char *zErr
= sqlite3_mprintf("error testing %lld (32-bit)", w
);
562 Tcl_ResetResult(interp
);
563 Tcl_AppendResult(interp
, zErr
, 0);
569 UNUSED_PARAMETER(clientData
);
574 ** End of tokenizer code.
575 **************************************************************************/
578 ** sqlite3_fts3_may_be_corrupt ?BOOLEAN?
580 ** Set or clear the global "may-be-corrupt" flag. Return the old value.
/*
** Implementation of the Tcl command "sqlite3_fts3_may_be_corrupt ?BOOLEAN?".
** The current value of the global sqlite3_fts3_may_be_corrupt is saved first.
** A boolean parsed from objv[1] then replaces the global, and the saved value
** is returned as an integer result. The command is accepted with or without
** the BOOLEAN argument.
*/
582 static int SQLITE_TCLAPI
fts3_may_be_corrupt(
586 Tcl_Obj
*CONST objv
[]
589 int bOld
= sqlite3_fts3_may_be_corrupt
;
591 if( objc
!=2 && objc
!=1 ){
592 Tcl_WrongNumArgs(interp
, 1, objv
, "?BOOLEAN?");
/* presumably only reached when the BOOLEAN argument is present -- confirm */
597 if( Tcl_GetBooleanFromObj(interp
, objv
[1], &bNew
) ) return TCL_ERROR
;
598 sqlite3_fts3_may_be_corrupt
= bNew
;
601 Tcl_SetObjResult(interp
, Tcl_NewIntObj(bOld
));
/*
** Register the five test commands implemented in this file with the Tcl
** interpreter. Every command is created with no client data and no delete
** callback (the last two arguments are 0).
*/
606 int Sqlitetestfts3_Init(Tcl_Interp
*interp
){
607 Tcl_CreateObjCommand(interp
, "fts3_near_match", fts3_near_match_cmd
, 0, 0);
608 Tcl_CreateObjCommand(interp
,
609 "fts3_configure_incr_load", fts3_configure_incr_load_cmd
, 0, 0
611 Tcl_CreateObjCommand(
612 interp
, "fts3_test_tokenizer", fts3_test_tokenizer_cmd
, 0, 0
614 Tcl_CreateObjCommand(
615 interp
, "fts3_test_varint", fts3_test_varint_cmd
, 0, 0
617 Tcl_CreateObjCommand(
618 interp
, "sqlite3_fts3_may_be_corrupt", fts3_may_be_corrupt
, 0, 0
622 #endif /* SQLITE_ENABLE_FTS3 || SQLITE_ENABLE_FTS4 */
623 #endif /* ifdef SQLITE_TEST */