4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
11 *************************************************************************
13 ** Code for a demonstration virtual table that generates variations
14 ** on an input word at increasing edit distances from the original.
16 ** A fuzzer virtual table is created like this:
18 ** CREATE VIRTUAL TABLE f USING fuzzer(<fuzzer-data-table>);
20 ** When it is created, the new fuzzer table must be supplied with the
21 ** name of a "fuzzer data table", which must reside in the same database
22 ** file as the new fuzzer table. The fuzzer data table contains the various
23 ** transformations and their costs that the fuzzer logic uses to generate
26 ** The fuzzer data table must contain exactly four columns (more precisely,
27 ** the statement "SELECT * FROM <fuzzer_data_table>" must return records
28 ** that consist of four columns). It does not matter what the columns are
31 ** Each row in the fuzzer data table represents a single character
32 ** transformation. The left most column of the row (column 0) contains an
33 ** integer value - the identifier of the ruleset to which the transformation
34 ** rule belongs (see "MULTIPLE RULE SETS" below). The second column of the
35 ** row (column 0) contains the input character or characters. The third
36 ** column contains the output character or characters. And the fourth column
37 ** contains the integer cost of making the transformation. For example:
39 ** CREATE TABLE f_data(ruleset, cFrom, cTo, Cost);
40 ** INSERT INTO f_data(ruleset, cFrom, cTo, Cost) VALUES(0, '', 'a', 100);
41 ** INSERT INTO f_data(ruleset, cFrom, cTo, Cost) VALUES(0, 'b', '', 87);
42 ** INSERT INTO f_data(ruleset, cFrom, cTo, Cost) VALUES(0, 'o', 'oe', 38);
43 ** INSERT INTO f_data(ruleset, cFrom, cTo, Cost) VALUES(0, 'oe', 'o', 40);
45 ** The first row inserted into the fuzzer data table by the SQL script
46 ** above indicates that the cost of inserting a letter 'a' is 100. (All
47 ** costs are integers. We recommend that costs be scaled so that the
48 ** average cost is around 100.) The second INSERT statement creates a rule
49 ** saying that the cost of deleting a single letter 'b' is 87. The third
50 ** and fourth INSERT statements mean that the cost of transforming a
51 ** single letter "o" into the two-letter sequence "oe" is 38 and that the
52 ** cost of transforming "oe" back into "o" is 40.
54 ** The contents of the fuzzer data table are loaded into main memory when
55 ** a fuzzer table is first created, and may be internally reloaded by the
56 ** system at any subsequent time. Therefore, the fuzzer data table should be
57 ** populated before the fuzzer table is created and not modified thereafter.
58 ** If you do need to modify the contents of the fuzzer data table, it is
59 ** recommended that the associated fuzzer table be dropped, the fuzzer data
60 ** table edited, and the fuzzer table recreated within a single transaction.
61 ** Alternatively, the fuzzer data table can be edited then the database
62 ** connection can be closed and reopened.
64 ** Once it has been created, the fuzzer table can be queried as follows:
66 ** SELECT word, distance FROM f
67 ** WHERE word MATCH 'abcdefg'
70 ** This first query outputs the string "abcdefg" and all strings that
71 ** can be derived from that string by appling the specified transformations.
72 ** The strings are output together with their total transformation cost
73 ** (called "distance") and appear in order of increasing cost. No string
74 ** is output more than once. If there are multiple ways to transform the
75 ** target string into the output string then the lowest cost transform is
76 ** the one that is returned. In the example, the search is limited to
77 ** strings with a total distance of less than 200.
79 ** The fuzzer is a read-only table. Any attempt to DELETE, INSERT, or
80 ** UPDATE on a fuzzer table will throw an error.
82 ** It is important to put some kind of a limit on the fuzzer output. This
83 ** can be either in the form of a LIMIT clause at the end of the query,
84 ** or better, a "distance<NNN" constraint where NNN is some number. The
85 ** running time and memory requirement is exponential in the value of NNN
86 ** so you want to make sure that NNN is not too big. A value of NNN that
87 ** is about twice the average transformation cost seems to give good results.
89 ** The fuzzer table can be useful for tasks such as spelling correction.
90 ** Suppose there is a second table vocabulary(w) where the w column contains
91 ** all correctly spelled words. Let $word be a word you want to look up.
93 ** SELECT vocabulary.w FROM f, vocabulary
94 ** WHERE f.word MATCH $word
95 ** AND f.distance<=200
96 ** AND f.word=vocabulary.w
99 ** The query above gives the 20 closest words to the $word being tested.
100 ** (Note that for good performance, the vocubulary.w column should be
103 ** A similar query can be used to find all words in the dictionary that
104 ** begin with some prefix $prefix:
106 ** SELECT vocabulary.w FROM f, vocabulary
107 ** WHERE f.word MATCH $prefix
108 ** AND f.distance<=200
109 ** AND vocabulary.w BETWEEN f.word AND (f.word || x'F7BFBFBF')
112 ** This last query will show up to 50 words out of the vocabulary that
113 ** match or nearly match the $prefix.
115 ** MULTIPLE RULE SETS
117 ** Normally, the "ruleset" value associated with all character transformations
118 ** in the fuzzer data table is zero. However, if required, the fuzzer table
119 ** allows multiple rulesets to be defined. Each query uses only a single
120 ** ruleset. This allows, for example, a single fuzzer table to support
121 ** multiple languages.
123 ** By default, only the rules from ruleset 0 are used. To specify an
124 ** alternative ruleset, a "ruleset = ?" expression must be added to the
125 ** WHERE clause of a SELECT, where ? is the identifier of the desired
126 ** ruleset. For example:
128 ** SELECT vocabulary.w FROM f, vocabulary
129 ** WHERE f.word MATCH $word
130 ** AND f.distance<=200
131 ** AND f.word=vocabulary.w
132 ** AND f.ruleset=1 -- Specify the ruleset to use here
135 ** If no "ruleset = ?" constraint is specified in the WHERE clause, ruleset
140 ** The maximum ruleset number is 2147483647. The maximum length of either
141 ** of the strings in the second or third column of the fuzzer data table
142 ** is 50 bytes. The maximum cost on a rule is 1000.
144 #include "sqlite3ext.h"
145 SQLITE_EXTENSION_INIT1
147 /* If SQLITE_DEBUG is not defined, disable assert statements. */
148 #if !defined(NDEBUG) && !defined(SQLITE_DEBUG)
157 #ifndef SQLITE_OMIT_VIRTUALTABLE
160 ** Forward declaration of objects used by this implementation
162 typedef struct fuzzer_vtab fuzzer_vtab
;
163 typedef struct fuzzer_cursor fuzzer_cursor
;
164 typedef struct fuzzer_rule fuzzer_rule
;
165 typedef struct fuzzer_seen fuzzer_seen
;
166 typedef struct fuzzer_stem fuzzer_stem
;
171 ** fuzzer_cost is the "cost" of an edit operation.
173 ** fuzzer_len is the length of a matching string.
175 ** fuzzer_ruleid is an ruleset identifier.
177 typedef int fuzzer_cost
;
178 typedef signed char fuzzer_len
;
179 typedef int fuzzer_ruleid
;
184 #define FUZZER_MX_LENGTH 50 /* Maximum length of a rule string */
185 #define FUZZER_MX_RULEID 2147483647 /* Maximum rule ID */
186 #define FUZZER_MX_COST 1000 /* Maximum single-rule cost */
187 #define FUZZER_MX_OUTPUT_LENGTH 100 /* Maximum length of an output string */
191 ** Each transformation rule is stored as an instance of this object.
192 ** All rules are kept on a linked list sorted by rCost.
195 fuzzer_rule
*pNext
; /* Next rule in order of increasing rCost */
196 char *zFrom
; /* Transform from */
197 fuzzer_cost rCost
; /* Cost of this transformation */
198 fuzzer_len nFrom
, nTo
; /* Length of the zFrom and zTo strings */
199 fuzzer_ruleid iRuleset
; /* The rule set to which this rule belongs */
200 char zTo
[4]; /* Transform to (extra space appended) */
204 ** A stem object is used to generate variants. It is also used to record
205 ** previously generated outputs.
207 ** Every stem is added to a hash table as it is output. Generation of
208 ** duplicate stems is suppressed.
210 ** Active stems (those that might generate new outputs) are kepts on a linked
211 ** list sorted by increasing cost. The cost is the sum of rBaseCost and
215 char *zBasis
; /* Word being fuzzed */
216 const fuzzer_rule
*pRule
; /* Current rule to apply */
217 fuzzer_stem
*pNext
; /* Next stem in rCost order */
218 fuzzer_stem
*pHash
; /* Next stem with same hash on zBasis */
219 fuzzer_cost rBaseCost
; /* Base cost of getting to zBasis */
220 fuzzer_cost rCostX
; /* Precomputed rBaseCost + pRule->rCost */
221 fuzzer_len nBasis
; /* Length of the zBasis string */
222 fuzzer_len n
; /* Apply pRule at this character offset */
226 ** A fuzzer virtual-table object
229 sqlite3_vtab base
; /* Base class - must be first */
230 char *zClassName
; /* Name of this class. Default: "fuzzer" */
231 fuzzer_rule
*pRule
; /* All active rules in this fuzzer */
232 int nCursor
; /* Number of active cursors */
235 #define FUZZER_HASH 4001 /* Hash table size */
236 #define FUZZER_NQUEUE 20 /* Number of slots on the stem queue */
238 /* A fuzzer cursor object */
239 struct fuzzer_cursor
{
240 sqlite3_vtab_cursor base
; /* Base class - must be first */
241 sqlite3_int64 iRowid
; /* The rowid of the current word */
242 fuzzer_vtab
*pVtab
; /* The virtual table this cursor belongs to */
243 fuzzer_cost rLimit
; /* Maximum cost of any term */
244 fuzzer_stem
*pStem
; /* Stem with smallest rCostX */
245 fuzzer_stem
*pDone
; /* Stems already processed to completion */
246 fuzzer_stem
*aQueue
[FUZZER_NQUEUE
]; /* Queue of stems with higher rCostX */
247 int mxQueue
; /* Largest used index in aQueue[] */
248 char *zBuf
; /* Temporary use buffer */
249 int nBuf
; /* Bytes allocated for zBuf */
250 int nStem
; /* Number of stems allocated */
251 int iRuleset
; /* Only process rules from this ruleset */
252 fuzzer_rule nullRule
; /* Null rule used first */
253 fuzzer_stem
*apHash
[FUZZER_HASH
]; /* Hash of previously generated terms */
257 ** The two input rule lists are both sorted in order of increasing
258 ** cost. Merge them together into a single list, sorted by cost, and
259 ** return a pointer to the head of that list.
261 static fuzzer_rule
*fuzzerMergeRules(fuzzer_rule
*pA
, fuzzer_rule
*pB
){
267 if( pA
->rCost
<=pB
->rCost
){
286 ** Statement pStmt currently points to a row in the fuzzer data table. This
287 ** function allocates and populates a fuzzer_rule structure according to
288 ** the content of the row.
290 ** If successful, *ppRule is set to point to the new object and SQLITE_OK
291 ** is returned. Otherwise, *ppRule is zeroed, *pzErr may be set to point
292 ** to an error message and an SQLite error code returned.
294 static int fuzzerLoadOneRule(
295 fuzzer_vtab
*p
, /* Fuzzer virtual table handle */
296 sqlite3_stmt
*pStmt
, /* Base rule on statements current row */
297 fuzzer_rule
**ppRule
, /* OUT: New rule object */
298 char **pzErr
/* OUT: Error message */
300 sqlite3_int64 iRuleset
= sqlite3_column_int64(pStmt
, 0);
301 const char *zFrom
= (const char *)sqlite3_column_text(pStmt
, 1);
302 const char *zTo
= (const char *)sqlite3_column_text(pStmt
, 2);
303 int nCost
= sqlite3_column_int(pStmt
, 3);
305 int rc
= SQLITE_OK
; /* Return code */
306 int nFrom
; /* Size of string zFrom, in bytes */
307 int nTo
; /* Size of string zTo, in bytes */
308 fuzzer_rule
*pRule
= 0; /* New rule object to return */
310 if( zFrom
==0 ) zFrom
= "";
311 if( zTo
==0 ) zTo
= "";
312 nFrom
= (int)strlen(zFrom
);
313 nTo
= (int)strlen(zTo
);
315 /* Silently ignore null transformations */
316 if( strcmp(zFrom
, zTo
)==0 ){
321 if( nCost
<=0 || nCost
>FUZZER_MX_COST
){
322 *pzErr
= sqlite3_mprintf("%s: cost must be between 1 and %d",
323 p
->zClassName
, FUZZER_MX_COST
327 if( nFrom
>FUZZER_MX_LENGTH
|| nTo
>FUZZER_MX_LENGTH
){
328 *pzErr
= sqlite3_mprintf("%s: maximum string length is %d",
329 p
->zClassName
, FUZZER_MX_LENGTH
333 if( iRuleset
<0 || iRuleset
>FUZZER_MX_RULEID
){
334 *pzErr
= sqlite3_mprintf("%s: ruleset must be between 0 and %d",
335 p
->zClassName
, FUZZER_MX_RULEID
340 pRule
= sqlite3_malloc( sizeof(*pRule
) + nFrom
+ nTo
);
344 memset(pRule
, 0, sizeof(*pRule
));
345 pRule
->zFrom
= &pRule
->zTo
[nTo
+1];
346 pRule
->nFrom
= nFrom
;
347 memcpy(pRule
->zFrom
, zFrom
, nFrom
+1);
348 memcpy(pRule
->zTo
, zTo
, nTo
+1);
350 pRule
->rCost
= nCost
;
351 pRule
->iRuleset
= (int)iRuleset
;
360 ** Load the content of the fuzzer data table into memory.
362 static int fuzzerLoadRules(
363 sqlite3
*db
, /* Database handle */
364 fuzzer_vtab
*p
, /* Virtual fuzzer table to configure */
365 const char *zDb
, /* Database containing rules data */
366 const char *zData
, /* Table containing rules data */
367 char **pzErr
/* OUT: Error message */
369 int rc
= SQLITE_OK
; /* Return code */
370 char *zSql
; /* SELECT used to read from rules table */
371 fuzzer_rule
*pHead
= 0;
373 zSql
= sqlite3_mprintf("SELECT * FROM %Q.%Q", zDb
, zData
);
377 int rc2
; /* finalize() return code */
378 sqlite3_stmt
*pStmt
= 0;
379 rc
= sqlite3_prepare_v2(db
, zSql
, -1, &pStmt
, 0);
381 *pzErr
= sqlite3_mprintf("%s: %s", p
->zClassName
, sqlite3_errmsg(db
));
382 }else if( sqlite3_column_count(pStmt
)!=4 ){
383 *pzErr
= sqlite3_mprintf("%s: %s has %d columns, expected 4",
384 p
->zClassName
, zData
, sqlite3_column_count(pStmt
)
388 while( rc
==SQLITE_OK
&& SQLITE_ROW
==sqlite3_step(pStmt
) ){
389 fuzzer_rule
*pRule
= 0;
390 rc
= fuzzerLoadOneRule(p
, pStmt
, &pRule
, pzErr
);
392 pRule
->pNext
= pHead
;
397 rc2
= sqlite3_finalize(pStmt
);
398 if( rc
==SQLITE_OK
) rc
= rc2
;
402 /* All rules are now in a singly linked list starting at pHead. This
403 ** block sorts them by cost and then sets fuzzer_vtab.pRule to point to
404 ** point to the head of the sorted list.
410 for(i
=0; i
<sizeof(a
)/sizeof(a
[0]); i
++) a
[i
] = 0;
411 while( (pX
= pHead
)!=0 ){
414 for(i
=0; a
[i
] && i
<sizeof(a
)/sizeof(a
[0])-1; i
++){
415 pX
= fuzzerMergeRules(a
[i
], pX
);
418 a
[i
] = fuzzerMergeRules(a
[i
], pX
);
420 for(pX
=a
[0], i
=1; i
<sizeof(a
)/sizeof(a
[0]); i
++){
421 pX
= fuzzerMergeRules(a
[i
], pX
);
423 p
->pRule
= fuzzerMergeRules(p
->pRule
, pX
);
425 /* An error has occurred. Setting p->pRule to point to the head of the
426 ** allocated list ensures that the list will be cleaned up in this case.
428 assert( p
->pRule
==0 );
436 ** This function converts an SQL quoted string into an unquoted string
437 ** and returns a pointer to a buffer allocated using sqlite3_malloc()
438 ** containing the result. The caller should eventually free this buffer
439 ** using sqlite3_free.
448 static char *fuzzerDequote(const char *zIn
){
449 int nIn
; /* Size of input string, in bytes */
450 char *zOut
; /* Output (dequoted) string */
452 nIn
= (int)strlen(zIn
);
453 zOut
= sqlite3_malloc(nIn
+1);
455 char q
= zIn
[0]; /* Quote character (if any ) */
457 if( q
!='[' && q
!= '\'' && q
!='"' && q
!='`' ){
458 memcpy(zOut
, zIn
, nIn
+1);
460 int iOut
= 0; /* Index of next byte to write to output */
461 int iIn
; /* Index of next byte to read from input */
463 if( q
=='[' ) q
= ']';
464 for(iIn
=1; iIn
<nIn
; iIn
++){
465 if( zIn
[iIn
]==q
) iIn
++;
466 zOut
[iOut
++] = zIn
[iIn
];
469 assert( (int)strlen(zOut
)<=nIn
);
475 ** xDisconnect/xDestroy method for the fuzzer module.
477 static int fuzzerDisconnect(sqlite3_vtab
*pVtab
){
478 fuzzer_vtab
*p
= (fuzzer_vtab
*)pVtab
;
479 assert( p
->nCursor
==0 );
481 fuzzer_rule
*pRule
= p
->pRule
;
482 p
->pRule
= pRule
->pNext
;
490 ** xConnect/xCreate method for the fuzzer module. Arguments are:
492 ** argv[0] -> module name ("fuzzer")
493 ** argv[1] -> database name
494 ** argv[2] -> table name
495 ** argv[3] -> fuzzer rule table name
497 static int fuzzerConnect(
500 int argc
, const char *const*argv
,
501 sqlite3_vtab
**ppVtab
,
504 int rc
= SQLITE_OK
; /* Return code */
505 fuzzer_vtab
*pNew
= 0; /* New virtual table */
506 const char *zModule
= argv
[0];
507 const char *zDb
= argv
[1];
510 *pzErr
= sqlite3_mprintf(
511 "%s: wrong number of CREATE VIRTUAL TABLE arguments", zModule
515 int nModule
; /* Length of zModule, in bytes */
517 nModule
= (int)strlen(zModule
);
518 pNew
= sqlite3_malloc( sizeof(*pNew
) + nModule
+ 1);
522 char *zTab
; /* Dequoted name of fuzzer data table */
524 memset(pNew
, 0, sizeof(*pNew
));
525 pNew
->zClassName
= (char*)&pNew
[1];
526 memcpy(pNew
->zClassName
, zModule
, nModule
+1);
528 zTab
= fuzzerDequote(argv
[3]);
532 rc
= fuzzerLoadRules(db
, pNew
, zDb
, zTab
, pzErr
);
537 rc
= sqlite3_declare_vtab(db
, "CREATE TABLE x(word,distance,ruleset)");
540 fuzzerDisconnect((sqlite3_vtab
*)pNew
);
546 *ppVtab
= (sqlite3_vtab
*)pNew
;
551 ** Open a new fuzzer cursor.
553 static int fuzzerOpen(sqlite3_vtab
*pVTab
, sqlite3_vtab_cursor
**ppCursor
){
554 fuzzer_vtab
*p
= (fuzzer_vtab
*)pVTab
;
556 pCur
= sqlite3_malloc( sizeof(*pCur
) );
557 if( pCur
==0 ) return SQLITE_NOMEM
;
558 memset(pCur
, 0, sizeof(*pCur
));
560 *ppCursor
= &pCur
->base
;
566 ** Free all stems in a list.
568 static void fuzzerClearStemList(fuzzer_stem
*pStem
){
570 fuzzer_stem
*pNext
= pStem
->pNext
;
577 ** Free up all the memory allocated by a cursor. Set it rLimit to 0
578 ** to indicate that it is at EOF.
580 static void fuzzerClearCursor(fuzzer_cursor
*pCur
, int clearHash
){
582 fuzzerClearStemList(pCur
->pStem
);
583 fuzzerClearStemList(pCur
->pDone
);
584 for(i
=0; i
<FUZZER_NQUEUE
; i
++) fuzzerClearStemList(pCur
->aQueue
[i
]);
585 pCur
->rLimit
= (fuzzer_cost
)0;
586 if( clearHash
&& pCur
->nStem
){
590 memset(pCur
->aQueue
, 0, sizeof(pCur
->aQueue
));
591 memset(pCur
->apHash
, 0, sizeof(pCur
->apHash
));
597 ** Close a fuzzer cursor.
599 static int fuzzerClose(sqlite3_vtab_cursor
*cur
){
600 fuzzer_cursor
*pCur
= (fuzzer_cursor
*)cur
;
601 fuzzerClearCursor(pCur
, 0);
602 sqlite3_free(pCur
->zBuf
);
603 pCur
->pVtab
->nCursor
--;
609 ** Compute the current output term for a fuzzer_stem.
611 static int fuzzerRender(
612 fuzzer_stem
*pStem
, /* The stem to be rendered */
613 char **pzBuf
, /* Write results into this buffer. realloc if needed */
614 int *pnBuf
/* Size of the buffer */
616 const fuzzer_rule
*pRule
= pStem
->pRule
;
617 int n
; /* Size of output term without nul-term */
618 char *z
; /* Buffer to assemble output term in */
620 n
= pStem
->nBasis
+ pRule
->nTo
- pRule
->nFrom
;
622 (*pzBuf
) = sqlite3_realloc((*pzBuf
), n
+100);
623 if( (*pzBuf
)==0 ) return SQLITE_NOMEM
;
629 memcpy(z
, pStem
->zBasis
, pStem
->nBasis
+1);
631 memcpy(z
, pStem
->zBasis
, n
);
632 memcpy(&z
[n
], pRule
->zTo
, pRule
->nTo
);
633 memcpy(&z
[n
+pRule
->nTo
], &pStem
->zBasis
[n
+pRule
->nFrom
],
634 pStem
->nBasis
-n
-pRule
->nFrom
+1);
637 assert( z
[pStem
->nBasis
+ pRule
->nTo
- pRule
->nFrom
]==0 );
642 ** Compute a hash on zBasis.
644 static unsigned int fuzzerHash(const char *z
){
646 while( *z
){ h
= (h
<<3) ^ (h
>>29) ^ *(z
++); }
647 return h
% FUZZER_HASH
;
651 ** Current cost of a stem
653 static fuzzer_cost
fuzzerCost(fuzzer_stem
*pStem
){
654 return pStem
->rCostX
= pStem
->rBaseCost
+ pStem
->pRule
->rCost
;
659 ** Print a description of a fuzzer_stem on stderr.
661 static void fuzzerStemPrint(
667 fprintf(stderr
, "%s[%s](%d)-->self%s",
669 pStem
->zBasis
, pStem
->rBaseCost
,
675 if( fuzzerRender(pStem
, &zBuf
, &nBuf
)!=SQLITE_OK
) return;
676 fprintf(stderr
, "%s[%s](%d)-->{%s}(%d)%s",
678 pStem
->zBasis
, pStem
->rBaseCost
, zBuf
, pStem
->,
687 ** Return 1 if the string to which the cursor is point has already
688 ** been emitted. Return 0 if not. Return -1 on a memory allocation
691 static int fuzzerSeen(fuzzer_cursor
*pCur
, fuzzer_stem
*pStem
){
693 fuzzer_stem
*pLookup
;
695 if( fuzzerRender(pStem
, &pCur
->zBuf
, &pCur
->nBuf
)==SQLITE_NOMEM
){
698 h
= fuzzerHash(pCur
->zBuf
);
699 pLookup
= pCur
->apHash
[h
];
700 while( pLookup
&& strcmp(pLookup
->zBasis
, pCur
->zBuf
)!=0 ){
701 pLookup
= pLookup
->pHash
;
707 ** If argument pRule is NULL, this function returns false.
709 ** Otherwise, it returns true if rule pRule should be skipped. A rule
710 ** should be skipped if it does not belong to rule-set iRuleset, or if
711 ** applying it to stem pStem would create a string longer than
712 ** FUZZER_MX_OUTPUT_LENGTH bytes.
714 static int fuzzerSkipRule(
715 const fuzzer_rule
*pRule
, /* Determine whether or not to skip this */
716 fuzzer_stem
*pStem
, /* Stem rule may be applied to */
717 int iRuleset
/* Rule-set used by the current query */
720 (pRule
->iRuleset
!=iRuleset
)
721 || (pStem
->nBasis
+ pRule
->nTo
- pRule
->nFrom
)>FUZZER_MX_OUTPUT_LENGTH
726 ** Advance a fuzzer_stem to its next value. Return 0 if there are
727 ** no more values that can be generated by this fuzzer_stem. Return
728 ** -1 on a memory allocation failure.
730 static int fuzzerAdvance(fuzzer_cursor
*pCur
, fuzzer_stem
*pStem
){
731 const fuzzer_rule
*pRule
;
732 while( (pRule
= pStem
->pRule
)!=0 ){
733 assert( pRule
==&pCur
->nullRule
|| pRule
->iRuleset
==pCur
->iRuleset
);
734 while( pStem
->n
< pStem
->nBasis
- pRule
->nFrom
){
737 || memcmp(&pStem
->zBasis
[pStem
->n
], pRule
->zFrom
, pRule
->nFrom
)==0
739 /* Found a rewrite case. Make sure it is not a duplicate */
740 int rc
= fuzzerSeen(pCur
, pStem
);
741 if( rc
<0 ) return -1;
750 pRule
= pRule
->pNext
;
751 }while( fuzzerSkipRule(pRule
, pStem
, pCur
->iRuleset
) );
752 pStem
->pRule
= pRule
;
753 if( pRule
&& fuzzerCost(pStem
)>pCur
->rLimit
) pStem
->pRule
= 0;
759 ** The two input stem lists are both sorted in order of increasing
760 ** rCostX. Merge them together into a single list, sorted by rCostX, and
761 ** return a pointer to the head of that new list.
763 static fuzzer_stem
*fuzzerMergeStems(fuzzer_stem
*pA
, fuzzer_stem
*pB
){
769 if( pA
->rCostX
<=pB
->rCostX
){
788 ** Load pCur->pStem with the lowest-cost stem. Return a pointer
789 ** to the lowest-cost stem.
791 static fuzzer_stem
*fuzzerLowestCostStem(fuzzer_cursor
*pCur
){
792 fuzzer_stem
*pBest
, *pX
;
796 if( pCur
->pStem
==0 ){
799 for(i
=0; i
<=pCur
->mxQueue
; i
++){
800 pX
= pCur
->aQueue
[i
];
801 if( pX
==0 ) continue;
802 if( pBest
==0 || pBest
->rCostX
>pX
->rCostX
){
808 pCur
->aQueue
[iBest
] = pBest
->pNext
;
817 ** Insert pNew into queue of pending stems. Then find the stem
818 ** with the lowest rCostX and move it into pCur->pStem.
819 ** list. The insert is done such the pNew is in the correct order
820 ** according to fuzzer_stem.zBaseCost+fuzzer_stem.pRule->rCost.
822 static fuzzer_stem
*fuzzerInsert(fuzzer_cursor
*pCur
, fuzzer_stem
*pNew
){
826 /* If pCur->pStem exists and is greater than pNew, then make pNew
827 ** the new pCur->pStem and insert the old pCur->pStem instead.
829 if( (pX
= pCur
->pStem
)!=0 && pX
->rCostX
>pNew
->rCostX
){
835 /* Insert the new value */
838 for(i
=0; i
<=pCur
->mxQueue
; i
++){
839 if( pCur
->aQueue
[i
] ){
840 pX
= fuzzerMergeStems(pX
, pCur
->aQueue
[i
]);
843 pCur
->aQueue
[i
] = pX
;
847 if( i
>pCur
->mxQueue
){
848 if( i
<FUZZER_NQUEUE
){
850 pCur
->aQueue
[i
] = pX
;
852 assert( pCur
->mxQueue
==FUZZER_NQUEUE
-1 );
853 pX
= fuzzerMergeStems(pX
, pCur
->aQueue
[FUZZER_NQUEUE
-1]);
854 pCur
->aQueue
[FUZZER_NQUEUE
-1] = pX
;
858 return fuzzerLowestCostStem(pCur
);
862 ** Allocate a new fuzzer_stem. Add it to the hash table but do not
863 ** link it into either the pCur->pStem or pCur->pDone lists.
865 static fuzzer_stem
*fuzzerNewStem(
868 fuzzer_cost rBaseCost
874 pNew
= sqlite3_malloc( sizeof(*pNew
) + (int)strlen(zWord
) + 1 );
875 if( pNew
==0 ) return 0;
876 memset(pNew
, 0, sizeof(*pNew
));
877 pNew
->zBasis
= (char*)&pNew
[1];
878 pNew
->nBasis
= (int)strlen(zWord
);
879 memcpy(pNew
->zBasis
, zWord
, pNew
->nBasis
+1);
880 pRule
= pCur
->pVtab
->pRule
;
881 while( fuzzerSkipRule(pRule
, pNew
, pCur
->iRuleset
) ){
882 pRule
= pRule
->pNext
;
886 pNew
->rBaseCost
= pNew
->rCostX
= rBaseCost
;
887 h
= fuzzerHash(pNew
->zBasis
);
888 pNew
->pHash
= pCur
->apHash
[h
];
889 pCur
->apHash
[h
] = pNew
;
896 ** Advance a cursor to its next row of output
898 static int fuzzerNext(sqlite3_vtab_cursor
*cur
){
899 fuzzer_cursor
*pCur
= (fuzzer_cursor
*)cur
;
901 fuzzer_stem
*pStem
, *pNew
;
905 /* Use the element the cursor is currently point to to create
906 ** a new stem and insert the new stem into the priority queue.
909 if( pStem
->rCostX
>0 ){
910 rc
= fuzzerRender(pStem
, &pCur
->zBuf
, &pCur
->nBuf
);
911 if( rc
==SQLITE_NOMEM
) return SQLITE_NOMEM
;
912 pNew
= fuzzerNewStem(pCur
, pCur
->zBuf
, pStem
->rCostX
);
914 if( fuzzerAdvance(pCur
, pNew
)==0 ){
915 pNew
->pNext
= pCur
->pDone
;
918 if( fuzzerInsert(pCur
, pNew
)==pNew
){
927 /* Adjust the priority queue so that the first element of the
928 ** stem list is the next lowest cost word.
930 while( (pStem
= pCur
->pStem
)!=0 ){
931 int res
= fuzzerAdvance(pCur
, pStem
);
936 pStem
= fuzzerInsert(pCur
, pStem
);
937 if( (rc
= fuzzerSeen(pCur
, pStem
))!=0 ){
938 if( rc
<0 ) return SQLITE_NOMEM
;
941 return SQLITE_OK
; /* New word found */
944 pStem
->pNext
= pCur
->pDone
;
946 if( fuzzerLowestCostStem(pCur
) ){
947 rc
= fuzzerSeen(pCur
, pCur
->pStem
);
948 if( rc
<0 ) return SQLITE_NOMEM
;
955 /* Reach this point only if queue has been exhausted and there is
956 ** nothing left to be output. */
957 pCur
->rLimit
= (fuzzer_cost
)0;
962 ** Called to "rewind" a cursor back to the beginning so that
963 ** it starts its output over again. Always called at least once
964 ** prior to any fuzzerColumn, fuzzerRowid, or fuzzerEof call.
966 static int fuzzerFilter(
967 sqlite3_vtab_cursor
*pVtabCursor
,
968 int idxNum
, const char *idxStr
,
969 int argc
, sqlite3_value
**argv
971 fuzzer_cursor
*pCur
= (fuzzer_cursor
*)pVtabCursor
;
972 const char *zWord
= "";
976 fuzzerClearCursor(pCur
, 1);
977 pCur
->rLimit
= 2147483647;
980 zWord
= (const char*)sqlite3_value_text(argv
[0]);
984 pCur
->rLimit
= (fuzzer_cost
)sqlite3_value_int(argv
[idx
]);
988 pCur
->iRuleset
= (fuzzer_cost
)sqlite3_value_int(argv
[idx
]);
991 pCur
->nullRule
.pNext
= pCur
->pVtab
->pRule
;
992 pCur
->nullRule
.rCost
= 0;
993 pCur
->nullRule
.nFrom
= 0;
994 pCur
->nullRule
.nTo
= 0;
995 pCur
->nullRule
.zFrom
= "";
997 assert( pCur
->pStem
==0 );
999 /* If the query term is longer than FUZZER_MX_OUTPUT_LENGTH bytes, this
1000 ** query will return zero rows. */
1001 if( (int)strlen(zWord
)<FUZZER_MX_OUTPUT_LENGTH
){
1002 pCur
->pStem
= pStem
= fuzzerNewStem(pCur
, zWord
, (fuzzer_cost
)0);
1003 if( pStem
==0 ) return SQLITE_NOMEM
;
1004 pStem
->pRule
= &pCur
->nullRule
;
1005 pStem
->n
= pStem
->nBasis
;
1014 ** Only the word and distance columns have values. All other columns
1017 static int fuzzerColumn(sqlite3_vtab_cursor
*cur
, sqlite3_context
*ctx
, int i
){
1018 fuzzer_cursor
*pCur
= (fuzzer_cursor
*)cur
;
1020 /* the "word" column */
1021 if( fuzzerRender(pCur
->pStem
, &pCur
->zBuf
, &pCur
->nBuf
)==SQLITE_NOMEM
){
1022 return SQLITE_NOMEM
;
1024 sqlite3_result_text(ctx
, pCur
->zBuf
, -1, SQLITE_TRANSIENT
);
1026 /* the "distance" column */
1027 sqlite3_result_int(ctx
, pCur
->pStem
->rCostX
);
1029 /* All other columns are NULL */
1030 sqlite3_result_null(ctx
);
1038 static int fuzzerRowid(sqlite3_vtab_cursor
*cur
, sqlite_int64
*pRowid
){
1039 fuzzer_cursor
*pCur
= (fuzzer_cursor
*)cur
;
1040 *pRowid
= pCur
->iRowid
;
1045 ** When the fuzzer_cursor.rLimit value is 0 or less, that is a signal
1046 ** that the cursor has nothing more to output.
1048 static int fuzzerEof(sqlite3_vtab_cursor
*cur
){
1049 fuzzer_cursor
*pCur
= (fuzzer_cursor
*)cur
;
1050 return pCur
->rLimit
<=(fuzzer_cost
)0;
1054 ** Search for terms of these forms:
1056 ** (A) word MATCH $str
1057 ** (B1) distance < $value
1058 ** (B2) distance <= $value
1059 ** (C) ruleid == $ruleid
1061 ** The distance< and distance<= are both treated as distance<=.
1062 ** The query plan number is a bit vector:
1064 ** bit 1: Term of the form (A) found
1065 ** bit 2: Term like (B1) or (B2) found
1066 ** bit 3: Term like (C) found
1068 ** If bit-1 is set, $str is always in filter.argv[0]. If bit-2 is set
1069 ** then $value is in filter.argv[0] if bit-1 is clear and is in
1070 ** filter.argv[1] if bit-1 is set. If bit-3 is set, then $ruleid is
1071 ** in filter.argv[0] if bit-1 and bit-2 are both zero, is in
1072 ** filter.argv[1] if exactly one of bit-1 and bit-2 are set, and is in
1073 ** filter.argv[2] if both bit-1 and bit-2 are set.
1075 static int fuzzerBestIndex(sqlite3_vtab
*tab
, sqlite3_index_info
*pIdxInfo
){
1078 int iRulesetTerm
= -1;
1081 const struct sqlite3_index_constraint
*pConstraint
;
1082 double rCost
= 1e12
;
1084 pConstraint
= pIdxInfo
->aConstraint
;
1085 for(i
=0; i
<pIdxInfo
->nConstraint
; i
++, pConstraint
++){
1086 if( pConstraint
->iColumn
==0
1087 && pConstraint
->op
==SQLITE_INDEX_CONSTRAINT_MATCH
){
1090 if( pConstraint
->usable
==0 ) continue;
1092 && pConstraint
->iColumn
==0
1093 && pConstraint
->op
==SQLITE_INDEX_CONSTRAINT_MATCH
1096 pIdxInfo
->aConstraintUsage
[i
].argvIndex
= 1;
1097 pIdxInfo
->aConstraintUsage
[i
].omit
= 1;
1101 && pConstraint
->iColumn
==1
1102 && (pConstraint
->op
==SQLITE_INDEX_CONSTRAINT_LT
1103 || pConstraint
->op
==SQLITE_INDEX_CONSTRAINT_LE
)
1110 && pConstraint
->iColumn
==2
1111 && pConstraint
->op
==SQLITE_INDEX_CONSTRAINT_EQ
1114 pIdxInfo
->aConstraintUsage
[i
].omit
= 1;
1120 pIdxInfo
->aConstraintUsage
[iDistTerm
].argvIndex
= 1+((iPlan
&1)!=0);
1124 if( iPlan
& 1 ) idx
++;
1125 if( iPlan
& 2 ) idx
++;
1126 pIdxInfo
->aConstraintUsage
[iRulesetTerm
].argvIndex
= idx
;
1128 pIdxInfo
->idxNum
= iPlan
;
1129 if( pIdxInfo
->nOrderBy
==1
1130 && pIdxInfo
->aOrderBy
[0].iColumn
==1
1131 && pIdxInfo
->aOrderBy
[0].desc
==0
1133 pIdxInfo
->orderByConsumed
= 1;
1135 if( seenMatch
&& (iPlan
&1)==0 ) rCost
= 1e99
;
1136 pIdxInfo
->estimatedCost
= rCost
;
1142 ** A virtual table module that implements the "fuzzer".
1144 static sqlite3_module fuzzerModule
= {
1151 fuzzerOpen
, /* xOpen - open a cursor */
1152 fuzzerClose
, /* xClose - close a cursor */
1153 fuzzerFilter
, /* xFilter - configure scan constraints */
1154 fuzzerNext
, /* xNext - advance a cursor */
1155 fuzzerEof
, /* xEof - check for end of scan */
1156 fuzzerColumn
, /* xColumn - read data */
1157 fuzzerRowid
, /* xRowid - read data */
1163 0, /* xFindMethod */
1167 #endif /* SQLITE_OMIT_VIRTUALTABLE */
1171 __declspec(dllexport
)
1173 int sqlite3_fuzzer_init(
1176 const sqlite3_api_routines
*pApi
1179 SQLITE_EXTENSION_INIT2(pApi
);
1180 #ifndef SQLITE_OMIT_VIRTUALTABLE
1181 rc
= sqlite3_create_module(db
, "fuzzer", &fuzzerModule
, 0);