4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
11 *************************************************************************
13 ** Code for a demonstration virtual table that generates variations
14 ** on an input word at increasing edit distances from the original.
16 ** A fuzzer virtual table is created like this:
18 ** CREATE VIRTUAL TABLE f USING fuzzer(<fuzzer-data-table>);
20 ** When it is created, the new fuzzer table must be supplied with the
21 ** name of a "fuzzer data table", which must reside in the same database
22 ** file as the new fuzzer table. The fuzzer data table contains the various
23 ** transformations and their costs that the fuzzer logic uses to generate
26 ** The fuzzer data table must contain exactly four columns (more precisely,
27 ** the statement "SELECT * FROM <fuzzer_data_table>" must return records
28 ** that consist of four columns). It does not matter what the columns are
31 ** Each row in the fuzzer data table represents a single character
32 ** transformation. The left most column of the row (column 0) contains an
33 ** integer value - the identifier of the ruleset to which the transformation
34 ** rule belongs (see "MULTIPLE RULE SETS" below). The second column of the
35 ** row (column 0) contains the input character or characters. The third
36 ** column contains the output character or characters. And the fourth column
37 ** contains the integer cost of making the transformation. For example:
39 ** CREATE TABLE f_data(ruleset, cFrom, cTo, Cost);
40 ** INSERT INTO f_data(ruleset, cFrom, cTo, Cost) VALUES(0, '', 'a', 100);
41 ** INSERT INTO f_data(ruleset, cFrom, cTo, Cost) VALUES(0, 'b', '', 87);
42 ** INSERT INTO f_data(ruleset, cFrom, cTo, Cost) VALUES(0, 'o', 'oe', 38);
43 ** INSERT INTO f_data(ruleset, cFrom, cTo, Cost) VALUES(0, 'oe', 'o', 40);
45 ** The first row inserted into the fuzzer data table by the SQL script
46 ** above indicates that the cost of inserting a letter 'a' is 100. (All
47 ** costs are integers. We recommend that costs be scaled so that the
48 ** average cost is around 100.) The second INSERT statement creates a rule
49 ** saying that the cost of deleting a single letter 'b' is 87. The third
50 ** and fourth INSERT statements mean that the cost of transforming a
51 ** single letter "o" into the two-letter sequence "oe" is 38 and that the
52 ** cost of transforming "oe" back into "o" is 40.
54 ** The contents of the fuzzer data table are loaded into main memory when
55 ** a fuzzer table is first created, and may be internally reloaded by the
56 ** system at any subsequent time. Therefore, the fuzzer data table should be
57 ** populated before the fuzzer table is created and not modified thereafter.
58 ** If you do need to modify the contents of the fuzzer data table, it is
59 ** recommended that the associated fuzzer table be dropped, the fuzzer data
60 ** table edited, and the fuzzer table recreated within a single transaction.
61 ** Alternatively, the fuzzer data table can be edited then the database
62 ** connection can be closed and reopened.
64 ** Once it has been created, the fuzzer table can be queried as follows:
66 ** SELECT word, distance FROM f
67 ** WHERE word MATCH 'abcdefg'
70 ** This first query outputs the string "abcdefg" and all strings that
71 ** can be derived from that string by appling the specified transformations.
72 ** The strings are output together with their total transformation cost
73 ** (called "distance") and appear in order of increasing cost. No string
74 ** is output more than once. If there are multiple ways to transform the
75 ** target string into the output string then the lowest cost transform is
76 ** the one that is returned. In the example, the search is limited to
77 ** strings with a total distance of less than 200.
79 ** The fuzzer is a read-only table. Any attempt to DELETE, INSERT, or
80 ** UPDATE on a fuzzer table will throw an error.
82 ** It is important to put some kind of a limit on the fuzzer output. This
83 ** can be either in the form of a LIMIT clause at the end of the query,
84 ** or better, a "distance<NNN" constraint where NNN is some number. The
85 ** running time and memory requirement is exponential in the value of NNN
86 ** so you want to make sure that NNN is not too big. A value of NNN that
87 ** is about twice the average transformation cost seems to give good results.
89 ** The fuzzer table can be useful for tasks such as spelling correction.
90 ** Suppose there is a second table vocabulary(w) where the w column contains
91 ** all correctly spelled words. Let $word be a word you want to look up.
93 ** SELECT vocabulary.w FROM f, vocabulary
94 ** WHERE f.word MATCH $word
95 ** AND f.distance<=200
96 ** AND f.word=vocabulary.w
99 ** The query above gives the 20 closest words to the $word being tested.
100 ** (Note that for good performance, the vocubulary.w column should be
103 ** A similar query can be used to find all words in the dictionary that
104 ** begin with some prefix $prefix:
106 ** SELECT vocabulary.w FROM f, vocabulary
107 ** WHERE f.word MATCH $prefix
108 ** AND f.distance<=200
109 ** AND vocabulary.w BETWEEN f.word AND (f.word || x'F7BFBFBF')
112 ** This last query will show up to 50 words out of the vocabulary that
113 ** match or nearly match the $prefix.
115 ** MULTIPLE RULE SETS
117 ** Normally, the "ruleset" value associated with all character transformations
118 ** in the fuzzer data table is zero. However, if required, the fuzzer table
119 ** allows multiple rulesets to be defined. Each query uses only a single
120 ** ruleset. This allows, for example, a single fuzzer table to support
121 ** multiple languages.
123 ** By default, only the rules from ruleset 0 are used. To specify an
124 ** alternative ruleset, a "ruleset = ?" expression must be added to the
125 ** WHERE clause of a SELECT, where ? is the identifier of the desired
126 ** ruleset. For example:
128 ** SELECT vocabulary.w FROM f, vocabulary
129 ** WHERE f.word MATCH $word
130 ** AND f.distance<=200
131 ** AND f.word=vocabulary.w
132 ** AND f.ruleset=1 -- Specify the ruleset to use here
135 ** If no "ruleset = ?" constraint is specified in the WHERE clause, ruleset
140 ** The maximum ruleset number is 2147483647. The maximum length of either
141 ** of the strings in the second or third column of the fuzzer data table
142 ** is 50 bytes. The maximum cost on a rule is 1000.
144 #include "sqlite3ext.h"
145 SQLITE_EXTENSION_INIT1
147 /* If SQLITE_DEBUG is not defined, disable assert statements. */
148 #if !defined(NDEBUG) && !defined(SQLITE_DEBUG)
157 #ifndef SQLITE_OMIT_VIRTUALTABLE
160 ** Forward declaration of objects used by this implementation
162 typedef struct fuzzer_vtab fuzzer_vtab
;
163 typedef struct fuzzer_cursor fuzzer_cursor
;
164 typedef struct fuzzer_rule fuzzer_rule
;
165 typedef struct fuzzer_seen fuzzer_seen
;
166 typedef struct fuzzer_stem fuzzer_stem
;
171 ** fuzzer_cost is the "cost" of an edit operation.
173 ** fuzzer_len is the length of a matching string.
175 ** fuzzer_ruleid is an ruleset identifier.
177 typedef int fuzzer_cost
;
178 typedef signed char fuzzer_len
;
179 typedef int fuzzer_ruleid
;
184 #define FUZZER_MX_LENGTH 50 /* Maximum length of a rule string */
185 #define FUZZER_MX_RULEID 2147483647 /* Maximum rule ID */
186 #define FUZZER_MX_COST 1000 /* Maximum single-rule cost */
187 #define FUZZER_MX_OUTPUT_LENGTH 100 /* Maximum length of an output string */
191 ** Each transformation rule is stored as an instance of this object.
192 ** All rules are kept on a linked list sorted by rCost.
195 fuzzer_rule
*pNext
; /* Next rule in order of increasing rCost */
196 char *zFrom
; /* Transform from */
197 fuzzer_cost rCost
; /* Cost of this transformation */
198 fuzzer_len nFrom
, nTo
; /* Length of the zFrom and zTo strings */
199 fuzzer_ruleid iRuleset
; /* The rule set to which this rule belongs */
200 char zTo
[4]; /* Transform to (extra space appended) */
204 ** A stem object is used to generate variants. It is also used to record
205 ** previously generated outputs.
207 ** Every stem is added to a hash table as it is output. Generation of
208 ** duplicate stems is suppressed.
210 ** Active stems (those that might generate new outputs) are kepts on a linked
211 ** list sorted by increasing cost. The cost is the sum of rBaseCost and
215 char *zBasis
; /* Word being fuzzed */
216 const fuzzer_rule
*pRule
; /* Current rule to apply */
217 fuzzer_stem
*pNext
; /* Next stem in rCost order */
218 fuzzer_stem
*pHash
; /* Next stem with same hash on zBasis */
219 fuzzer_cost rBaseCost
; /* Base cost of getting to zBasis */
220 fuzzer_cost rCostX
; /* Precomputed rBaseCost + pRule->rCost */
221 fuzzer_len nBasis
; /* Length of the zBasis string */
222 fuzzer_len n
; /* Apply pRule at this character offset */
226 ** A fuzzer virtual-table object
229 sqlite3_vtab base
; /* Base class - must be first */
230 char *zClassName
; /* Name of this class. Default: "fuzzer" */
231 fuzzer_rule
*pRule
; /* All active rules in this fuzzer */
232 int nCursor
; /* Number of active cursors */
235 #define FUZZER_HASH 4001 /* Hash table size */
236 #define FUZZER_NQUEUE 20 /* Number of slots on the stem queue */
238 /* A fuzzer cursor object */
239 struct fuzzer_cursor
{
240 sqlite3_vtab_cursor base
; /* Base class - must be first */
241 sqlite3_int64 iRowid
; /* The rowid of the current word */
242 fuzzer_vtab
*pVtab
; /* The virtual table this cursor belongs to */
243 fuzzer_cost rLimit
; /* Maximum cost of any term */
244 fuzzer_stem
*pStem
; /* Stem with smallest rCostX */
245 fuzzer_stem
*pDone
; /* Stems already processed to completion */
246 fuzzer_stem
*aQueue
[FUZZER_NQUEUE
]; /* Queue of stems with higher rCostX */
247 int mxQueue
; /* Largest used index in aQueue[] */
248 char *zBuf
; /* Temporary use buffer */
249 int nBuf
; /* Bytes allocated for zBuf */
250 int nStem
; /* Number of stems allocated */
251 int iRuleset
; /* Only process rules from this ruleset */
252 fuzzer_rule nullRule
; /* Null rule used first */
253 fuzzer_stem
*apHash
[FUZZER_HASH
]; /* Hash of previously generated terms */
257 ** The two input rule lists are both sorted in order of increasing
258 ** cost. Merge them together into a single list, sorted by cost, and
259 ** return a pointer to the head of that list.
261 static fuzzer_rule
*fuzzerMergeRules(fuzzer_rule
*pA
, fuzzer_rule
*pB
){
267 if( pA
->rCost
<=pB
->rCost
){
286 ** Statement pStmt currently points to a row in the fuzzer data table. This
287 ** function allocates and populates a fuzzer_rule structure according to
288 ** the content of the row.
290 ** If successful, *ppRule is set to point to the new object and SQLITE_OK
291 ** is returned. Otherwise, *ppRule is zeroed, *pzErr may be set to point
292 ** to an error message and an SQLite error code returned.
294 static int fuzzerLoadOneRule(
295 fuzzer_vtab
*p
, /* Fuzzer virtual table handle */
296 sqlite3_stmt
*pStmt
, /* Base rule on statements current row */
297 fuzzer_rule
**ppRule
, /* OUT: New rule object */
298 char **pzErr
/* OUT: Error message */
300 sqlite3_int64 iRuleset
= sqlite3_column_int64(pStmt
, 0);
301 const char *zFrom
= (const char *)sqlite3_column_text(pStmt
, 1);
302 const char *zTo
= (const char *)sqlite3_column_text(pStmt
, 2);
303 int nCost
= sqlite3_column_int(pStmt
, 3);
305 int rc
= SQLITE_OK
; /* Return code */
306 int nFrom
; /* Size of string zFrom, in bytes */
307 int nTo
; /* Size of string zTo, in bytes */
308 fuzzer_rule
*pRule
= 0; /* New rule object to return */
310 if( zFrom
==0 ) zFrom
= "";
311 if( zTo
==0 ) zTo
= "";
312 nFrom
= (int)strlen(zFrom
);
313 nTo
= (int)strlen(zTo
);
315 /* Silently ignore null transformations */
316 if( strcmp(zFrom
, zTo
)==0 ){
321 if( nCost
<=0 || nCost
>FUZZER_MX_COST
){
322 *pzErr
= sqlite3_mprintf("%s: cost must be between 1 and %d",
323 p
->zClassName
, FUZZER_MX_COST
327 if( nFrom
>FUZZER_MX_LENGTH
|| nTo
>FUZZER_MX_LENGTH
){
328 *pzErr
= sqlite3_mprintf("%s: maximum string length is %d",
329 p
->zClassName
, FUZZER_MX_LENGTH
333 if( iRuleset
<0 || iRuleset
>FUZZER_MX_RULEID
){
334 *pzErr
= sqlite3_mprintf("%s: ruleset must be between 0 and %d",
335 p
->zClassName
, FUZZER_MX_RULEID
340 pRule
= sqlite3_malloc64( sizeof(*pRule
) + nFrom
+ nTo
);
344 memset(pRule
, 0, sizeof(*pRule
));
345 pRule
->zFrom
= pRule
->zTo
;
346 pRule
->zFrom
+= nTo
+ 1;
347 pRule
->nFrom
= (fuzzer_len
)nFrom
;
348 memcpy(pRule
->zFrom
, zFrom
, nFrom
+1);
349 memcpy(pRule
->zTo
, zTo
, nTo
+1);
350 pRule
->nTo
= (fuzzer_len
)nTo
;
351 pRule
->rCost
= nCost
;
352 pRule
->iRuleset
= (int)iRuleset
;
361 ** Load the content of the fuzzer data table into memory.
363 static int fuzzerLoadRules(
364 sqlite3
*db
, /* Database handle */
365 fuzzer_vtab
*p
, /* Virtual fuzzer table to configure */
366 const char *zDb
, /* Database containing rules data */
367 const char *zData
, /* Table containing rules data */
368 char **pzErr
/* OUT: Error message */
370 int rc
= SQLITE_OK
; /* Return code */
371 char *zSql
; /* SELECT used to read from rules table */
372 fuzzer_rule
*pHead
= 0;
374 zSql
= sqlite3_mprintf("SELECT * FROM %Q.%Q", zDb
, zData
);
378 int rc2
; /* finalize() return code */
379 sqlite3_stmt
*pStmt
= 0;
380 rc
= sqlite3_prepare_v2(db
, zSql
, -1, &pStmt
, 0);
382 *pzErr
= sqlite3_mprintf("%s: %s", p
->zClassName
, sqlite3_errmsg(db
));
383 }else if( sqlite3_column_count(pStmt
)!=4 ){
384 *pzErr
= sqlite3_mprintf("%s: %s has %d columns, expected 4",
385 p
->zClassName
, zData
, sqlite3_column_count(pStmt
)
389 while( rc
==SQLITE_OK
&& SQLITE_ROW
==sqlite3_step(pStmt
) ){
390 fuzzer_rule
*pRule
= 0;
391 rc
= fuzzerLoadOneRule(p
, pStmt
, &pRule
, pzErr
);
393 pRule
->pNext
= pHead
;
398 rc2
= sqlite3_finalize(pStmt
);
399 if( rc
==SQLITE_OK
) rc
= rc2
;
403 /* All rules are now in a singly linked list starting at pHead. This
404 ** block sorts them by cost and then sets fuzzer_vtab.pRule to point to
405 ** point to the head of the sorted list.
411 for(i
=0; i
<sizeof(a
)/sizeof(a
[0]); i
++) a
[i
] = 0;
412 while( (pX
= pHead
)!=0 ){
415 for(i
=0; a
[i
] && i
<sizeof(a
)/sizeof(a
[0])-1; i
++){
416 pX
= fuzzerMergeRules(a
[i
], pX
);
419 a
[i
] = fuzzerMergeRules(a
[i
], pX
);
421 for(pX
=a
[0], i
=1; i
<sizeof(a
)/sizeof(a
[0]); i
++){
422 pX
= fuzzerMergeRules(a
[i
], pX
);
424 p
->pRule
= fuzzerMergeRules(p
->pRule
, pX
);
426 /* An error has occurred. Setting p->pRule to point to the head of the
427 ** allocated list ensures that the list will be cleaned up in this case.
429 assert( p
->pRule
==0 );
437 ** This function converts an SQL quoted string into an unquoted string
438 ** and returns a pointer to a buffer allocated using sqlite3_malloc()
439 ** containing the result. The caller should eventually free this buffer
440 ** using sqlite3_free.
449 static char *fuzzerDequote(const char *zIn
){
450 sqlite3_int64 nIn
; /* Size of input string, in bytes */
451 char *zOut
; /* Output (dequoted) string */
454 zOut
= sqlite3_malloc64(nIn
+1);
456 char q
= zIn
[0]; /* Quote character (if any ) */
458 if( q
!='[' && q
!= '\'' && q
!='"' && q
!='`' ){
459 memcpy(zOut
, zIn
, (size_t)(nIn
+1));
461 int iOut
= 0; /* Index of next byte to write to output */
462 int iIn
; /* Index of next byte to read from input */
464 if( q
=='[' ) q
= ']';
465 for(iIn
=1; iIn
<nIn
; iIn
++){
466 if( zIn
[iIn
]==q
) iIn
++;
467 zOut
[iOut
++] = zIn
[iIn
];
470 assert( (int)strlen(zOut
)<=nIn
);
476 ** xDisconnect/xDestroy method for the fuzzer module.
478 static int fuzzerDisconnect(sqlite3_vtab
*pVtab
){
479 fuzzer_vtab
*p
= (fuzzer_vtab
*)pVtab
;
480 assert( p
->nCursor
==0 );
482 fuzzer_rule
*pRule
= p
->pRule
;
483 p
->pRule
= pRule
->pNext
;
491 ** xConnect/xCreate method for the fuzzer module. Arguments are:
493 ** argv[0] -> module name ("fuzzer")
494 ** argv[1] -> database name
495 ** argv[2] -> table name
496 ** argv[3] -> fuzzer rule table name
498 static int fuzzerConnect(
501 int argc
, const char *const*argv
,
502 sqlite3_vtab
**ppVtab
,
505 int rc
= SQLITE_OK
; /* Return code */
506 fuzzer_vtab
*pNew
= 0; /* New virtual table */
507 const char *zModule
= argv
[0];
508 const char *zDb
= argv
[1];
511 *pzErr
= sqlite3_mprintf(
512 "%s: wrong number of CREATE VIRTUAL TABLE arguments", zModule
516 sqlite3_int64 nModule
; /* Length of zModule, in bytes */
518 nModule
= strlen(zModule
);
519 pNew
= sqlite3_malloc64( sizeof(*pNew
) + nModule
+ 1);
523 char *zTab
; /* Dequoted name of fuzzer data table */
525 memset(pNew
, 0, sizeof(*pNew
));
526 pNew
->zClassName
= (char*)&pNew
[1];
527 memcpy(pNew
->zClassName
, zModule
, (size_t)(nModule
+1));
529 zTab
= fuzzerDequote(argv
[3]);
533 rc
= fuzzerLoadRules(db
, pNew
, zDb
, zTab
, pzErr
);
538 rc
= sqlite3_declare_vtab(db
, "CREATE TABLE x(word,distance,ruleset)");
541 fuzzerDisconnect((sqlite3_vtab
*)pNew
);
544 sqlite3_vtab_config(db
, SQLITE_VTAB_INNOCUOUS
);
549 *ppVtab
= (sqlite3_vtab
*)pNew
;
554 ** Open a new fuzzer cursor.
556 static int fuzzerOpen(sqlite3_vtab
*pVTab
, sqlite3_vtab_cursor
**ppCursor
){
557 fuzzer_vtab
*p
= (fuzzer_vtab
*)pVTab
;
559 pCur
= sqlite3_malloc( sizeof(*pCur
) );
560 if( pCur
==0 ) return SQLITE_NOMEM
;
561 memset(pCur
, 0, sizeof(*pCur
));
563 *ppCursor
= &pCur
->base
;
569 ** Free all stems in a list.
571 static void fuzzerClearStemList(fuzzer_stem
*pStem
){
573 fuzzer_stem
*pNext
= pStem
->pNext
;
580 ** Free up all the memory allocated by a cursor. Set it rLimit to 0
581 ** to indicate that it is at EOF.
583 static void fuzzerClearCursor(fuzzer_cursor
*pCur
, int clearHash
){
585 fuzzerClearStemList(pCur
->pStem
);
586 fuzzerClearStemList(pCur
->pDone
);
587 for(i
=0; i
<FUZZER_NQUEUE
; i
++) fuzzerClearStemList(pCur
->aQueue
[i
]);
588 pCur
->rLimit
= (fuzzer_cost
)0;
589 if( clearHash
&& pCur
->nStem
){
593 memset(pCur
->aQueue
, 0, sizeof(pCur
->aQueue
));
594 memset(pCur
->apHash
, 0, sizeof(pCur
->apHash
));
600 ** Close a fuzzer cursor.
602 static int fuzzerClose(sqlite3_vtab_cursor
*cur
){
603 fuzzer_cursor
*pCur
= (fuzzer_cursor
*)cur
;
604 fuzzerClearCursor(pCur
, 0);
605 sqlite3_free(pCur
->zBuf
);
606 pCur
->pVtab
->nCursor
--;
612 ** Compute the current output term for a fuzzer_stem.
614 static int fuzzerRender(
615 fuzzer_stem
*pStem
, /* The stem to be rendered */
616 char **pzBuf
, /* Write results into this buffer. realloc if needed */
617 int *pnBuf
/* Size of the buffer */
619 const fuzzer_rule
*pRule
= pStem
->pRule
;
620 int n
; /* Size of output term without nul-term */
621 char *z
; /* Buffer to assemble output term in */
623 n
= pStem
->nBasis
+ pRule
->nTo
- pRule
->nFrom
;
625 (*pzBuf
) = sqlite3_realloc((*pzBuf
), n
+100);
626 if( (*pzBuf
)==0 ) return SQLITE_NOMEM
;
632 memcpy(z
, pStem
->zBasis
, pStem
->nBasis
+1);
634 memcpy(z
, pStem
->zBasis
, n
);
635 memcpy(&z
[n
], pRule
->zTo
, pRule
->nTo
);
636 memcpy(&z
[n
+pRule
->nTo
], &pStem
->zBasis
[n
+pRule
->nFrom
],
637 pStem
->nBasis
-n
-pRule
->nFrom
+1);
640 assert( z
[pStem
->nBasis
+ pRule
->nTo
- pRule
->nFrom
]==0 );
645 ** Compute a hash on zBasis.
647 static unsigned int fuzzerHash(const char *z
){
649 while( *z
){ h
= (h
<<3) ^ (h
>>29) ^ *(z
++); }
650 return h
% FUZZER_HASH
;
654 ** Current cost of a stem
656 static fuzzer_cost
fuzzerCost(fuzzer_stem
*pStem
){
657 return pStem
->rCostX
= pStem
->rBaseCost
+ pStem
->pRule
->rCost
;
662 ** Print a description of a fuzzer_stem on stderr.
664 static void fuzzerStemPrint(
670 fprintf(stderr
, "%s[%s](%d)-->self%s",
672 pStem
->zBasis
, pStem
->rBaseCost
,
678 if( fuzzerRender(pStem
, &zBuf
, &nBuf
)!=SQLITE_OK
) return;
679 fprintf(stderr
, "%s[%s](%d)-->{%s}(%d)%s",
681 pStem
->zBasis
, pStem
->rBaseCost
, zBuf
, pStem
->,
690 ** Return 1 if the string to which the cursor is point has already
691 ** been emitted. Return 0 if not. Return -1 on a memory allocation
694 static int fuzzerSeen(fuzzer_cursor
*pCur
, fuzzer_stem
*pStem
){
696 fuzzer_stem
*pLookup
;
698 if( fuzzerRender(pStem
, &pCur
->zBuf
, &pCur
->nBuf
)==SQLITE_NOMEM
){
701 h
= fuzzerHash(pCur
->zBuf
);
702 pLookup
= pCur
->apHash
[h
];
703 while( pLookup
&& strcmp(pLookup
->zBasis
, pCur
->zBuf
)!=0 ){
704 pLookup
= pLookup
->pHash
;
710 ** If argument pRule is NULL, this function returns false.
712 ** Otherwise, it returns true if rule pRule should be skipped. A rule
713 ** should be skipped if it does not belong to rule-set iRuleset, or if
714 ** applying it to stem pStem would create a string longer than
715 ** FUZZER_MX_OUTPUT_LENGTH bytes.
717 static int fuzzerSkipRule(
718 const fuzzer_rule
*pRule
, /* Determine whether or not to skip this */
719 fuzzer_stem
*pStem
, /* Stem rule may be applied to */
720 int iRuleset
/* Rule-set used by the current query */
723 (pRule
->iRuleset
!=iRuleset
)
724 || (pStem
->nBasis
+ pRule
->nTo
- pRule
->nFrom
)>FUZZER_MX_OUTPUT_LENGTH
729 ** Advance a fuzzer_stem to its next value. Return 0 if there are
730 ** no more values that can be generated by this fuzzer_stem. Return
731 ** -1 on a memory allocation failure.
733 static int fuzzerAdvance(fuzzer_cursor
*pCur
, fuzzer_stem
*pStem
){
734 const fuzzer_rule
*pRule
;
735 while( (pRule
= pStem
->pRule
)!=0 ){
736 assert( pRule
==&pCur
->nullRule
|| pRule
->iRuleset
==pCur
->iRuleset
);
737 while( pStem
->n
< pStem
->nBasis
- pRule
->nFrom
){
740 || memcmp(&pStem
->zBasis
[pStem
->n
], pRule
->zFrom
, pRule
->nFrom
)==0
742 /* Found a rewrite case. Make sure it is not a duplicate */
743 int rc
= fuzzerSeen(pCur
, pStem
);
744 if( rc
<0 ) return -1;
753 pRule
= pRule
->pNext
;
754 }while( fuzzerSkipRule(pRule
, pStem
, pCur
->iRuleset
) );
755 pStem
->pRule
= pRule
;
756 if( pRule
&& fuzzerCost(pStem
)>pCur
->rLimit
) pStem
->pRule
= 0;
762 ** The two input stem lists are both sorted in order of increasing
763 ** rCostX. Merge them together into a single list, sorted by rCostX, and
764 ** return a pointer to the head of that new list.
766 static fuzzer_stem
*fuzzerMergeStems(fuzzer_stem
*pA
, fuzzer_stem
*pB
){
772 if( pA
->rCostX
<=pB
->rCostX
){
791 ** Load pCur->pStem with the lowest-cost stem. Return a pointer
792 ** to the lowest-cost stem.
794 static fuzzer_stem
*fuzzerLowestCostStem(fuzzer_cursor
*pCur
){
795 fuzzer_stem
*pBest
, *pX
;
799 if( pCur
->pStem
==0 ){
802 for(i
=0; i
<=pCur
->mxQueue
; i
++){
803 pX
= pCur
->aQueue
[i
];
804 if( pX
==0 ) continue;
805 if( pBest
==0 || pBest
->rCostX
>pX
->rCostX
){
811 pCur
->aQueue
[iBest
] = pBest
->pNext
;
820 ** Insert pNew into queue of pending stems. Then find the stem
821 ** with the lowest rCostX and move it into pCur->pStem.
822 ** list. The insert is done such the pNew is in the correct order
823 ** according to fuzzer_stem.zBaseCost+fuzzer_stem.pRule->rCost.
825 static fuzzer_stem
*fuzzerInsert(fuzzer_cursor
*pCur
, fuzzer_stem
*pNew
){
829 /* If pCur->pStem exists and is greater than pNew, then make pNew
830 ** the new pCur->pStem and insert the old pCur->pStem instead.
832 if( (pX
= pCur
->pStem
)!=0 && pX
->rCostX
>pNew
->rCostX
){
838 /* Insert the new value */
841 for(i
=0; i
<=pCur
->mxQueue
; i
++){
842 if( pCur
->aQueue
[i
] ){
843 pX
= fuzzerMergeStems(pX
, pCur
->aQueue
[i
]);
846 pCur
->aQueue
[i
] = pX
;
850 if( i
>pCur
->mxQueue
){
851 if( i
<FUZZER_NQUEUE
){
853 pCur
->aQueue
[i
] = pX
;
855 assert( pCur
->mxQueue
==FUZZER_NQUEUE
-1 );
856 pX
= fuzzerMergeStems(pX
, pCur
->aQueue
[FUZZER_NQUEUE
-1]);
857 pCur
->aQueue
[FUZZER_NQUEUE
-1] = pX
;
861 return fuzzerLowestCostStem(pCur
);
865 ** Allocate a new fuzzer_stem. Add it to the hash table but do not
866 ** link it into either the pCur->pStem or pCur->pDone lists.
868 static fuzzer_stem
*fuzzerNewStem(
871 fuzzer_cost rBaseCost
877 pNew
= sqlite3_malloc64( sizeof(*pNew
) + strlen(zWord
) + 1 );
878 if( pNew
==0 ) return 0;
879 memset(pNew
, 0, sizeof(*pNew
));
880 pNew
->zBasis
= (char*)&pNew
[1];
881 pNew
->nBasis
= (fuzzer_len
)strlen(zWord
);
882 memcpy(pNew
->zBasis
, zWord
, pNew
->nBasis
+1);
883 pRule
= pCur
->pVtab
->pRule
;
884 while( fuzzerSkipRule(pRule
, pNew
, pCur
->iRuleset
) ){
885 pRule
= pRule
->pNext
;
889 pNew
->rBaseCost
= pNew
->rCostX
= rBaseCost
;
890 h
= fuzzerHash(pNew
->zBasis
);
891 pNew
->pHash
= pCur
->apHash
[h
];
892 pCur
->apHash
[h
] = pNew
;
899 ** Advance a cursor to its next row of output
901 static int fuzzerNext(sqlite3_vtab_cursor
*cur
){
902 fuzzer_cursor
*pCur
= (fuzzer_cursor
*)cur
;
904 fuzzer_stem
*pStem
, *pNew
;
908 /* Use the element the cursor is currently point to to create
909 ** a new stem and insert the new stem into the priority queue.
912 if( pStem
->rCostX
>0 ){
913 rc
= fuzzerRender(pStem
, &pCur
->zBuf
, &pCur
->nBuf
);
914 if( rc
==SQLITE_NOMEM
) return SQLITE_NOMEM
;
915 pNew
= fuzzerNewStem(pCur
, pCur
->zBuf
, pStem
->rCostX
);
917 if( fuzzerAdvance(pCur
, pNew
)==0 ){
918 pNew
->pNext
= pCur
->pDone
;
921 if( fuzzerInsert(pCur
, pNew
)==pNew
){
930 /* Adjust the priority queue so that the first element of the
931 ** stem list is the next lowest cost word.
933 while( (pStem
= pCur
->pStem
)!=0 ){
934 int res
= fuzzerAdvance(pCur
, pStem
);
939 pStem
= fuzzerInsert(pCur
, pStem
);
940 if( (rc
= fuzzerSeen(pCur
, pStem
))!=0 ){
941 if( rc
<0 ) return SQLITE_NOMEM
;
944 return SQLITE_OK
; /* New word found */
947 pStem
->pNext
= pCur
->pDone
;
949 if( fuzzerLowestCostStem(pCur
) ){
950 rc
= fuzzerSeen(pCur
, pCur
->pStem
);
951 if( rc
<0 ) return SQLITE_NOMEM
;
958 /* Reach this point only if queue has been exhausted and there is
959 ** nothing left to be output. */
960 pCur
->rLimit
= (fuzzer_cost
)0;
965 ** Called to "rewind" a cursor back to the beginning so that
966 ** it starts its output over again. Always called at least once
967 ** prior to any fuzzerColumn, fuzzerRowid, or fuzzerEof call.
969 static int fuzzerFilter(
970 sqlite3_vtab_cursor
*pVtabCursor
,
971 int idxNum
, const char *idxStr
,
972 int argc
, sqlite3_value
**argv
974 fuzzer_cursor
*pCur
= (fuzzer_cursor
*)pVtabCursor
;
975 const char *zWord
= "";
979 fuzzerClearCursor(pCur
, 1);
980 pCur
->rLimit
= 2147483647;
983 zWord
= (const char*)sqlite3_value_text(argv
[0]);
987 pCur
->rLimit
= (fuzzer_cost
)sqlite3_value_int(argv
[idx
]);
991 pCur
->iRuleset
= (fuzzer_cost
)sqlite3_value_int(argv
[idx
]);
994 pCur
->nullRule
.pNext
= pCur
->pVtab
->pRule
;
995 pCur
->nullRule
.rCost
= 0;
996 pCur
->nullRule
.nFrom
= 0;
997 pCur
->nullRule
.nTo
= 0;
998 pCur
->nullRule
.zFrom
= "";
1000 assert( pCur
->pStem
==0 );
1002 /* If the query term is longer than FUZZER_MX_OUTPUT_LENGTH bytes, this
1003 ** query will return zero rows. */
1004 if( (int)strlen(zWord
)<FUZZER_MX_OUTPUT_LENGTH
){
1005 pCur
->pStem
= pStem
= fuzzerNewStem(pCur
, zWord
, (fuzzer_cost
)0);
1006 if( pStem
==0 ) return SQLITE_NOMEM
;
1007 pStem
->pRule
= &pCur
->nullRule
;
1008 pStem
->n
= pStem
->nBasis
;
1017 ** Only the word and distance columns have values. All other columns
1020 static int fuzzerColumn(sqlite3_vtab_cursor
*cur
, sqlite3_context
*ctx
, int i
){
1021 fuzzer_cursor
*pCur
= (fuzzer_cursor
*)cur
;
1023 /* the "word" column */
1024 if( fuzzerRender(pCur
->pStem
, &pCur
->zBuf
, &pCur
->nBuf
)==SQLITE_NOMEM
){
1025 return SQLITE_NOMEM
;
1027 sqlite3_result_text(ctx
, pCur
->zBuf
, -1, SQLITE_TRANSIENT
);
1029 /* the "distance" column */
1030 sqlite3_result_int(ctx
, pCur
->pStem
->rCostX
);
1032 /* All other columns are NULL */
1033 sqlite3_result_null(ctx
);
1041 static int fuzzerRowid(sqlite3_vtab_cursor
*cur
, sqlite_int64
*pRowid
){
1042 fuzzer_cursor
*pCur
= (fuzzer_cursor
*)cur
;
1043 *pRowid
= pCur
->iRowid
;
1048 ** When the fuzzer_cursor.rLimit value is 0 or less, that is a signal
1049 ** that the cursor has nothing more to output.
1051 static int fuzzerEof(sqlite3_vtab_cursor
*cur
){
1052 fuzzer_cursor
*pCur
= (fuzzer_cursor
*)cur
;
1053 return pCur
->rLimit
<=(fuzzer_cost
)0;
1057 ** Search for terms of these forms:
1059 ** (A) word MATCH $str
1060 ** (B1) distance < $value
1061 ** (B2) distance <= $value
1062 ** (C) ruleid == $ruleid
1064 ** The distance< and distance<= are both treated as distance<=.
1065 ** The query plan number is a bit vector:
1067 ** bit 1: Term of the form (A) found
1068 ** bit 2: Term like (B1) or (B2) found
1069 ** bit 3: Term like (C) found
1071 ** If bit-1 is set, $str is always in filter.argv[0]. If bit-2 is set
1072 ** then $value is in filter.argv[0] if bit-1 is clear and is in
1073 ** filter.argv[1] if bit-1 is set. If bit-3 is set, then $ruleid is
1074 ** in filter.argv[0] if bit-1 and bit-2 are both zero, is in
1075 ** filter.argv[1] if exactly one of bit-1 and bit-2 are set, and is in
1076 ** filter.argv[2] if both bit-1 and bit-2 are set.
1078 static int fuzzerBestIndex(sqlite3_vtab
*tab
, sqlite3_index_info
*pIdxInfo
){
1081 int iRulesetTerm
= -1;
1084 const struct sqlite3_index_constraint
*pConstraint
;
1085 double rCost
= 1e12
;
1087 pConstraint
= pIdxInfo
->aConstraint
;
1088 for(i
=0; i
<pIdxInfo
->nConstraint
; i
++, pConstraint
++){
1089 if( pConstraint
->iColumn
==0
1090 && pConstraint
->op
==SQLITE_INDEX_CONSTRAINT_MATCH
){
1093 if( pConstraint
->usable
==0 ) continue;
1095 && pConstraint
->iColumn
==0
1096 && pConstraint
->op
==SQLITE_INDEX_CONSTRAINT_MATCH
1099 pIdxInfo
->aConstraintUsage
[i
].argvIndex
= 1;
1100 pIdxInfo
->aConstraintUsage
[i
].omit
= 1;
1104 && pConstraint
->iColumn
==1
1105 && (pConstraint
->op
==SQLITE_INDEX_CONSTRAINT_LT
1106 || pConstraint
->op
==SQLITE_INDEX_CONSTRAINT_LE
)
1113 && pConstraint
->iColumn
==2
1114 && pConstraint
->op
==SQLITE_INDEX_CONSTRAINT_EQ
1117 pIdxInfo
->aConstraintUsage
[i
].omit
= 1;
1123 pIdxInfo
->aConstraintUsage
[iDistTerm
].argvIndex
= 1+((iPlan
&1)!=0);
1127 if( iPlan
& 1 ) idx
++;
1128 if( iPlan
& 2 ) idx
++;
1129 pIdxInfo
->aConstraintUsage
[iRulesetTerm
].argvIndex
= idx
;
1131 pIdxInfo
->idxNum
= iPlan
;
1132 if( pIdxInfo
->nOrderBy
==1
1133 && pIdxInfo
->aOrderBy
[0].iColumn
==1
1134 && pIdxInfo
->aOrderBy
[0].desc
==0
1136 pIdxInfo
->orderByConsumed
= 1;
1138 if( seenMatch
&& (iPlan
&1)==0 ) rCost
= 1e99
;
1139 pIdxInfo
->estimatedCost
= rCost
;
1145 ** A virtual table module that implements the "fuzzer".
1147 static sqlite3_module fuzzerModule
= {
1154 fuzzerOpen
, /* xOpen - open a cursor */
1155 fuzzerClose
, /* xClose - close a cursor */
1156 fuzzerFilter
, /* xFilter - configure scan constraints */
1157 fuzzerNext
, /* xNext - advance a cursor */
1158 fuzzerEof
, /* xEof - check for end of scan */
1159 fuzzerColumn
, /* xColumn - read data */
1160 fuzzerRowid
, /* xRowid - read data */
1166 0, /* xFindMethod */
1170 0, /* xRollbackTo */
1171 0, /* xShadowName */
1175 #endif /* SQLITE_OMIT_VIRTUALTABLE */
1179 __declspec(dllexport
)
1181 int sqlite3_fuzzer_init(
1184 const sqlite3_api_routines
*pApi
1187 SQLITE_EXTENSION_INIT2(pApi
);
1188 #ifndef SQLITE_OMIT_VIRTUALTABLE
1189 rc
= sqlite3_create_module(db
, "fuzzer", &fuzzerModule
, 0);