4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
11 ******************************************************************************
13 ** This file contains code for the "fts3tokenize" virtual table module.
14 ** An fts3tokenize virtual table is created as follows:
16 ** CREATE VIRTUAL TABLE <tbl> USING fts3tokenize(
17 ** <tokenizer-name>, <arg-1>, ...
18 ** );
20 ** The table created has the following schema:
22 ** CREATE TABLE <tbl>(input, token, start, end, position)
24 ** When queried, the query must include a WHERE clause of type:
26 ** input = <string>
28 ** The virtual table module tokenizes this <string>, using the FTS3
29 ** tokenizer specified by the arguments to the CREATE VIRTUAL TABLE
30 ** statement and returns one row for each token in the result. With
31 ** fields set as follows:
33 ** input: Always set to a copy of <string>
34 ** token: A token from the input.
35 ** start: Byte offset of the token within the input <string>.
36 ** end: Byte offset of the byte immediately following the end of the
37 ** token within the input string.
38 ** position: Token offset of the token within the input.
42 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
47 typedef struct Fts3tokTable Fts3tokTable
;
48 typedef struct Fts3tokCursor Fts3tokCursor
;
51 ** Virtual table structure.
54 sqlite3_vtab base
; /* Base class used by SQLite core */
55 const sqlite3_tokenizer_module
*pMod
;
56 sqlite3_tokenizer
*pTok
;
60 ** Virtual table cursor structure.
62 struct Fts3tokCursor
{
63 sqlite3_vtab_cursor base
; /* Base class used by SQLite core */
64 char *zInput
; /* Input string */
65 sqlite3_tokenizer_cursor
*pCsr
; /* Cursor to iterate through zInput */
66 int iRowid
; /* Current 'rowid' value */
67 const char *zToken
; /* Current 'token' value */
68 int nToken
; /* Size of zToken in bytes */
69 int iStart
; /* Current 'start' value */
70 int iEnd
; /* Current 'end' value */
71 int iPos
; /* Current 'pos' value */
75 ** Query FTS for the tokenizer implementation named zName.
77 static int fts3tokQueryTokenizer(
80 const sqlite3_tokenizer_module
**pp
,
83 sqlite3_tokenizer_module
*p
;
84 int nName
= (int)strlen(zName
);
86 p
= (sqlite3_tokenizer_module
*)sqlite3Fts3HashFind(pHash
, zName
, nName
+1);
88 sqlite3Fts3ErrMsg(pzErr
, "unknown tokenizer: %s", zName
);
97 ** The second argument, argv[], is an array of pointers to nul-terminated
98 ** strings. This function makes a copy of the array and strings into a
99 ** single block of memory. It then dequotes any of the strings that appear
102 ** If successful, output parameter *pazDequote is set to point at the
103 ** array of dequoted strings and SQLITE_OK is returned. The caller is
104 ** responsible for eventually calling sqlite3_free() to free the array
105 ** in this case. Or, if an error occurs, an SQLite error code is returned.
106 ** The final value of *pazDequote is undefined in this case.
108 static int fts3tokDequoteArray(
109 int argc
, /* Number of elements in argv[] */
110 const char * const *argv
, /* Input array */
111 char ***pazDequote
/* Output array */
113 int rc
= SQLITE_OK
; /* Return code */
121 for(i
=0; i
<argc
; i
++){
122 nByte
+= (int)(strlen(argv
[i
]) + 1);
125 *pazDequote
= azDequote
= sqlite3_malloc64(sizeof(char *)*argc
+ nByte
);
129 char *pSpace
= (char *)&azDequote
[argc
];
130 for(i
=0; i
<argc
; i
++){
131 int n
= (int)strlen(argv
[i
]);
132 azDequote
[i
] = pSpace
;
133 memcpy(pSpace
, argv
[i
], n
+1);
134 sqlite3Fts3Dequote(pSpace
);
144 ** Schema of the tokenizer table.
146 #define FTS3_TOK_SCHEMA "CREATE TABLE x(input, token, start, end, position)"
149 ** This function does all the work for both the xConnect and xCreate methods.
150 ** These tables have no persistent representation of their own, so xConnect
151 ** and xCreate are identical operations.
153 ** argv[0]: module name
154 ** argv[1]: database name
155 ** argv[2]: table name
156 ** argv[3]: first argument (tokenizer name)
158 static int fts3tokConnectMethod(
159 sqlite3
*db
, /* Database connection */
160 void *pHash
, /* Hash table of tokenizers */
161 int argc
, /* Number of elements in argv array */
162 const char * const *argv
, /* xCreate/xConnect argument array */
163 sqlite3_vtab
**ppVtab
, /* OUT: New sqlite3_vtab object */
164 char **pzErr
/* OUT: sqlite3_malloc'd error message */
166 Fts3tokTable
*pTab
= 0;
167 const sqlite3_tokenizer_module
*pMod
= 0;
168 sqlite3_tokenizer
*pTok
= 0;
170 char **azDequote
= 0;
173 rc
= sqlite3_declare_vtab(db
, FTS3_TOK_SCHEMA
);
174 if( rc
!=SQLITE_OK
) return rc
;
177 rc
= fts3tokDequoteArray(nDequote
, &argv
[3], &azDequote
);
184 zModule
= azDequote
[0];
186 rc
= fts3tokQueryTokenizer((Fts3Hash
*)pHash
, zModule
, &pMod
, pzErr
);
189 assert( (rc
==SQLITE_OK
)==(pMod
!=0) );
191 const char * const *azArg
= 0;
192 if( nDequote
>1 ) azArg
= (const char * const *)&azDequote
[1];
193 rc
= pMod
->xCreate((nDequote
>1 ? nDequote
-1 : 0), azArg
, &pTok
);
197 pTab
= (Fts3tokTable
*)sqlite3_malloc(sizeof(Fts3tokTable
));
204 memset(pTab
, 0, sizeof(Fts3tokTable
));
207 *ppVtab
= &pTab
->base
;
210 pMod
->xDestroy(pTok
);
214 sqlite3_free(azDequote
);
219 ** This function does the work for both the xDisconnect and xDestroy methods.
220 ** These tables have no persistent representation of their own, so xDisconnect
221 ** and xDestroy are identical operations.
223 static int fts3tokDisconnectMethod(sqlite3_vtab
*pVtab
){
224 Fts3tokTable
*pTab
= (Fts3tokTable
*)pVtab
;
226 pTab
->pMod
->xDestroy(pTab
->pTok
);
232 ** xBestIndex - Analyze a WHERE and ORDER BY clause.
234 static int fts3tokBestIndexMethod(
236 sqlite3_index_info
*pInfo
239 UNUSED_PARAMETER(pVTab
);
241 for(i
=0; i
<pInfo
->nConstraint
; i
++){
242 if( pInfo
->aConstraint
[i
].usable
243 && pInfo
->aConstraint
[i
].iColumn
==0
244 && pInfo
->aConstraint
[i
].op
==SQLITE_INDEX_CONSTRAINT_EQ
247 pInfo
->aConstraintUsage
[i
].argvIndex
= 1;
248 pInfo
->aConstraintUsage
[i
].omit
= 1;
249 pInfo
->estimatedCost
= 1;
255 assert( pInfo
->estimatedCost
>1000000.0 );
261 ** xOpen - Open a cursor.
263 static int fts3tokOpenMethod(sqlite3_vtab
*pVTab
, sqlite3_vtab_cursor
**ppCsr
){
265 UNUSED_PARAMETER(pVTab
);
267 pCsr
= (Fts3tokCursor
*)sqlite3_malloc(sizeof(Fts3tokCursor
));
271 memset(pCsr
, 0, sizeof(Fts3tokCursor
));
273 *ppCsr
= (sqlite3_vtab_cursor
*)pCsr
;
278 ** Reset the tokenizer cursor passed as the only argument. As if it had
279 ** just been returned by fts3tokOpenMethod().
281 static void fts3tokResetCursor(Fts3tokCursor
*pCsr
){
283 Fts3tokTable
*pTab
= (Fts3tokTable
*)(pCsr
->base
.pVtab
);
284 pTab
->pMod
->xClose(pCsr
->pCsr
);
287 sqlite3_free(pCsr
->zInput
);
298 ** xClose - Close a cursor.
300 static int fts3tokCloseMethod(sqlite3_vtab_cursor
*pCursor
){
301 Fts3tokCursor
*pCsr
= (Fts3tokCursor
*)pCursor
;
303 fts3tokResetCursor(pCsr
);
309 ** xNext - Advance the cursor to the next row, if any.
311 static int fts3tokNextMethod(sqlite3_vtab_cursor
*pCursor
){
312 Fts3tokCursor
*pCsr
= (Fts3tokCursor
*)pCursor
;
313 Fts3tokTable
*pTab
= (Fts3tokTable
*)(pCursor
->pVtab
);
314 int rc
; /* Return code */
317 rc
= pTab
->pMod
->xNext(pCsr
->pCsr
,
318 &pCsr
->zToken
, &pCsr
->nToken
,
319 &pCsr
->iStart
, &pCsr
->iEnd
, &pCsr
->iPos
323 fts3tokResetCursor(pCsr
);
324 if( rc
==SQLITE_DONE
) rc
= SQLITE_OK
;
331 ** xFilter - Initialize a cursor to point at the start of its data.
333 static int fts3tokFilterMethod(
334 sqlite3_vtab_cursor
*pCursor
, /* The cursor used for this query */
335 int idxNum
, /* Strategy index */
336 const char *idxStr
, /* Unused */
337 int nVal
, /* Number of elements in apVal */
338 sqlite3_value
**apVal
/* Arguments for the indexing scheme */
340 int rc
= SQLITE_ERROR
;
341 Fts3tokCursor
*pCsr
= (Fts3tokCursor
*)pCursor
;
342 Fts3tokTable
*pTab
= (Fts3tokTable
*)(pCursor
->pVtab
);
343 UNUSED_PARAMETER(idxStr
);
344 UNUSED_PARAMETER(nVal
);
346 fts3tokResetCursor(pCsr
);
348 const char *zByte
= (const char *)sqlite3_value_text(apVal
[0]);
349 int nByte
= sqlite3_value_bytes(apVal
[0]);
350 pCsr
->zInput
= sqlite3_malloc64(nByte
+1);
351 if( pCsr
->zInput
==0 ){
354 if( nByte
>0 ) memcpy(pCsr
->zInput
, zByte
, nByte
);
355 pCsr
->zInput
[nByte
] = 0;
356 rc
= pTab
->pMod
->xOpen(pTab
->pTok
, pCsr
->zInput
, nByte
, &pCsr
->pCsr
);
358 pCsr
->pCsr
->pTokenizer
= pTab
->pTok
;
363 if( rc
!=SQLITE_OK
) return rc
;
364 return fts3tokNextMethod(pCursor
);
368 ** xEof - Return true if the cursor is at EOF, or false otherwise.
370 static int fts3tokEofMethod(sqlite3_vtab_cursor
*pCursor
){
371 Fts3tokCursor
*pCsr
= (Fts3tokCursor
*)pCursor
;
372 return (pCsr
->zToken
==0);
376 ** xColumn - Return a column value.
378 static int fts3tokColumnMethod(
379 sqlite3_vtab_cursor
*pCursor
, /* Cursor to retrieve value from */
380 sqlite3_context
*pCtx
, /* Context for sqlite3_result_xxx() calls */
381 int iCol
/* Index of column to read value from */
383 Fts3tokCursor
*pCsr
= (Fts3tokCursor
*)pCursor
;
385 /* CREATE TABLE x(input, token, start, end, position) */
388 sqlite3_result_text(pCtx
, pCsr
->zInput
, -1, SQLITE_TRANSIENT
);
391 sqlite3_result_text(pCtx
, pCsr
->zToken
, pCsr
->nToken
, SQLITE_TRANSIENT
);
394 sqlite3_result_int(pCtx
, pCsr
->iStart
);
397 sqlite3_result_int(pCtx
, pCsr
->iEnd
);
401 sqlite3_result_int(pCtx
, pCsr
->iPos
);
408 ** xRowid - Return the current rowid for the cursor.
410 static int fts3tokRowidMethod(
411 sqlite3_vtab_cursor
*pCursor
, /* Cursor to retrieve value from */
412 sqlite_int64
*pRowid
/* OUT: Rowid value */
414 Fts3tokCursor
*pCsr
= (Fts3tokCursor
*)pCursor
;
415 *pRowid
= (sqlite3_int64
)pCsr
->iRowid
;
420 ** Register the fts3tok module with database connection db. Return SQLITE_OK
421 ** if successful or an error code if sqlite3_create_module() fails.
423 int sqlite3Fts3InitTok(sqlite3
*db
, Fts3Hash
*pHash
, void(*xDestroy
)(void*)){
424 static const sqlite3_module fts3tok_module
= {
426 fts3tokConnectMethod
, /* xCreate */
427 fts3tokConnectMethod
, /* xConnect */
428 fts3tokBestIndexMethod
, /* xBestIndex */
429 fts3tokDisconnectMethod
, /* xDisconnect */
430 fts3tokDisconnectMethod
, /* xDestroy */
431 fts3tokOpenMethod
, /* xOpen */
432 fts3tokCloseMethod
, /* xClose */
433 fts3tokFilterMethod
, /* xFilter */
434 fts3tokNextMethod
, /* xNext */
435 fts3tokEofMethod
, /* xEof */
436 fts3tokColumnMethod
, /* xColumn */
437 fts3tokRowidMethod
, /* xRowid */
443 0, /* xFindFunction */
451 int rc
; /* Return code */
453 rc
= sqlite3_create_module_v2(
454 db
, "fts3tokenize", &fts3tok_module
, (void*)pHash
, xDestroy
459 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */