Roll src/third_party/WebKit a3b4a2e:7441784 (svn 202551:202552)
[chromium-blink-merge.git] / third_party / sqlite / src / ext / fts3 / tool / fts3view.c
blob3dc1ba80fee088e6aefa9e068431abe5dae48ebb
/*
** This program is a debugging and analysis utility that displays
** information about an FTS3 or FTS4 index.
**
** Link this program against the SQLite3 amalgamation with the
** SQLITE_ENABLE_FTS4 compile-time option.  Then run it as:
**
**    fts3view DATABASE
**
** to get a list of all FTS3/4 tables in DATABASE, or do
**
**    fts3view DATABASE TABLE COMMAND ....
**
** to see various aspects of the TABLE table.  Type fts3view with no
** arguments for a list of available COMMANDs.
*/
17 #include <stdio.h>
18 #include <stdarg.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <ctype.h>
22 #include "sqlite3.h"
/*
** Extra command-line arguments: azExtra[0..nExtra-1] are the arguments
** that have not yet been consumed by findOption().
*/
int nExtra;
char **azExtra;

/*
** Look for a command-line argument.
**
** Scan azExtra[] for the first entry that equals zName once all of its
** leading '-' characters have been skipped.
**
**   *  If no entry matches, return zDefault and leave azExtra[] alone.
**
**   *  If an entry matches and hasArg is non-zero and the entry is not the
**      last one, return the entry that follows it (the option value) and
**      remove both entries from azExtra[].
**
**   *  Otherwise (hasArg is zero, or the option is the last entry) return
**      the matching entry itself and remove just that entry.
**
** Removal compacts azExtra[] in place and reduces nExtra accordingly, so
** that later calls never look at slots that no longer hold an argument.
** The returned pointer refers to the original argument string and stays
** valid after the removal.
*/
const char *findOption(const char *zName, int hasArg, const char *zDefault){
  int i;
  for(i=0; i<nExtra; i++){
    const char *z = azExtra[i];
    const char *zResult;
    int nRemove;                /* Number of entries to drop: 1 or 2 */
    int k;

    if( z==0 ) continue;        /* Defensive: never dereference a NULL slot */
    while( z[0]=='-' ) z++;
    if( strcmp(z, zName)!=0 ) continue;

    if( hasArg!=0 && i<nExtra-1 ){
      zResult = azExtra[i+1];
      nRemove = 2;
    }else{
      zResult = azExtra[i];
      nRemove = 1;
    }

    /* Close the gap without reading beyond azExtra[nExtra-1] */
    for(k=i; k+nRemove<nExtra; k++){
      azExtra[k] = azExtra[k+nRemove];
    }
    nExtra -= nRemove;
    return zResult;
  }
  return zDefault;
}
55 ** Prepare an SQL query
57 static sqlite3_stmt *prepare(sqlite3 *db, const char *zFormat, ...){
58 va_list ap;
59 char *zSql;
60 sqlite3_stmt *pStmt;
61 int rc;
63 va_start(ap, zFormat);
64 zSql = sqlite3_vmprintf(zFormat, ap);
65 va_end(ap);
66 rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
67 if( rc ){
68 fprintf(stderr, "Error: %s\nSQL: %s\n", sqlite3_errmsg(db), zSql);
69 exit(1);
71 sqlite3_free(zSql);
72 return pStmt;
76 ** Run an SQL statement
78 static int runSql(sqlite3 *db, const char *zFormat, ...){
79 va_list ap;
80 char *zSql;
81 int rc;
83 va_start(ap, zFormat);
84 zSql = sqlite3_vmprintf(zFormat, ap);
85 rc = sqlite3_exec(db, zSql, 0, 0, 0);
86 va_end(ap);
87 return rc;
91 ** Show the table schema
93 static void showSchema(sqlite3 *db, const char *zTab){
94 sqlite3_stmt *pStmt;
95 pStmt = prepare(db,
96 "SELECT sql FROM sqlite_master"
97 " WHERE name LIKE '%q%%'"
98 " ORDER BY 1",
99 zTab);
100 while( sqlite3_step(pStmt)==SQLITE_ROW ){
101 printf("%s;\n", sqlite3_column_text(pStmt, 0));
103 sqlite3_finalize(pStmt);
104 pStmt = prepare(db, "PRAGMA page_size");
105 while( sqlite3_step(pStmt)==SQLITE_ROW ){
106 printf("PRAGMA page_size=%s;\n", sqlite3_column_text(pStmt, 0));
108 sqlite3_finalize(pStmt);
109 pStmt = prepare(db, "PRAGMA journal_mode");
110 while( sqlite3_step(pStmt)==SQLITE_ROW ){
111 printf("PRAGMA journal_mode=%s;\n", sqlite3_column_text(pStmt, 0));
113 sqlite3_finalize(pStmt);
114 pStmt = prepare(db, "PRAGMA auto_vacuum");
115 while( sqlite3_step(pStmt)==SQLITE_ROW ){
116 const char *zType = "???";
117 switch( sqlite3_column_int(pStmt, 0) ){
118 case 0: zType = "OFF"; break;
119 case 1: zType = "FULL"; break;
120 case 2: zType = "INCREMENTAL"; break;
122 printf("PRAGMA auto_vacuum=%s;\n", zType);
124 sqlite3_finalize(pStmt);
125 pStmt = prepare(db, "PRAGMA encoding");
126 while( sqlite3_step(pStmt)==SQLITE_ROW ){
127 printf("PRAGMA encoding=%s;\n", sqlite3_column_text(pStmt, 0));
129 sqlite3_finalize(pStmt);
133 ** Read a 64-bit variable-length integer from memory starting at p[0].
134 ** Return the number of bytes read, or 0 on error.
135 ** The value is stored in *v.
137 int getVarint(const unsigned char *p, sqlite_int64 *v){
138 const unsigned char *q = p;
139 sqlite_uint64 x = 0, y = 1;
140 while( (*q&0x80)==0x80 && q-(unsigned char *)p<9 ){
141 x += y * (*q++ & 0x7f);
142 y <<= 7;
144 x += y * (*q++);
145 *v = (sqlite_int64) x;
146 return (int) (q - (unsigned char *)p);
150 /* Show the content of the %_stat table
152 static void showStat(sqlite3 *db, const char *zTab){
153 sqlite3_stmt *pStmt;
154 pStmt = prepare(db, "SELECT id, value FROM '%q_stat'", zTab);
155 while( sqlite3_step(pStmt)==SQLITE_ROW ){
156 printf("stat[%d] =", sqlite3_column_int(pStmt, 0));
157 switch( sqlite3_column_type(pStmt, 1) ){
158 case SQLITE_INTEGER: {
159 printf(" %d\n", sqlite3_column_int(pStmt, 1));
160 break;
162 case SQLITE_BLOB: {
163 unsigned char *x = (unsigned char*)sqlite3_column_blob(pStmt, 1);
164 int len = sqlite3_column_bytes(pStmt, 1);
165 int i = 0;
166 sqlite3_int64 v;
167 while( i<len ){
168 i += getVarint(x, &v);
169 printf(" %lld", v);
171 printf("\n");
172 break;
176 sqlite3_finalize(pStmt);
180 ** Report on the vocabulary. This creates an fts4aux table with a random
181 ** name, but deletes it in the end.
183 static void showVocabulary(sqlite3 *db, const char *zTab){
184 char *zAux;
185 sqlite3_uint64 r;
186 sqlite3_stmt *pStmt;
187 int nDoc = 0;
188 int nToken = 0;
189 int nOccurrence = 0;
190 int nTop;
191 int n, i;
193 sqlite3_randomness(sizeof(r), &r);
194 zAux = sqlite3_mprintf("viewer_%llx", zTab, r);
195 runSql(db, "BEGIN");
196 pStmt = prepare(db, "SELECT count(*) FROM %Q", zTab);
197 while( sqlite3_step(pStmt)==SQLITE_ROW ){
198 nDoc = sqlite3_column_int(pStmt, 0);
200 sqlite3_finalize(pStmt);
201 printf("Number of documents...................... %9d\n", nDoc);
203 runSql(db, "CREATE VIRTUAL TABLE %s USING fts4aux(%Q)", zAux, zTab);
204 pStmt = prepare(db,
205 "SELECT count(*), sum(occurrences) FROM %s WHERE col='*'",
206 zAux);
207 while( sqlite3_step(pStmt)==SQLITE_ROW ){
208 nToken = sqlite3_column_int(pStmt, 0);
209 nOccurrence = sqlite3_column_int(pStmt, 1);
211 sqlite3_finalize(pStmt);
212 printf("Total tokens in all documents............ %9d\n", nOccurrence);
213 printf("Total number of distinct tokens.......... %9d\n", nToken);
214 if( nToken==0 ) goto end_vocab;
216 n = 0;
217 pStmt = prepare(db, "SELECT count(*) FROM %s"
218 " WHERE col='*' AND occurrences==1", zAux);
219 while( sqlite3_step(pStmt)==SQLITE_ROW ){
220 n = sqlite3_column_int(pStmt, 0);
222 sqlite3_finalize(pStmt);
223 printf("Tokens used exactly once................. %9d %5.2f%%\n",
224 n, n*100.0/nToken);
226 n = 0;
227 pStmt = prepare(db, "SELECT count(*) FROM %s"
228 " WHERE col='*' AND documents==1", zAux);
229 while( sqlite3_step(pStmt)==SQLITE_ROW ){
230 n = sqlite3_column_int(pStmt, 0);
232 sqlite3_finalize(pStmt);
233 printf("Tokens used in only one document......... %9d %5.2f%%\n",
234 n, n*100.0/nToken);
236 if( nDoc>=2000 ){
237 n = 0;
238 pStmt = prepare(db, "SELECT count(*) FROM %s"
239 " WHERE col='*' AND occurrences<=%d", zAux, nDoc/1000);
240 while( sqlite3_step(pStmt)==SQLITE_ROW ){
241 n = sqlite3_column_int(pStmt, 0);
243 sqlite3_finalize(pStmt);
244 printf("Tokens used in 0.1%% or less of docs...... %9d %5.2f%%\n",
245 n, n*100.0/nToken);
248 if( nDoc>=200 ){
249 n = 0;
250 pStmt = prepare(db, "SELECT count(*) FROM %s"
251 " WHERE col='*' AND occurrences<=%d", zAux, nDoc/100);
252 while( sqlite3_step(pStmt)==SQLITE_ROW ){
253 n = sqlite3_column_int(pStmt, 0);
255 sqlite3_finalize(pStmt);
256 printf("Tokens used in 1%% or less of docs........ %9d %5.2f%%\n",
257 n, n*100.0/nToken);
260 nTop = atoi(findOption("top", 1, "25"));
261 printf("The %d most common tokens:\n", nTop);
262 pStmt = prepare(db,
263 "SELECT term, documents FROM %s"
264 " WHERE col='*'"
265 " ORDER BY documents DESC, term"
266 " LIMIT %d", zAux, nTop);
267 i = 0;
268 while( sqlite3_step(pStmt)==SQLITE_ROW ){
269 i++;
270 n = sqlite3_column_int(pStmt, 1);
271 printf(" %2d. %-30s %9d docs %5.2f%%\n", i,
272 sqlite3_column_text(pStmt, 0), n, n*100.0/nDoc);
274 sqlite3_finalize(pStmt);
276 end_vocab:
277 runSql(db, "ROLLBACK");
278 sqlite3_free(zAux);
282 ** Report on the number and sizes of segments
284 static void showSegmentStats(sqlite3 *db, const char *zTab){
285 sqlite3_stmt *pStmt;
286 int nSeg = 0;
287 sqlite3_int64 szSeg = 0, mxSeg = 0;
288 int nIdx = 0;
289 sqlite3_int64 szIdx = 0, mxIdx = 0;
290 int nRoot = 0;
291 sqlite3_int64 szRoot = 0, mxRoot = 0;
292 sqlite3_int64 mx;
293 int nLeaf;
294 int n;
295 int pgsz;
296 int mxLevel;
297 int i;
299 pStmt = prepare(db,
300 "SELECT count(*), sum(length(block)), max(length(block))"
301 " FROM '%q_segments'",
302 zTab);
303 while( sqlite3_step(pStmt)==SQLITE_ROW ){
304 nSeg = sqlite3_column_int(pStmt, 0);
305 szSeg = sqlite3_column_int64(pStmt, 1);
306 mxSeg = sqlite3_column_int64(pStmt, 2);
308 sqlite3_finalize(pStmt);
309 pStmt = prepare(db,
310 "SELECT count(*), sum(length(block)), max(length(block))"
311 " FROM '%q_segments' a JOIN '%q_segdir' b"
312 " WHERE a.blockid BETWEEN b.leaves_end_block+1 AND b.end_block",
313 zTab, zTab);
314 while( sqlite3_step(pStmt)==SQLITE_ROW ){
315 nIdx = sqlite3_column_int(pStmt, 0);
316 szIdx = sqlite3_column_int64(pStmt, 1);
317 mxIdx = sqlite3_column_int64(pStmt, 2);
319 sqlite3_finalize(pStmt);
320 pStmt = prepare(db,
321 "SELECT count(*), sum(length(root)), max(length(root))"
322 " FROM '%q_segdir'",
323 zTab);
324 while( sqlite3_step(pStmt)==SQLITE_ROW ){
325 nRoot = sqlite3_column_int(pStmt, 0);
326 szRoot = sqlite3_column_int64(pStmt, 1);
327 mxRoot = sqlite3_column_int64(pStmt, 2);
329 sqlite3_finalize(pStmt);
331 printf("Number of segments....................... %9d\n", nSeg+nRoot);
332 printf("Number of leaf segments.................. %9d\n", nSeg-nIdx);
333 printf("Number of index segments................. %9d\n", nIdx);
334 printf("Number of root segments.................. %9d\n", nRoot);
335 printf("Total size of all segments............... %9lld\n", szSeg+szRoot);
336 printf("Total size of all leaf segments.......... %9lld\n", szSeg-szIdx);
337 printf("Total size of all index segments......... %9lld\n", szIdx);
338 printf("Total size of all root segments.......... %9lld\n", szRoot);
339 if( nSeg>0 ){
340 printf("Average size of all segments............. %11.1f\n",
341 (double)(szSeg+szRoot)/(double)(nSeg+nRoot));
342 printf("Average size of leaf segments............ %11.1f\n",
343 (double)(szSeg-szIdx)/(double)(nSeg-nIdx));
345 if( nIdx>0 ){
346 printf("Average size of index segments........... %11.1f\n",
347 (double)szIdx/(double)nIdx);
349 if( nRoot>0 ){
350 printf("Average size of root segments............ %11.1f\n",
351 (double)szRoot/(double)nRoot);
353 mx = mxSeg;
354 if( mx<mxRoot ) mx = mxRoot;
355 printf("Maximum segment size..................... %9lld\n", mx);
356 printf("Maximum index segment size............... %9lld\n", mxIdx);
357 printf("Maximum root segment size................ %9lld\n", mxRoot);
359 pStmt = prepare(db, "PRAGMA page_size");
360 pgsz = 1024;
361 while( sqlite3_step(pStmt)==SQLITE_ROW ){
362 pgsz = sqlite3_column_int(pStmt, 0);
364 sqlite3_finalize(pStmt);
365 printf("Database page size....................... %9d\n", pgsz);
366 pStmt = prepare(db,
367 "SELECT count(*)"
368 " FROM '%q_segments' a JOIN '%q_segdir' b"
369 " WHERE a.blockid BETWEEN b.start_block AND b.leaves_end_block"
370 " AND length(a.block)>%d",
371 zTab, zTab, pgsz-45);
372 n = 0;
373 while( sqlite3_step(pStmt)==SQLITE_ROW ){
374 n = sqlite3_column_int(pStmt, 0);
376 sqlite3_finalize(pStmt);
377 nLeaf = nSeg - nIdx;
378 printf("Leaf segments larger than %5d bytes.... %9d %5.2f%%\n",
379 pgsz-45, n, nLeaf>0 ? n*100.0/nLeaf : 0.0);
381 pStmt = prepare(db, "SELECT max(level%%1024) FROM '%q_segdir'", zTab);
382 mxLevel = 0;
383 while( sqlite3_step(pStmt)==SQLITE_ROW ){
384 mxLevel = sqlite3_column_int(pStmt, 0);
386 sqlite3_finalize(pStmt);
388 for(i=0; i<=mxLevel; i++){
389 pStmt = prepare(db,
390 "SELECT count(*), sum(len), avg(len), max(len), sum(len>%d),"
391 " count(distinct idx)"
392 " FROM (SELECT length(a.block) AS len, idx"
393 " FROM '%q_segments' a JOIN '%q_segdir' b"
394 " WHERE (a.blockid BETWEEN b.start_block"
395 " AND b.leaves_end_block)"
396 " AND (b.level%%1024)==%d)",
397 pgsz-45, zTab, zTab, i);
398 if( sqlite3_step(pStmt)==SQLITE_ROW
399 && (nLeaf = sqlite3_column_int(pStmt, 0))>0
401 int nIdx = sqlite3_column_int(pStmt, 5);
402 sqlite3_int64 sz;
403 printf("For level %d:\n", i);
404 printf(" Number of indexes...................... %9d\n", nIdx);
405 printf(" Number of leaf segments................ %9d\n", nLeaf);
406 if( nIdx>1 ){
407 printf(" Average leaf segments per index........ %11.1f\n",
408 (double)nLeaf/(double)nIdx);
410 printf(" Total size of all leaf segments........ %9lld\n",
411 (sz = sqlite3_column_int64(pStmt, 1)));
412 printf(" Average size of leaf segments.......... %11.1f\n",
413 sqlite3_column_double(pStmt, 2));
414 if( nIdx>1 ){
415 printf(" Average leaf segment size per index.... %11.1f\n",
416 (double)sz/(double)nIdx);
418 printf(" Maximum leaf segment size.............. %9lld\n",
419 sqlite3_column_int64(pStmt, 3));
420 n = sqlite3_column_int(pStmt, 4);
421 printf(" Leaf segments larger than %5d bytes.. %9d %5.2f%%\n",
422 pgsz-45, n, n*100.0/nLeaf);
424 sqlite3_finalize(pStmt);
429 ** Print a single "tree" line of the segdir map output.
431 static void printTreeLine(sqlite3_int64 iLower, sqlite3_int64 iUpper){
432 printf(" tree %9lld", iLower);
433 if( iUpper>iLower ){
434 printf(" thru %9lld (%lld blocks)", iUpper, iUpper-iLower+1);
436 printf("\n");
440 ** Check to see if the block of a %_segments entry is NULL.
442 static int isNullSegment(sqlite3 *db, const char *zTab, sqlite3_int64 iBlockId){
443 sqlite3_stmt *pStmt;
444 int rc = 1;
446 pStmt = prepare(db, "SELECT block IS NULL FROM '%q_segments'"
447 " WHERE blockid=%lld", zTab, iBlockId);
448 if( sqlite3_step(pStmt)==SQLITE_ROW ){
449 rc = sqlite3_column_int(pStmt, 0);
451 sqlite3_finalize(pStmt);
452 return rc;
456 ** Show a map of segments derived from the %_segdir table.
458 static void showSegdirMap(sqlite3 *db, const char *zTab){
459 int mxIndex, iIndex;
460 sqlite3_stmt *pStmt = 0;
461 sqlite3_stmt *pStmt2 = 0;
462 int prevLevel;
464 pStmt = prepare(db, "SELECT max(level/1024) FROM '%q_segdir'", zTab);
465 if( sqlite3_step(pStmt)==SQLITE_ROW ){
466 mxIndex = sqlite3_column_int(pStmt, 0);
467 }else{
468 mxIndex = 0;
470 sqlite3_finalize(pStmt);
472 printf("Number of inverted indices............... %3d\n", mxIndex+1);
473 pStmt = prepare(db,
474 "SELECT level, idx, start_block, leaves_end_block, end_block, rowid"
475 " FROM '%q_segdir'"
476 " WHERE level/1024==?"
477 " ORDER BY level DESC, idx",
478 zTab);
479 pStmt2 = prepare(db,
480 "SELECT blockid FROM '%q_segments'"
481 " WHERE blockid BETWEEN ? AND ? ORDER BY blockid",
482 zTab);
483 for(iIndex=0; iIndex<=mxIndex; iIndex++){
484 if( mxIndex>0 ){
485 printf("**************************** Index %d "
486 "****************************\n", iIndex);
488 sqlite3_bind_int(pStmt, 1, iIndex);
489 prevLevel = -1;
490 while( sqlite3_step(pStmt)==SQLITE_ROW ){
491 int iLevel = sqlite3_column_int(pStmt, 0)%1024;
492 int iIdx = sqlite3_column_int(pStmt, 1);
493 sqlite3_int64 iStart = sqlite3_column_int64(pStmt, 2);
494 sqlite3_int64 iLEnd = sqlite3_column_int64(pStmt, 3);
495 sqlite3_int64 iEnd = sqlite3_column_int64(pStmt, 4);
496 char rtag[20];
497 if( iLevel!=prevLevel ){
498 printf("level %2d idx %2d", iLevel, iIdx);
499 prevLevel = iLevel;
500 }else{
501 printf(" idx %2d", iIdx);
503 sqlite3_snprintf(sizeof(rtag), rtag, "r%lld",
504 sqlite3_column_int64(pStmt,5));
505 printf(" root %9s\n", rtag);
506 if( iLEnd>iStart ){
507 sqlite3_int64 iLower, iPrev, iX;
508 if( iLEnd+1<=iEnd ){
509 sqlite3_bind_int64(pStmt2, 1, iLEnd+1);
510 sqlite3_bind_int64(pStmt2, 2, iEnd);
511 iLower = -1;
512 while( sqlite3_step(pStmt2)==SQLITE_ROW ){
513 iX = sqlite3_column_int64(pStmt2, 0);
514 if( iLower<0 ){
515 iLower = iPrev = iX;
516 }else if( iX==iPrev+1 ){
517 iPrev = iX;
518 }else{
519 printTreeLine(iLower, iPrev);
520 iLower = iPrev = iX;
523 sqlite3_reset(pStmt2);
524 if( iLower>=0 ){
525 if( iLower==iPrev && iLower==iEnd
526 && isNullSegment(db,zTab,iLower)
528 printf(" null %9lld\n", iLower);
529 }else{
530 printTreeLine(iLower, iPrev);
534 printf(" leaves %9lld thru %9lld (%lld blocks)\n",
535 iStart, iLEnd, iLEnd - iStart + 1);
538 sqlite3_reset(pStmt);
540 sqlite3_finalize(pStmt);
541 sqlite3_finalize(pStmt2);
545 ** Decode a single segment block and display the results on stdout.
547 static void decodeSegment(
548 const unsigned char *aData, /* Content to print */
549 int nData /* Number of bytes of content */
551 sqlite3_int64 iChild;
552 sqlite3_int64 iPrefix;
553 sqlite3_int64 nTerm;
554 sqlite3_int64 n;
555 sqlite3_int64 iDocsz;
556 int iHeight;
557 sqlite3_int64 i = 0;
558 int cnt = 0;
559 char zTerm[1000];
561 i += getVarint(aData, &n);
562 iHeight = (int)n;
563 printf("height: %d\n", iHeight);
564 if( iHeight>0 ){
565 i += getVarint(aData+i, &iChild);
566 printf("left-child: %lld\n", iChild);
568 while( i<nData ){
569 if( (cnt++)>0 ){
570 i += getVarint(aData+i, &iPrefix);
571 }else{
572 iPrefix = 0;
574 i += getVarint(aData+i, &nTerm);
575 if( iPrefix+nTerm+1 >= sizeof(zTerm) ){
576 fprintf(stderr, "term to long\n");
577 exit(1);
579 memcpy(zTerm+iPrefix, aData+i, (size_t)nTerm);
580 zTerm[iPrefix+nTerm] = 0;
581 i += nTerm;
582 if( iHeight==0 ){
583 i += getVarint(aData+i, &iDocsz);
584 printf("term: %-25s doclist %7lld bytes offset %lld\n", zTerm, iDocsz, i);
585 i += iDocsz;
586 }else{
587 printf("term: %-25s child %lld\n", zTerm, ++iChild);
/*
** Print a blob as hex and ascii.
**
** Output is written to stdout, 16 bytes per line.  Each line starts with
** the hexadecimal offset of its first byte (the field width grows with
** nData), followed by the bytes in hex and then the same bytes as
** characters, with '.' standing in for non-printable bytes.  A short
** final line is padded with blanks so that the character column stays
** aligned.  Nothing is printed when nData is zero or negative.
*/
static void printBlob(
  const unsigned char *aData,   /* Content to print */
  int nData                     /* Number of bytes of content */
){
  int i, j;
  const char *zOfstFmt;
  const int perLine = 16;

  if( (nData&~0xfff)==0 ){
    zOfstFmt = " %03x: ";
  }else if( (nData&~0xffff)==0 ){
    zOfstFmt = " %04x: ";
  }else if( (nData&~0xfffff)==0 ){
    zOfstFmt = " %05x: ";
  }else if( (nData&~0xffffff)==0 ){
    zOfstFmt = " %06x: ";
  }else{
    zOfstFmt = " %08x: ";
  }

  for(i=0; i<nData; i += perLine){
    fprintf(stdout, zOfstFmt, i);
    for(j=0; j<perLine; j++){
      /* ">=": index nData is already one past the last valid byte */
      if( i+j>=nData ){
        fprintf(stdout, "   ");   /* Same width as a "%02x " cell */
      }else{
        fprintf(stdout,"%02x ", aData[i+j]);
      }
    }
    for(j=0; j<perLine; j++){
      if( i+j>=nData ){
        fprintf(stdout, " ");
      }else{
        fprintf(stdout,"%c", isprint(aData[i+j]) ? aData[i+j] : '.');
      }
    }
    fprintf(stdout,"\n");
  }
}
637 ** Convert text to a 64-bit integer
639 static sqlite3_int64 atoi64(const char *z){
640 sqlite3_int64 v = 0;
641 while( z[0]>='0' && z[0]<='9' ){
642 v = v*10 + z[0] - '0';
643 z++;
645 return v;
649 ** Return a prepared statement which, when stepped, will return in its
650 ** first column the blob associated with segment zId. If zId begins with
651 ** 'r' then it is a rowid of a %_segdir entry. Otherwise it is a
652 ** %_segment entry.
654 static sqlite3_stmt *prepareToGetSegment(
655 sqlite3 *db, /* The database */
656 const char *zTab, /* The FTS3/4 table name */
657 const char *zId /* ID of the segment to open */
659 sqlite3_stmt *pStmt;
660 if( zId[0]=='r' ){
661 pStmt = prepare(db, "SELECT root FROM '%q_segdir' WHERE rowid=%lld",
662 zTab, atoi64(zId+1));
663 }else{
664 pStmt = prepare(db, "SELECT block FROM '%q_segments' WHERE blockid=%lld",
665 zTab, atoi64(zId));
667 return pStmt;
671 ** Print the content of a segment or of the root of a segdir. The segment
672 ** or root is identified by azExtra[0]. If the first character of azExtra[0]
673 ** is 'r' then the remainder is the integer rowid of the %_segdir entry.
674 ** If the first character of azExtra[0] is not 'r' then, then all of
675 ** azExtra[0] is an integer which is the block number.
677 ** If the --raw option is present in azExtra, then a hex dump is provided.
678 ** Otherwise a decoding is shown.
680 static void showSegment(sqlite3 *db, const char *zTab){
681 const unsigned char *aData;
682 int nData;
683 sqlite3_stmt *pStmt;
685 pStmt = prepareToGetSegment(db, zTab, azExtra[0]);
686 if( sqlite3_step(pStmt)!=SQLITE_ROW ){
687 sqlite3_finalize(pStmt);
688 return;
690 nData = sqlite3_column_bytes(pStmt, 0);
691 aData = sqlite3_column_blob(pStmt, 0);
692 printf("Segment %s of size %d bytes:\n", azExtra[0], nData);
693 if( findOption("raw", 0, 0)!=0 ){
694 printBlob(aData, nData);
695 }else{
696 decodeSegment(aData, nData);
698 sqlite3_finalize(pStmt);
702 ** Decode a single doclist and display the results on stdout.
704 static void decodeDoclist(
705 const unsigned char *aData, /* Content to print */
706 int nData /* Number of bytes of content */
708 sqlite3_int64 iPrevDocid = 0;
709 sqlite3_int64 iDocid;
710 sqlite3_int64 iPos;
711 sqlite3_int64 iPrevPos = 0;
712 sqlite3_int64 iCol;
713 int i = 0;
715 while( i<nData ){
716 i += getVarint(aData+i, &iDocid);
717 printf("docid %lld col0", iDocid+iPrevDocid);
718 iPrevDocid += iDocid;
719 iPrevPos = 0;
720 while( 1 ){
721 i += getVarint(aData+i, &iPos);
722 if( iPos==1 ){
723 i += getVarint(aData+i, &iCol);
724 printf(" col%lld", iCol);
725 iPrevPos = 0;
726 }else if( iPos==0 ){
727 printf("\n");
728 break;
729 }else{
730 iPrevPos += iPos - 2;
731 printf(" %lld", iPrevPos);
739 ** Print the content of a doclist. The segment or segdir-root is
740 ** identified by azExtra[0]. If the first character of azExtra[0]
741 ** is 'r' then the remainder is the integer rowid of the %_segdir entry.
742 ** If the first character of azExtra[0] is not 'r' then, then all of
743 ** azExtra[0] is an integer which is the block number. The offset
744 ** into the segment is identified by azExtra[1]. The size of the doclist
745 ** is azExtra[2].
747 ** If the --raw option is present in azExtra, then a hex dump is provided.
748 ** Otherwise a decoding is shown.
750 static void showDoclist(sqlite3 *db, const char *zTab){
751 const unsigned char *aData;
752 sqlite3_int64 offset;
753 int nData;
754 sqlite3_stmt *pStmt;
756 offset = atoi64(azExtra[1]);
757 nData = atoi(azExtra[2]);
758 pStmt = prepareToGetSegment(db, zTab, azExtra[0]);
759 if( sqlite3_step(pStmt)!=SQLITE_ROW ){
760 sqlite3_finalize(pStmt);
761 return;
763 aData = sqlite3_column_blob(pStmt, 0);
764 printf("Doclist at %s offset %lld of size %d bytes:\n",
765 azExtra[0], offset, nData);
766 if( findOption("raw", 0, 0)!=0 ){
767 printBlob(aData+offset, nData);
768 }else{
769 decodeDoclist(aData+offset, nData);
771 sqlite3_finalize(pStmt);
775 ** Show the top N largest segments
777 static void listBigSegments(sqlite3 *db, const char *zTab){
778 int nTop, i;
779 sqlite3_stmt *pStmt;
780 sqlite3_int64 sz;
781 sqlite3_int64 id;
783 nTop = atoi(findOption("top", 1, "25"));
784 printf("The %d largest segments:\n", nTop);
785 pStmt = prepare(db,
786 "SELECT blockid, length(block) AS len FROM '%q_segments'"
787 " ORDER BY 2 DESC, 1"
788 " LIMIT %d", zTab, nTop);
789 i = 0;
790 while( sqlite3_step(pStmt)==SQLITE_ROW ){
791 i++;
792 id = sqlite3_column_int64(pStmt, 0);
793 sz = sqlite3_column_int64(pStmt, 1);
794 printf(" %2d. %9lld size %lld\n", i, id, sz);
796 sqlite3_finalize(pStmt);
/*
** Print the command-line synopsis and the list of available commands on
** stderr, then terminate the process with exit status 1.  argv0 is the
** program name shown in the synopsis.
*/
static void usage(const char *argv0){
  fprintf(stderr, "Usage: %s DATABASE\n", argv0);
  fprintf(stderr, " or: %s DATABASE FTS3TABLE ARGS...\n", argv0);
  fprintf(stderr,
    "ARGS:\n"
    " big-segments [--top N] show the largest segments\n"
    " doclist BLOCKID OFFSET SIZE [--raw] Decode a doclist\n"
    " schema FTS table schema\n"
    " segdir directory of segments\n"
    " segment BLOCKID [--raw] content of a segment\n"
    " segment-stats info on segment sizes\n"
    " stat the %%_stat table\n"
    " vocabulary [--top N] document vocabulary\n"
  );
  exit(1);
}
818 int main(int argc, char **argv){
819 sqlite3 *db;
820 int rc;
821 const char *zTab;
822 const char *zCmd;
824 if( argc<2 ) usage(argv[0]);
825 rc = sqlite3_open(argv[1], &db);
826 if( rc ){
827 fprintf(stderr, "Cannot open %s\n", argv[1]);
828 exit(1);
830 if( argc==2 ){
831 sqlite3_stmt *pStmt;
832 int cnt = 0;
833 pStmt = prepare(db, "SELECT b.sql"
834 " FROM sqlite_master a, sqlite_master b"
835 " WHERE a.name GLOB '*_segdir'"
836 " AND b.name=substr(a.name,1,length(a.name)-7)"
837 " ORDER BY 1");
838 while( sqlite3_step(pStmt)==SQLITE_ROW ){
839 cnt++;
840 printf("%s;\n", sqlite3_column_text(pStmt, 0));
842 sqlite3_finalize(pStmt);
843 if( cnt==0 ){
844 printf("/* No FTS3/4 tables found in database %s */\n", argv[1]);
846 return 0;
848 if( argc<4 ) usage(argv[0]);
849 zTab = argv[2];
850 zCmd = argv[3];
851 nExtra = argc-4;
852 azExtra = argv+4;
853 if( strcmp(zCmd,"big-segments")==0 ){
854 listBigSegments(db, zTab);
855 }else if( strcmp(zCmd,"doclist")==0 ){
856 if( argc<7 ) usage(argv[0]);
857 showDoclist(db, zTab);
858 }else if( strcmp(zCmd,"schema")==0 ){
859 showSchema(db, zTab);
860 }else if( strcmp(zCmd,"segdir")==0 ){
861 showSegdirMap(db, zTab);
862 }else if( strcmp(zCmd,"segment")==0 ){
863 if( argc<5 ) usage(argv[0]);
864 showSegment(db, zTab);
865 }else if( strcmp(zCmd,"segment-stats")==0 ){
866 showSegmentStats(db, zTab);
867 }else if( strcmp(zCmd,"stat")==0 ){
868 showStat(db, zTab);
869 }else if( strcmp(zCmd,"vocabulary")==0 ){
870 showVocabulary(db, zTab);
871 }else{
872 usage(argv[0]);
874 return 0;