2 ** Compile and run this standalone program in order to generate code that
3 ** implements a function that will translate alphabetic identifiers into
/*
** A header comment placed at the beginning of generated code.
*/
/* zHdr holds banner text.  Each quoted literal below is one '\n'-terminated
** line of that text; adjacent literals are concatenated by the compiler into
** a single string. */
14 static const char zHdr
[] =
15 "/***** This file contains automatically generated code ******\n"
17 "** The code in this file has been automatically generated by\n"
19 "** sqlite/tool/mkkeywordhash.c\n"
21 "** The code in this file implements a function that determines whether\n"
22 "** or not a given identifier is really an SQL keyword. The same thing\n"
23 "** might be implemented more directly using a hand-written hash table.\n"
24 "** But by using this automatically generated code, the size of the code\n"
25 "** is substantially reduced. This is important for embedded applications\n"
26 "** on platforms with limited memory.\n"
/*
** All the keywords of the SQL language are stored in a hash
** table composed of instances of the following structure.
**
** Only zName, zTokenType and mask are given in the static initializer of
** aKeywordTable[]; every other field starts out as zero.
*/
typedef struct Keyword Keyword;
struct Keyword {
  char *zName;         /* The keyword name */
  char *zTokenType;    /* Token value for this keyword */
  int mask;            /* Code this keyword if non-zero */
  int id;              /* Unique ID for this record */
  int hash;            /* Hash on the keyword */
  int offset;          /* Offset to start of name string */
  int len;             /* Length of this keyword, not counting final \000 */
  int prefix;          /* Number of characters in prefix */
  int longestSuffix;   /* Longest suffix that is a prefix on another word */
  int iNext;           /* Index in aKeywordTable[] of next with same hash */
  int substrId;        /* Id to another keyword this keyword is embedded in */
  int substrOffset;    /* Offset into substrId for start of this keyword */
  char zOrigName[20];  /* Original keyword name before processing */
};
/*
** Define masks used to determine which keywords are allowed.
**
** Each optional feature owns one bit.  When the matching SQLITE_OMIT_*
** option is defined the feature's mask is 0 instead, so a keyword whose
** combined mask evaluates to 0 is dropped from the keyword table.
** ALWAYS is unconditional.
*/
#ifdef SQLITE_OMIT_ALTERTABLE
#  define ALTER      0
#else
#  define ALTER      0x00000001
#endif
#define ALWAYS       0x00000002
#ifdef SQLITE_OMIT_ANALYZE
#  define ANALYZE    0
#else
#  define ANALYZE    0x00000004
#endif
#ifdef SQLITE_OMIT_ATTACH
#  define ATTACH     0
#else
#  define ATTACH     0x00000008
#endif
#ifdef SQLITE_OMIT_AUTOINCREMENT
#  define AUTOINCR   0
#else
#  define AUTOINCR   0x00000010
#endif
#ifdef SQLITE_OMIT_CAST
#  define CAST       0
#else
#  define CAST       0x00000020
#endif
#ifdef SQLITE_OMIT_COMPOUND_SELECT
#  define COMPOUND   0
#else
#  define COMPOUND   0x00000040
#endif
#ifdef SQLITE_OMIT_CONFLICT_CLAUSE
#  define CONFLICT   0
#else
#  define CONFLICT   0x00000080
#endif
#ifdef SQLITE_OMIT_EXPLAIN
#  define EXPLAIN    0
#else
#  define EXPLAIN    0x00000100
#endif
#ifdef SQLITE_OMIT_FOREIGN_KEY
#  define FKEY       0
#else
#  define FKEY       0x00000200
#endif
#ifdef SQLITE_OMIT_PRAGMA
#  define PRAGMA     0
#else
#  define PRAGMA     0x00000400
#endif
#ifdef SQLITE_OMIT_REINDEX
#  define REINDEX    0
#else
#  define REINDEX    0x00000800
#endif
#ifdef SQLITE_OMIT_SUBQUERY
#  define SUBQUERY   0
#else
#  define SUBQUERY   0x00001000
#endif
#ifdef SQLITE_OMIT_TRIGGER
#  define TRIGGER    0
#else
#  define TRIGGER    0x00002000
#endif
/* VACUUM is masked out only when auto-vacuum is omitted AND either VACUUM
** itself or ATTACH is omitted. */
#if defined(SQLITE_OMIT_AUTOVACUUM) && \
    (defined(SQLITE_OMIT_VACUUM) || defined(SQLITE_OMIT_ATTACH))
#  define VACUUM     0
#else
#  define VACUUM     0x00004000
#endif
#ifdef SQLITE_OMIT_VIEW
#  define VIEW       0
#else
#  define VIEW       0x00008000
#endif
#ifdef SQLITE_OMIT_VIRTUALTABLE
#  define VTAB       0
#else
#  define VTAB       0x00010000
#endif
#ifdef SQLITE_OMIT_AUTOVACUUM
#  define AUTOVACUUM 0
#else
#  define AUTOVACUUM 0x00020000
#endif
#ifdef SQLITE_OMIT_CTE
#  define CTE        0
#else
#  define CTE        0x00040000
#endif
148 ** These are the keywords
150 static Keyword aKeywordTable
[] = {
151 { "ABORT", "TK_ABORT", CONFLICT
|TRIGGER
},
152 { "ACTION", "TK_ACTION", FKEY
},
153 { "ADD", "TK_ADD", ALTER
},
154 { "AFTER", "TK_AFTER", TRIGGER
},
155 { "ALL", "TK_ALL", ALWAYS
},
156 { "ALTER", "TK_ALTER", ALTER
},
157 { "ANALYZE", "TK_ANALYZE", ANALYZE
},
158 { "AND", "TK_AND", ALWAYS
},
159 { "AS", "TK_AS", ALWAYS
},
160 { "ASC", "TK_ASC", ALWAYS
},
161 { "ATTACH", "TK_ATTACH", ATTACH
},
162 { "AUTOINCREMENT", "TK_AUTOINCR", AUTOINCR
},
163 { "BEFORE", "TK_BEFORE", TRIGGER
},
164 { "BEGIN", "TK_BEGIN", ALWAYS
},
165 { "BETWEEN", "TK_BETWEEN", ALWAYS
},
166 { "BY", "TK_BY", ALWAYS
},
167 { "CASCADE", "TK_CASCADE", FKEY
},
168 { "CASE", "TK_CASE", ALWAYS
},
169 { "CAST", "TK_CAST", CAST
},
170 { "CHECK", "TK_CHECK", ALWAYS
},
171 { "COLLATE", "TK_COLLATE", ALWAYS
},
172 { "COLUMN", "TK_COLUMNKW", ALTER
},
173 { "COMMIT", "TK_COMMIT", ALWAYS
},
174 { "CONFLICT", "TK_CONFLICT", CONFLICT
},
175 { "CONSTRAINT", "TK_CONSTRAINT", ALWAYS
},
176 { "CREATE", "TK_CREATE", ALWAYS
},
177 { "CROSS", "TK_JOIN_KW", ALWAYS
},
178 { "CURRENT_DATE", "TK_CTIME_KW", ALWAYS
},
179 { "CURRENT_TIME", "TK_CTIME_KW", ALWAYS
},
180 { "CURRENT_TIMESTAMP","TK_CTIME_KW", ALWAYS
},
181 { "DATABASE", "TK_DATABASE", ATTACH
},
182 { "DEFAULT", "TK_DEFAULT", ALWAYS
},
183 { "DEFERRED", "TK_DEFERRED", ALWAYS
},
184 { "DEFERRABLE", "TK_DEFERRABLE", FKEY
},
185 { "DELETE", "TK_DELETE", ALWAYS
},
186 { "DESC", "TK_DESC", ALWAYS
},
187 { "DETACH", "TK_DETACH", ATTACH
},
188 { "DISTINCT", "TK_DISTINCT", ALWAYS
},
189 { "DROP", "TK_DROP", ALWAYS
},
190 { "END", "TK_END", ALWAYS
},
191 { "EACH", "TK_EACH", TRIGGER
},
192 { "ELSE", "TK_ELSE", ALWAYS
},
193 { "ESCAPE", "TK_ESCAPE", ALWAYS
},
194 { "EXCEPT", "TK_EXCEPT", COMPOUND
},
195 { "EXCLUSIVE", "TK_EXCLUSIVE", ALWAYS
},
196 { "EXISTS", "TK_EXISTS", ALWAYS
},
197 { "EXPLAIN", "TK_EXPLAIN", EXPLAIN
},
198 { "FAIL", "TK_FAIL", CONFLICT
|TRIGGER
},
199 { "FOR", "TK_FOR", TRIGGER
},
200 { "FOREIGN", "TK_FOREIGN", FKEY
},
201 { "FROM", "TK_FROM", ALWAYS
},
202 { "FULL", "TK_JOIN_KW", ALWAYS
},
203 { "GLOB", "TK_LIKE_KW", ALWAYS
},
204 { "GROUP", "TK_GROUP", ALWAYS
},
205 { "HAVING", "TK_HAVING", ALWAYS
},
206 { "IF", "TK_IF", ALWAYS
},
207 { "IGNORE", "TK_IGNORE", CONFLICT
|TRIGGER
},
208 { "IMMEDIATE", "TK_IMMEDIATE", ALWAYS
},
209 { "IN", "TK_IN", ALWAYS
},
210 { "INDEX", "TK_INDEX", ALWAYS
},
211 { "INDEXED", "TK_INDEXED", ALWAYS
},
212 { "INITIALLY", "TK_INITIALLY", FKEY
},
213 { "INNER", "TK_JOIN_KW", ALWAYS
},
214 { "INSERT", "TK_INSERT", ALWAYS
},
215 { "INSTEAD", "TK_INSTEAD", TRIGGER
},
216 { "INTERSECT", "TK_INTERSECT", COMPOUND
},
217 { "INTO", "TK_INTO", ALWAYS
},
218 { "IS", "TK_IS", ALWAYS
},
219 { "ISNULL", "TK_ISNULL", ALWAYS
},
220 { "JOIN", "TK_JOIN", ALWAYS
},
221 { "KEY", "TK_KEY", ALWAYS
},
222 { "LEFT", "TK_JOIN_KW", ALWAYS
},
223 { "LIKE", "TK_LIKE_KW", ALWAYS
},
224 { "LIMIT", "TK_LIMIT", ALWAYS
},
225 { "MATCH", "TK_MATCH", ALWAYS
},
226 { "NATURAL", "TK_JOIN_KW", ALWAYS
},
227 { "NO", "TK_NO", FKEY
},
228 { "NOT", "TK_NOT", ALWAYS
},
229 { "NOTNULL", "TK_NOTNULL", ALWAYS
},
230 { "NULL", "TK_NULL", ALWAYS
},
231 { "OF", "TK_OF", ALWAYS
},
232 { "OFFSET", "TK_OFFSET", ALWAYS
},
233 { "ON", "TK_ON", ALWAYS
},
234 { "OR", "TK_OR", ALWAYS
},
235 { "ORDER", "TK_ORDER", ALWAYS
},
236 { "OUTER", "TK_JOIN_KW", ALWAYS
},
237 { "PLAN", "TK_PLAN", EXPLAIN
},
238 { "PRAGMA", "TK_PRAGMA", PRAGMA
},
239 { "PRIMARY", "TK_PRIMARY", ALWAYS
},
240 { "QUERY", "TK_QUERY", EXPLAIN
},
241 { "RAISE", "TK_RAISE", TRIGGER
},
242 { "RECURSIVE", "TK_RECURSIVE", CTE
},
243 { "REFERENCES", "TK_REFERENCES", FKEY
},
244 { "REGEXP", "TK_LIKE_KW", ALWAYS
},
245 { "REINDEX", "TK_REINDEX", REINDEX
},
246 { "RELEASE", "TK_RELEASE", ALWAYS
},
247 { "RENAME", "TK_RENAME", ALTER
},
248 { "REPLACE", "TK_REPLACE", CONFLICT
},
249 { "RESTRICT", "TK_RESTRICT", FKEY
},
250 { "RIGHT", "TK_JOIN_KW", ALWAYS
},
251 { "ROLLBACK", "TK_ROLLBACK", ALWAYS
},
252 { "ROW", "TK_ROW", TRIGGER
},
253 { "SAVEPOINT", "TK_SAVEPOINT", ALWAYS
},
254 { "SELECT", "TK_SELECT", ALWAYS
},
255 { "SET", "TK_SET", ALWAYS
},
256 { "TABLE", "TK_TABLE", ALWAYS
},
257 { "TEMP", "TK_TEMP", ALWAYS
},
258 { "TEMPORARY", "TK_TEMP", ALWAYS
},
259 { "THEN", "TK_THEN", ALWAYS
},
260 { "TO", "TK_TO", ALWAYS
},
261 { "TRANSACTION", "TK_TRANSACTION", ALWAYS
},
262 { "TRIGGER", "TK_TRIGGER", TRIGGER
},
263 { "UNION", "TK_UNION", COMPOUND
},
264 { "UNIQUE", "TK_UNIQUE", ALWAYS
},
265 { "UPDATE", "TK_UPDATE", ALWAYS
},
266 { "USING", "TK_USING", ALWAYS
},
267 { "VACUUM", "TK_VACUUM", VACUUM
},
268 { "VALUES", "TK_VALUES", ALWAYS
},
269 { "VIEW", "TK_VIEW", VIEW
},
270 { "VIRTUAL", "TK_VIRTUAL", VTAB
},
271 { "WITH", "TK_WITH", CTE
},
272 { "WITHOUT", "TK_WITHOUT", ALWAYS
},
273 { "WHEN", "TK_WHEN", ALWAYS
},
274 { "WHERE", "TK_WHERE", ALWAYS
},
277 /* Number of keywords */
278 static int nKeyword
= (sizeof(aKeywordTable
)/sizeof(aKeywordTable
[0]));
/* An array to map all upper-case characters into their corresponding
** lower-case character.
**
** The table has one entry per unsigned char value (256 in total).  Entries
** 65..90 ('A'..'Z' in ASCII) hold 97..122 ('a'..'z'); every other entry
** maps to itself.
*/
const unsigned char sqlite3UpperToLower[] = {
      0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
     16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
     32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
     48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
     64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
    112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,
     96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
    112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
    128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
    144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
    160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
    176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
    192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
    208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
    224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
    240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
};
#define UpperToLower sqlite3UpperToLower
303 ** Comparision function for two Keyword records
305 static int keywordCompare1(const void *a
, const void *b
){
306 const Keyword
*pA
= (Keyword
*)a
;
307 const Keyword
*pB
= (Keyword
*)b
;
308 int n
= pA
->len
- pB
->len
;
310 n
= strcmp(pA
->zName
, pB
->zName
);
315 static int keywordCompare2(const void *a
, const void *b
){
316 const Keyword
*pA
= (Keyword
*)a
;
317 const Keyword
*pB
= (Keyword
*)b
;
318 int n
= pB
->longestSuffix
- pA
->longestSuffix
;
320 n
= strcmp(pA
->zName
, pB
->zName
);
325 static int keywordCompare3(const void *a
, const void *b
){
326 const Keyword
*pA
= (Keyword
*)a
;
327 const Keyword
*pB
= (Keyword
*)b
;
328 int n
= pA
->offset
- pB
->offset
;
329 if( n
==0 ) n
= pB
->id
- pA
->id
;
335 ** Return a KeywordTable entry with the given id
337 static Keyword
*findById(int id
){
339 for(i
=0; i
<nKeyword
; i
++){
340 if( aKeywordTable
[i
].id
==id
) break;
342 return &aKeywordTable
[i
];
346 ** This routine does the work. The generated code is printed on standard
349 int main(int argc
, char **argv
){
351 int bestSize
, bestCount
;
355 int aHash
[1000]; /* 1000 is much bigger than nKeyword */
358 /* Remove entries from the list of keywords that have mask==0 */
359 for(i
=j
=0; i
<nKeyword
; i
++){
360 if( aKeywordTable
[i
].mask
==0 ) continue;
362 aKeywordTable
[j
] = aKeywordTable
[i
];
368 /* Fill in the lengths of strings and hashes for all entries. */
369 for(i
=0; i
<nKeyword
; i
++){
370 Keyword
*p
= &aKeywordTable
[i
];
371 p
->len
= (int)strlen(p
->zName
);
372 assert( p
->len
<sizeof(p
->zOrigName
) );
373 memcpy(p
->zOrigName
, p
->zName
, p
->len
+1);
375 p
->hash
= (UpperToLower
[(int)p
->zName
[0]]*4) ^
376 (UpperToLower
[(int)p
->zName
[p
->len
-1]]*3) ^ p
->len
;
380 /* Sort the table from shortest to longest keyword */
381 qsort(aKeywordTable
, nKeyword
, sizeof(aKeywordTable
[0]), keywordCompare1
);
383 /* Look for short keywords embedded in longer keywords */
384 for(i
=nKeyword
-2; i
>=0; i
--){
385 Keyword
*p
= &aKeywordTable
[i
];
386 for(j
=nKeyword
-1; j
>i
&& p
->substrId
==0; j
--){
387 Keyword
*pOther
= &aKeywordTable
[j
];
388 if( pOther
->substrId
) continue;
389 if( pOther
->len
<=p
->len
) continue;
390 for(k
=0; k
<=pOther
->len
-p
->len
; k
++){
391 if( memcmp(p
->zName
, &pOther
->zName
[k
], p
->len
)==0 ){
392 p
->substrId
= pOther
->id
;
400 /* Compute the longestSuffix value for every word */
401 for(i
=0; i
<nKeyword
; i
++){
402 Keyword
*p
= &aKeywordTable
[i
];
403 if( p
->substrId
) continue;
404 for(j
=0; j
<nKeyword
; j
++){
407 pOther
= &aKeywordTable
[j
];
408 if( pOther
->substrId
) continue;
409 for(k
=p
->longestSuffix
+1; k
<p
->len
&& k
<pOther
->len
; k
++){
410 if( memcmp(&p
->zName
[p
->len
-k
], pOther
->zName
, k
)==0 ){
411 p
->longestSuffix
= k
;
417 /* Sort the table into reverse order by length */
418 qsort(aKeywordTable
, nKeyword
, sizeof(aKeywordTable
[0]), keywordCompare2
);
420 /* Fill in the offset for all entries */
422 for(i
=0; i
<nKeyword
; i
++){
423 Keyword
*p
= &aKeywordTable
[i
];
424 if( p
->offset
>0 || p
->substrId
) continue;
427 for(k
=p
->len
-1; k
>=1; k
--){
428 for(j
=i
+1; j
<nKeyword
; j
++){
429 Keyword
*pOther
= &aKeywordTable
[j
];
430 if( pOther
->offset
>0 || pOther
->substrId
) continue;
431 if( pOther
->len
<=k
) continue;
432 if( memcmp(&p
->zName
[p
->len
-k
], pOther
->zName
, k
)==0 ){
434 p
->offset
= nChar
- k
;
435 nChar
= p
->offset
+ p
->len
;
445 for(i
=0; i
<nKeyword
; i
++){
446 Keyword
*p
= &aKeywordTable
[i
];
448 p
->offset
= findById(p
->substrId
)->offset
+ p
->substrOffset
;
452 /* Sort the table by offset */
453 qsort(aKeywordTable
, nKeyword
, sizeof(aKeywordTable
[0]), keywordCompare3
);
455 /* Figure out how big to make the hash table in order to minimize the
456 ** number of collisions */
458 bestCount
= nKeyword
*nKeyword
;
459 for(i
=nKeyword
/2; i
<=2*nKeyword
; i
++){
460 for(j
=0; j
<i
; j
++) aHash
[j
] = 0;
461 for(j
=0; j
<nKeyword
; j
++){
462 h
= aKeywordTable
[j
].hash
% i
;
466 for(j
=count
=0; j
<i
; j
++) count
+= aHash
[j
];
467 if( count
<bestCount
){
473 /* Compute the hash */
474 for(i
=0; i
<bestSize
; i
++) aHash
[i
] = 0;
475 for(i
=0; i
<nKeyword
; i
++){
476 h
= aKeywordTable
[i
].hash
% bestSize
;
477 aKeywordTable
[i
].iNext
= aHash
[h
];
481 /* Begin generating code */
483 printf("/* Hash score: %d */\n", bestCount
);
484 printf("static int keywordCode(const char *z, int n){\n");
485 printf(" /* zText[] encodes %d bytes of keywords in %d bytes */\n",
486 totalLen
+ nKeyword
, nChar
+1 );
487 for(i
=j
=k
=0; i
<nKeyword
; i
++){
488 Keyword
*p
= &aKeywordTable
[i
];
489 if( p
->substrId
) continue;
490 memcpy(&zText
[k
], p
->zName
, p
->len
);
493 printf("%*s */\n", 74-j
, "");
500 printf("%s", p
->zName
);
504 printf("%*s */\n", 74-j
, "");
506 printf(" static const char zText[%d] = {\n", nChar
);
508 for(i
=j
=0; i
<k
; i
++){
515 printf("'%c',", zText
[i
]);
523 if( j
>0 ) printf("\n");
526 printf(" static const unsigned char aHash[%d] = {\n", bestSize
);
527 for(i
=j
=0; i
<bestSize
; i
++){
528 if( j
==0 ) printf(" ");
529 printf(" %3d,", aHash
[i
]);
536 printf("%s };\n", j
==0 ? "" : "\n");
538 printf(" static const unsigned char aNext[%d] = {\n", nKeyword
);
539 for(i
=j
=0; i
<nKeyword
; i
++){
540 if( j
==0 ) printf(" ");
541 printf(" %3d,", aKeywordTable
[i
].iNext
);
548 printf("%s };\n", j
==0 ? "" : "\n");
550 printf(" static const unsigned char aLen[%d] = {\n", nKeyword
);
551 for(i
=j
=0; i
<nKeyword
; i
++){
552 if( j
==0 ) printf(" ");
553 printf(" %3d,", aKeywordTable
[i
].len
+aKeywordTable
[i
].prefix
);
560 printf("%s };\n", j
==0 ? "" : "\n");
562 printf(" static const unsigned short int aOffset[%d] = {\n", nKeyword
);
563 for(i
=j
=0; i
<nKeyword
; i
++){
564 if( j
==0 ) printf(" ");
565 printf(" %3d,", aKeywordTable
[i
].offset
);
572 printf("%s };\n", j
==0 ? "" : "\n");
574 printf(" static const unsigned char aCode[%d] = {\n", nKeyword
);
575 for(i
=j
=0; i
<nKeyword
; i
++){
576 char *zToken
= aKeywordTable
[i
].zTokenType
;
577 if( j
==0 ) printf(" ");
578 printf("%s,%*s", zToken
, (int)(14-strlen(zToken
)), "");
585 printf("%s };\n", j
==0 ? "" : "\n");
587 printf(" int h, i;\n");
588 printf(" if( n<2 ) return TK_ID;\n");
589 printf(" h = ((charMap(z[0])*4) ^\n"
590 " (charMap(z[n-1])*3) ^\n"
591 " n) %% %d;\n", bestSize
);
592 printf(" for(i=((int)aHash[h])-1; i>=0; i=((int)aNext[i])-1){\n");
593 printf(" if( aLen[i]==n &&"
594 " sqlite3StrNICmp(&zText[aOffset[i]],z,n)==0 ){\n");
595 for(i
=0; i
<nKeyword
; i
++){
596 printf(" testcase( i==%d ); /* %s */\n",
597 i
, aKeywordTable
[i
].zOrigName
);
599 printf(" return aCode[i];\n");
602 printf(" return TK_ID;\n");
604 printf("int sqlite3KeywordCode(const unsigned char *z, int n){\n");
605 printf(" return keywordCode((char*)z, n);\n");
607 printf("#define SQLITE_N_KEYWORD %d\n", nKeyword
);