3 # The author disclaims copyright to this source code. In place of
4 # a legal notice, here is a blessing:
6 # May you do good and not evil.
7 # May you find forgiveness for yourself and forgive others.
8 # May you share freely, never taking more than you give.
10 #***********************************************************************
11 # This file implements regression tests for SQLite library. The focus of
12 # this file is testing the SQLite routines used for converting between the
13 # various suported unicode encodings (UTF-8, UTF-16, UTF-16le and
17 set testdir [file dirname $argv0]
18 source $testdir/tester.tcl
20 # If UTF16 support is disabled, ignore the tests in this file
27 # The rough organisation of tests in this file is:
29 # enc2.1.*: Simple tests with a UTF-8 db.
30 # enc2.2.*: Simple tests with a UTF-16LE db.
31 # enc2.3.*: Simple tests with a UTF-16BE db.
32 # enc2.4.*: Test that attached databases must have the same text encoding
33 # as the main database.
34 # enc2.5.*: Test the behavior of the library when a collation sequence is
35 # not available for the most desirable text encoding.
36 # enc2.6.*: Similar test for user functions.
37 # enc2.7.*: Test that the VerifyCookie opcode protects against assuming the
38 # wrong text encoding for the database.
39 # enc2.8.*: Test sqlite3_complete16()
44 # Return the UTF-8 representation of the supplied UTF-16 string $str.
46 # If $str ends in two 0x00 0x00 bytes, knock these off before
47 # converting to UTF-8 using TCL.
48 binary scan $str \c* vals
49 if {[lindex $vals end]==0 && [lindex $vals end-1]==0} {
50 set str [binary format \c* [lrange $vals 0 end-2]]
53 set r [encoding convertfrom unicode $str]
58 # This proc contains all the tests in this file. It is run
59 # three times. Each time the file 'test.db' contains a database
60 # with the following contents:
62 CREATE TABLE t1(a PRIMARY KEY, b, c);
63 INSERT INTO t1 VALUES('one', 'I', 1);
65 # This proc tests that we can open and manipulate the test.db
66 # database, and that it is possible to retreive values in
67 # various text encodings.
69 proc run_test_script {t enc} {
71 # Open the database and pull out a (the) row.
73 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
74 execsql {SELECT * FROM t1}
79 execsql {INSERT INTO t1 VALUES('two', 'II', 2);}
80 execsql {SELECT * FROM t1}
86 INSERT INTO t1 VALUES('three','III',3);
87 INSERT INTO t1 VALUES('four','IV',4);
88 INSERT INTO t1 VALUES('five','V',5);
90 execsql {SELECT * FROM t1}
91 } {one I 1 two II 2 three III 3 four IV 4 five V 5}
96 SELECT * FROM t1 WHERE a = 'one';
101 SELECT * FROM t1 WHERE a = 'four';
107 SELECT * FROM t1 WHERE a IN ('one', 'two');
112 # Now check that we can retrieve data in both UTF-16 and UTF-8
114 set STMT [sqlite3_prepare $DB "SELECT a FROM t1 WHERE c>3;" -1 TAIL]
116 sqlite3_column_text $STMT 0
121 utf8 [sqlite3_column_text16 $STMT 0]
125 sqlite3_finalize $STMT
133 db eval {PRAGMA encoding}
138 # The three unicode encodings understood by SQLite.
139 set encodings [list UTF-8 UTF-16le UTF-16be]
141 set sqlite_os_trace 0
143 foreach enc $encodings {
146 db eval "PRAGMA encoding = \"$enc\""
148 do_test enc2-$i.0.1 {
149 db eval {PRAGMA encoding}
151 do_test enc2-$i.0.2 {
152 db eval {PRAGMA encoding=UTF8}
153 db eval {PRAGMA encoding}
155 do_test enc2-$i.0.3 {
156 db eval {PRAGMA encoding=UTF16le}
157 db eval {PRAGMA encoding}
159 do_test enc2-$i.0.4 {
160 db eval {PRAGMA encoding=UTF16be}
161 db eval {PRAGMA encoding}
165 run_test_script enc2-$i $enc
170 # Test that it is an error to try to attach a database with a different
171 # encoding to the main database.
176 db eval "PRAGMA encoding = 'UTF-8'"
177 db eval "CREATE TABLE abc(a, b, c);"
182 db2 eval "PRAGMA encoding = 'UTF-16'"
183 db2 eval "CREATE TABLE abc(a, b, c);"
187 ATTACH 'test2.db' as aux;
189 } {1 {attached databases must use the same text encoding as main database}}
194 # The following tests - enc2-5.* - test that SQLite selects the correct
195 # collation sequence when more than one is available.
197 set ::values [list one two three four five]
198 set ::test_collate_enc INVALID
199 proc test_collate {enc lhs rhs} {
200 set ::test_collate_enc $enc
201 set l [lsearch -exact $::values $lhs]
202 set r [lsearch -exact $::values $rhs]
203 set res [expr $l - $r]
204 # puts "enc=$enc lhs=$lhs/$l rhs=$rhs/$r res=$res"
209 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
213 INSERT INTO t5 VALUES('one');
214 INSERT INTO t5 VALUES('two');
215 INSERT INTO t5 VALUES('five');
216 INSERT INTO t5 VALUES('three');
217 INSERT INTO t5 VALUES('four');
221 add_test_collate $DB 1 1 1
222 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate;}]
223 lappend res $::test_collate_enc
224 } {one two three four five UTF-8}
226 add_test_collate $DB 0 1 0
227 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
228 lappend res $::test_collate_enc
229 } {one two three four five UTF-16LE}
231 add_test_collate $DB 0 0 1
232 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
233 lappend res $::test_collate_enc
234 } {one two three four five UTF-16BE}
238 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
239 execsql {pragma encoding = 'UTF-16LE'}
243 INSERT INTO t5 VALUES('one');
244 INSERT INTO t5 VALUES('two');
245 INSERT INTO t5 VALUES('five');
246 INSERT INTO t5 VALUES('three');
247 INSERT INTO t5 VALUES('four');
251 add_test_collate $DB 1 1 1
252 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
253 lappend res $::test_collate_enc
254 } {one two three four five UTF-16LE}
256 add_test_collate $DB 1 0 1
257 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
258 lappend res $::test_collate_enc
259 } {one two three four five UTF-16BE}
261 add_test_collate $DB 1 0 0
262 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
263 lappend res $::test_collate_enc
264 } {one two three four five UTF-8}
268 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
269 execsql {pragma encoding = 'UTF-16BE'}
273 INSERT INTO t5 VALUES('one');
274 INSERT INTO t5 VALUES('two');
275 INSERT INTO t5 VALUES('five');
276 INSERT INTO t5 VALUES('three');
277 INSERT INTO t5 VALUES('four');
281 add_test_collate $DB 1 1 1
282 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
283 lappend res $::test_collate_enc
284 } {one two three four five UTF-16BE}
286 add_test_collate $DB 1 1 0
287 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
288 lappend res $::test_collate_enc
289 } {one two three four five UTF-16LE}
291 add_test_collate $DB 1 0 0
292 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
293 lappend res $::test_collate_enc
294 } {one two three four five UTF-8}
296 # Also test that a UTF-16 collation factory works.
298 add_test_collate $DB 0 0 0
300 SELECT * FROM t5 ORDER BY 1 COLLATE test_collate
302 } {1 {no such collation sequence: test_collate}}
304 add_test_collate_needed $DB
305 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate; }]
306 lappend res $::test_collate_enc
307 } {one two three four five UTF-16BE}
309 set ::sqlite_last_needed_collation
316 sqlite3 db test.db; set ::DB [sqlite3_connection_pointer db]
317 add_test_collate_needed $::DB
318 set ::sqlite_last_needed_collation
321 execsql {CREATE TABLE t1(a varchar collate test_collate);}
324 set ::sqlite_last_needed_collation
327 # The following tests - enc2-6.* - test that SQLite selects the correct
328 # user function when more than one is available.
330 proc test_function {enc arg} {
336 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
337 execsql {pragma encoding = 'UTF-8'}
341 INSERT INTO t5 VALUES('one');
345 add_test_function $DB 1 1 1
347 SELECT test_function('sqlite')
351 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
353 add_test_function $DB 0 1 0
355 SELECT test_function('sqlite')
357 } {{UTF-16LE sqlite}}
359 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
361 add_test_function $DB 0 0 1
363 SELECT test_function('sqlite')
365 } {{UTF-16BE sqlite}}
369 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
370 execsql {pragma encoding = 'UTF-16LE'}
374 INSERT INTO t5 VALUES('sqlite');
378 add_test_function $DB 1 1 1
380 SELECT test_function('sqlite')
382 } {{UTF-16LE sqlite}}
384 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
386 add_test_function $DB 0 1 0
388 SELECT test_function('sqlite')
390 } {{UTF-16LE sqlite}}
392 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
394 add_test_function $DB 0 0 1
396 SELECT test_function('sqlite')
398 } {{UTF-16BE sqlite}}
402 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
403 execsql {pragma encoding = 'UTF-16BE'}
407 INSERT INTO t5 VALUES('sqlite');
411 add_test_function $DB 1 1 1
413 SELECT test_function('sqlite')
415 } {{UTF-16BE sqlite}}
417 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
419 add_test_function $DB 0 1 0
421 SELECT test_function('sqlite')
423 } {{UTF-16LE sqlite}}
425 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
427 add_test_function $DB 0 0 1
429 SELECT test_function('sqlite')
431 } {{UTF-16BE sqlite}}
437 # The following tests - enc2-7.* - function as follows:
439 # 1: Open an empty database file assuming UTF-16 encoding.
440 # 2: Open the same database with a different handle assuming UTF-8. Create
441 # a table using this handle.
442 # 3: Read the sqlite_master table from the first handle.
443 # 4: Ensure the first handle recognises the database encoding is UTF-8.
448 PRAGMA encoding = 'UTF-16';
449 SELECT * FROM sqlite_master;
456 string range $enc 0 end-2 ;# Chop off the "le" or "be"
461 PRAGMA encoding = 'UTF-8';
462 CREATE TABLE abc(a, b, c);
467 SELECT * FROM sqlite_master;
469 } "table abc abc [expr $AUTOVACUUM?3:2] {CREATE TABLE abc(a, b, c)}"
480 set utf16 [encoding convertto unicode $utf8]
481 append utf16 "\x00\x00"
484 ifcapable {complete} {
486 sqlite3_complete16 [utf16 "SELECT * FROM t1;"]
489 sqlite3_complete16 [utf16 "SELECT * FROM"]
493 # Test that the encoding of an empty database may still be set after the
494 # (empty) schema has been initialized.
499 PRAGMA encoding = 'UTF-8';
506 PRAGMA encoding = 'UTF-16le';
513 SELECT * FROM sqlite_master;
514 PRAGMA encoding = 'UTF-8';
521 PRAGMA encoding = 'UTF-16le';
522 CREATE TABLE abc(a, b, c);
529 PRAGMA encoding = 'UTF-8';
535 # Disallow encoding changes once the encoding has been set.
539 forcedelete test.db test.db-journal
542 PRAGMA encoding=UTF16;
544 PRAGMA encoding=UTF8;
550 SELECT name FROM sqlite_master
554 # 2020-01-15 ticket a08879a4a476eea9
555 # Do not allow a database connection encoding change unless *all*
556 # attached databases are empty.
559 do_execsql_test enc2-11.10 {
560 PRAGMA encoding=UTF8;
561 CREATE TEMP TABLE t1(x);
562 INSERT INTO t1 VALUES('this is a test');
563 PRAGMA encoding=UTF16;