2 * utf-test.c -- test the utf functions
4 * ====================================================================
5 * Copyright (c) 2004 CollabNet. All rights reserved.
7 * This software is licensed as described in the file COPYING, which
8 * you should have received as part of this distribution. The terms
9 * are also available at http://subversion.tigris.org/license-1.html.
10 * If newer versions of this license are posted there, you may use a
11 * newer version instead, at your option.
13 * This software consists of voluntary contributions made by many
14 * individuals. For exact contribution history, see the revision
15 * history and logs, available at http://subversion.tigris.org/.
16 * ====================================================================
19 #include "../svn_test.h"
20 #include "../../libsvn_subr/utf_impl.h"
22 #include "svn_pools.h"
24 /* Random number seed. Yes, it's global, just pretend you can't see it. */
25 static apr_uint32_t diff_diff3_seed
;
27 /* Return the value of the current random number seed, initializing it if necessary. */
32 static svn_boolean_t first
= TRUE
;
36 diff_diff3_seed
= (apr_uint32_t
) apr_time_now();
40 return diff_diff3_seed
;
43 /* Return a random number N such that MIN_VAL <= N <= MAX_VAL */
45 range_rand(apr_uint32_t min_val
,
48 apr_uint64_t diff
= max_val
- min_val
;
49 apr_uint64_t val
= diff
* svn_test_rand(&diff_diff3_seed
);
51 return min_val
+ (apr_uint32_t
) val
;
54 /* Explicit tests of various valid/invalid sequences */
56 utf_validate(const char **msg
,
57 svn_boolean_t msg_only
,
58 svn_test_opts_t
*opts
,
65 {TRUE
, {'a', 'b', '\0'}},
66 {FALSE
, {'a', 'b', '\x80', '\0'}},
68 {FALSE
, {'a', 'b', '\xC0', '\0'}},
69 {FALSE
, {'a', 'b', '\xC0', '\x81', 'x', 'y', '\0'}},
71 {TRUE
, {'a', 'b', '\xC5', '\x81', 'x', 'y', '\0'}},
72 {FALSE
, {'a', 'b', '\xC5', '\xC0', 'x', 'y', '\0'}},
74 {FALSE
, {'a', 'b', '\xE0', '\0'}},
75 {FALSE
, {'a', 'b', '\xE0', 'x', 'y', '\0'}},
76 {FALSE
, {'a', 'b', '\xE0', '\xA0', '\0'}},
77 {FALSE
, {'a', 'b', '\xE0', '\xA0', 'x', 'y', '\0'}},
78 {TRUE
, {'a', 'b', '\xE0', '\xA0', '\x81', 'x', 'y', '\0'}},
79 {FALSE
, {'a', 'b', '\xE0', '\x9F', '\x81', 'x', 'y', '\0'}},
80 {FALSE
, {'a', 'b', '\xE0', '\xCF', '\x81', 'x', 'y', '\0'}},
82 {FALSE
, {'a', 'b', '\xE5', '\0'}},
83 {FALSE
, {'a', 'b', '\xE5', 'x', 'y', '\0'}},
84 {FALSE
, {'a', 'b', '\xE5', '\x81', '\0'}},
85 {FALSE
, {'a', 'b', '\xE5', '\x81', 'x', 'y', '\0'}},
86 {TRUE
, {'a', 'b', '\xE5', '\x81', '\x81', 'x', 'y', '\0'}},
87 {FALSE
, {'a', 'b', '\xE5', '\xE1', '\x81', 'x', 'y', '\0'}},
88 {FALSE
, {'a', 'b', '\xE5', '\x81', '\xE1', 'x', 'y', '\0'}},
90 {FALSE
, {'a', 'b', '\xED', '\0'}},
91 {FALSE
, {'a', 'b', '\xED', 'x', 'y', '\0'}},
92 {FALSE
, {'a', 'b', '\xED', '\x81', '\0'}},
93 {FALSE
, {'a', 'b', '\xED', '\x81', 'x', 'y', '\0'}},
94 {TRUE
, {'a', 'b', '\xED', '\x81', '\x81', 'x', 'y', '\0'}},
95 {FALSE
, {'a', 'b', '\xED', '\xA0', '\x81', 'x', 'y', '\0'}},
96 {FALSE
, {'a', 'b', '\xED', '\x81', '\xC1', 'x', 'y', '\0'}},
98 {FALSE
, {'a', 'b', '\xEE', '\0'}},
99 {FALSE
, {'a', 'b', '\xEE', 'x', 'y', '\0'}},
100 {FALSE
, {'a', 'b', '\xEE', '\x81', '\0'}},
101 {FALSE
, {'a', 'b', '\xEE', '\x81', 'x', 'y', '\0'}},
102 {TRUE
, {'a', 'b', '\xEE', '\x81', '\x81', 'x', 'y', '\0'}},
103 {TRUE
, {'a', 'b', '\xEE', '\xA0', '\x81', 'x', 'y', '\0'}},
104 {FALSE
, {'a', 'b', '\xEE', '\xC0', '\x81', 'x', 'y', '\0'}},
105 {FALSE
, {'a', 'b', '\xEE', '\x81', '\xC1', 'x', 'y', '\0'}},
107 {FALSE
, {'a', 'b', '\xF0', '\0'}},
108 {FALSE
, {'a', 'b', '\xF0', 'x', 'y', '\0'}},
109 {FALSE
, {'a', 'b', '\xF0', '\x91', '\0'}},
110 {FALSE
, {'a', 'b', '\xF0', '\x91', 'x', 'y', '\0'}},
111 {FALSE
, {'a', 'b', '\xF0', '\x91', '\x81', '\0'}},
112 {FALSE
, {'a', 'b', '\xF0', '\x91', '\x81', 'x', 'y', '\0'}},
113 {TRUE
, {'a', 'b', '\xF0', '\x91', '\x81', '\x81', 'x', 'y', '\0'}},
114 {FALSE
, {'a', 'b', '\xF0', '\x81', '\x81', '\x81', 'x', 'y', '\0'}},
115 {FALSE
, {'a', 'b', '\xF0', '\xC1', '\x81', '\x81', 'x', 'y', '\0'}},
116 {FALSE
, {'a', 'b', '\xF0', '\x91', '\xC1', '\x81', 'x', 'y', '\0'}},
117 {FALSE
, {'a', 'b', '\xF0', '\x91', '\x81', '\xC1', 'x', 'y', '\0'}},
119 {FALSE
, {'a', 'b', '\xF2', 'x', 'y', '\0'}},
120 {FALSE
, {'a', 'b', '\xF2', '\x91', 'x', 'y', '\0'}},
121 {FALSE
, {'a', 'b', '\xF2', '\x91', '\x81', 'x', 'y', '\0'}},
122 {TRUE
, {'a', 'b', '\xF2', '\x91', '\x81', '\x81', 'x', 'y', '\0'}},
123 {TRUE
, {'a', 'b', '\xF2', '\x81', '\x81', '\x81', 'x', 'y', '\0'}},
124 {FALSE
, {'a', 'b', '\xF2', '\xC1', '\x81', '\x81', 'x', 'y', '\0'}},
125 {FALSE
, {'a', 'b', '\xF2', '\x91', '\xC1', '\x81', 'x', 'y', '\0'}},
126 {FALSE
, {'a', 'b', '\xF2', '\x91', '\x81', '\xC1', 'x', 'y', '\0'}},
128 {FALSE
, {'a', 'b', '\xF4', 'x', 'y', '\0'}},
129 {FALSE
, {'a', 'b', '\xF4', '\x91', 'x', 'y', '\0'}},
130 {FALSE
, {'a', 'b', '\xF4', '\x91', '\x81', 'x', 'y', '\0'}},
131 {FALSE
, {'a', 'b', '\xF4', '\x91', '\x81', '\x81', 'x', 'y', '\0'}},
132 {TRUE
, {'a', 'b', '\xF4', '\x81', '\x81', '\x81', 'x', 'y', '\0'}},
133 {FALSE
, {'a', 'b', '\xF4', '\xC1', '\x81', '\x81', 'x', 'y', '\0'}},
134 {FALSE
, {'a', 'b', '\xF4', '\x91', '\xC1', '\x81', 'x', 'y', '\0'}},
135 {FALSE
, {'a', 'b', '\xF4', '\x91', '\x81', '\xC1', 'x', 'y', '\0'}},
137 {FALSE
, {'a', 'b', '\xF5', 'x', 'y', '\0'}},
138 {FALSE
, {'a', 'b', '\xF5', '\x81', 'x', 'y', '\0'}},
140 {TRUE
, {'a', 'b', '\xF4', '\x81', '\x81', '\x81', 'x', 'y',
141 'a', 'b', '\xF2', '\x91', '\x81', '\x81', 'x', 'y', '\0'}},
142 {FALSE
, {'a', 'b', '\xF4', '\x81', '\x81', '\x81', 'x', 'y',
143 'a', 'b', '\xF2', '\x91', '\x81', '\xC1', 'x', 'y', '\0'}},
144 {FALSE
, {'a', 'b', '\xF4', '\x81', '\x81', '\x81', 'x', 'y',
145 'a', 'b', '\xF2', '\x91', '\x81', 'x', 'y', '\0'}},
151 *msg
= "test is_valid/last_valid";
157 while (tests
[i
].valid
!= -1)
159 const char *last
= svn_utf__last_valid(tests
[i
].string
,
160 strlen(tests
[i
].string
));
161 apr_size_t len
= strlen(tests
[i
].string
);
163 if ((svn_utf__cstring_is_valid(tests
[i
].string
) != tests
[i
].valid
)
165 (svn_utf__is_valid(tests
[i
].string
, len
) != tests
[i
].valid
))
166 return svn_error_createf
167 (SVN_ERR_TEST_FAILED
, NULL
, "is_valid test %d failed", i
);
169 if (!svn_utf__is_valid(tests
[i
].string
, last
- tests
[i
].string
)
171 (tests
[i
].valid
&& *last
))
172 return svn_error_createf
173 (SVN_ERR_TEST_FAILED
, NULL
, "last_valid test %d failed", i
);
181 /* Compare the two different implementations using random data. */
183 utf_validate2(const char **msg
,
184 svn_boolean_t msg_only
,
185 svn_test_opts_t
*opts
,
190 *msg
= apr_psprintf(pool
,
191 "test last_valid/last_valid2 (seed:%u)", seed_val());
196 /* We want enough iterations so that most runs get both valid and invalid
197 strings. We also want enough iterations such that a deliberate error
198 in one of the implementations will trigger a failure. By experiment
199 the second requirement needs a much larger number of iterations than the first. */
201 for (i
= 0; i
< 100000; ++i
)
207 /* A random string; experiment shows that it's occasionally (less
208 than 1%) valid but usually invalid. */
209 for (j
= 0; j
< sizeof(str
) - 1; ++j
)
210 str
[j
] = range_rand(0, 255);
211 str
[sizeof(str
) - 1] = 0;
214 if (svn_utf__last_valid(str
, len
) != svn_utf__last_valid2(str
, len
))
216 /* Duplicate calls for easy debugging */
217 svn_utf__last_valid(str
, len
);
218 svn_utf__last_valid2(str
, len
);
219 return svn_error_createf
220 (SVN_ERR_TEST_FAILED
, NULL
, "is_valid2 test %d failed", i
);
227 /* Test conversion from different codepages to utf8. */
229 test_utf_cstring_to_utf8_ex2(const char **msg
,
230 svn_boolean_t msg_only
,
231 svn_test_opts_t
*opts
,
235 apr_pool_t
*subpool
= svn_pool_create(pool
);
239 const char *expected_result
;
240 const char *from_page
;
242 {"ascii text\n", "ascii text\n", "unexistant-page"},
243 {"Edelwei\xdf", "Edelwei\xc3\x9f", "ISO-8859-1"}
246 *msg
= "test svn_utf_cstring_to_utf8_ex2";
251 for (i
= 0; i
< sizeof(tests
) / sizeof(tests
[0]); i
++)
255 svn_pool_clear(subpool
);
257 SVN_ERR(svn_utf_cstring_to_utf8_ex2(&dest
, tests
[i
].string
,
258 tests
[i
].from_page
, pool
));
260 if (strcmp(dest
, tests
[i
].expected_result
))
262 return svn_error_createf
263 (SVN_ERR_TEST_FAILED
, NULL
,
264 "svn_utf_cstring_to_utf8_ex2 ('%s', '%s') returned ('%s') "
266 tests
[i
].string
, tests
[i
].from_page
,
268 tests
[i
].expected_result
);
271 svn_pool_destroy(subpool
);
275 /* Test conversion to different codepages from utf8. */
277 test_utf_cstring_from_utf8_ex2(const char **msg
,
278 svn_boolean_t msg_only
,
279 svn_test_opts_t
*opts
,
283 apr_pool_t
*subpool
= svn_pool_create(pool
);
287 const char *expected_result
;
290 {"ascii text\n", "ascii text\n", "unexistant-page"},
291 {"Edelwei\xc3\x9f", "Edelwei\xdf", "ISO-8859-1"}
294 *msg
= "test svn_utf_cstring_from_utf8_ex2";
299 for (i
= 0; i
< sizeof(tests
) / sizeof(tests
[0]); i
++)
303 svn_pool_clear(subpool
);
305 SVN_ERR(svn_utf_cstring_from_utf8_ex2(&dest
, tests
[i
].string
,
306 tests
[i
].to_page
, pool
));
308 if (strcmp(dest
, tests
[i
].expected_result
))
310 return svn_error_createf
311 (SVN_ERR_TEST_FAILED
, NULL
,
312 "svn_utf_cstring_from_utf8_ex2 ('%s', '%s') returned ('%s') "
314 tests
[i
].string
, tests
[i
].to_page
,
316 tests
[i
].expected_result
);
319 svn_pool_destroy(subpool
);
324 /* The test table. */
326 struct svn_test_descriptor_t test_funcs
[] =
329 SVN_TEST_PASS(utf_validate
),
330 SVN_TEST_PASS(utf_validate2
),
331 SVN_TEST_PASS(test_utf_cstring_to_utf8_ex2
),
332 SVN_TEST_PASS(test_utf_cstring_from_utf8_ex2
),