Followup to r29625: fix getopt tests.
[svn.git] / subversion / tests / libsvn_subr / utf-test.c
blob4ec061f563ec95d5718453d6a4aee9d8758a8057
/*
 * utf-test.c -- test the utf functions
 *
 * ====================================================================
 * Copyright (c) 2004 CollabNet.  All rights reserved.
 *
 * This software is licensed as described in the file COPYING, which
 * you should have received as part of this distribution.  The terms
 * are also available at http://subversion.tigris.org/license-1.html.
 * If newer versions of this license are posted there, you may use a
 * newer version instead, at your option.
 *
 * This software consists of voluntary contributions made by many
 * individuals.  For exact contribution history, see the revision
 * history and logs, available at http://subversion.tigris.org/.
 * ====================================================================
 */
19 #include "../svn_test.h"
20 #include "../../libsvn_subr/utf_impl.h"
21 #include "svn_utf.h"
22 #include "svn_pools.h"
24 /* Random number seed. Yes, it's global, just pretend you can't see it. */
25 static apr_uint32_t diff_diff3_seed;
27 /* Return the value of the current random number seed, initializing it if
28 necessary */
29 static apr_uint32_t
30 seed_val(void)
32 static svn_boolean_t first = TRUE;
34 if (first)
36 diff_diff3_seed = (apr_uint32_t) apr_time_now();
37 first = FALSE;
40 return diff_diff3_seed;
43 /* Return a random number N such that MIN_VAL <= N <= MAX_VAL */
44 static apr_uint32_t
45 range_rand(apr_uint32_t min_val,
46 apr_uint32_t max_val)
48 apr_uint64_t diff = max_val - min_val;
49 apr_uint64_t val = diff * svn_test_rand(&diff_diff3_seed);
50 val /= 0xffffffff;
51 return min_val + (apr_uint32_t) val;
54 /* Explicit tests of various valid/invalid sequences */
55 static svn_error_t *
56 utf_validate(const char **msg,
57 svn_boolean_t msg_only,
58 svn_test_opts_t *opts,
59 apr_pool_t *pool)
61 struct data {
62 svn_boolean_t valid;
63 char string[20];
64 } tests[] = {
65 {TRUE, {'a', 'b', '\0'}},
66 {FALSE, {'a', 'b', '\x80', '\0'}},
68 {FALSE, {'a', 'b', '\xC0', '\0'}},
69 {FALSE, {'a', 'b', '\xC0', '\x81', 'x', 'y', '\0'}},
71 {TRUE, {'a', 'b', '\xC5', '\x81', 'x', 'y', '\0'}},
72 {FALSE, {'a', 'b', '\xC5', '\xC0', 'x', 'y', '\0'}},
74 {FALSE, {'a', 'b', '\xE0', '\0'}},
75 {FALSE, {'a', 'b', '\xE0', 'x', 'y', '\0'}},
76 {FALSE, {'a', 'b', '\xE0', '\xA0', '\0'}},
77 {FALSE, {'a', 'b', '\xE0', '\xA0', 'x', 'y', '\0'}},
78 {TRUE, {'a', 'b', '\xE0', '\xA0', '\x81', 'x', 'y', '\0'}},
79 {FALSE, {'a', 'b', '\xE0', '\x9F', '\x81', 'x', 'y', '\0'}},
80 {FALSE, {'a', 'b', '\xE0', '\xCF', '\x81', 'x', 'y', '\0'}},
82 {FALSE, {'a', 'b', '\xE5', '\0'}},
83 {FALSE, {'a', 'b', '\xE5', 'x', 'y', '\0'}},
84 {FALSE, {'a', 'b', '\xE5', '\x81', '\0'}},
85 {FALSE, {'a', 'b', '\xE5', '\x81', 'x', 'y', '\0'}},
86 {TRUE, {'a', 'b', '\xE5', '\x81', '\x81', 'x', 'y', '\0'}},
87 {FALSE, {'a', 'b', '\xE5', '\xE1', '\x81', 'x', 'y', '\0'}},
88 {FALSE, {'a', 'b', '\xE5', '\x81', '\xE1', 'x', 'y', '\0'}},
90 {FALSE, {'a', 'b', '\xED', '\0'}},
91 {FALSE, {'a', 'b', '\xED', 'x', 'y', '\0'}},
92 {FALSE, {'a', 'b', '\xED', '\x81', '\0'}},
93 {FALSE, {'a', 'b', '\xED', '\x81', 'x', 'y', '\0'}},
94 {TRUE, {'a', 'b', '\xED', '\x81', '\x81', 'x', 'y', '\0'}},
95 {FALSE, {'a', 'b', '\xED', '\xA0', '\x81', 'x', 'y', '\0'}},
96 {FALSE, {'a', 'b', '\xED', '\x81', '\xC1', 'x', 'y', '\0'}},
98 {FALSE, {'a', 'b', '\xEE', '\0'}},
99 {FALSE, {'a', 'b', '\xEE', 'x', 'y', '\0'}},
100 {FALSE, {'a', 'b', '\xEE', '\x81', '\0'}},
101 {FALSE, {'a', 'b', '\xEE', '\x81', 'x', 'y', '\0'}},
102 {TRUE, {'a', 'b', '\xEE', '\x81', '\x81', 'x', 'y', '\0'}},
103 {TRUE, {'a', 'b', '\xEE', '\xA0', '\x81', 'x', 'y', '\0'}},
104 {FALSE, {'a', 'b', '\xEE', '\xC0', '\x81', 'x', 'y', '\0'}},
105 {FALSE, {'a', 'b', '\xEE', '\x81', '\xC1', 'x', 'y', '\0'}},
107 {FALSE, {'a', 'b', '\xF0', '\0'}},
108 {FALSE, {'a', 'b', '\xF0', 'x', 'y', '\0'}},
109 {FALSE, {'a', 'b', '\xF0', '\x91', '\0'}},
110 {FALSE, {'a', 'b', '\xF0', '\x91', 'x', 'y', '\0'}},
111 {FALSE, {'a', 'b', '\xF0', '\x91', '\x81', '\0'}},
112 {FALSE, {'a', 'b', '\xF0', '\x91', '\x81', 'x', 'y', '\0'}},
113 {TRUE, {'a', 'b', '\xF0', '\x91', '\x81', '\x81', 'x', 'y', '\0'}},
114 {FALSE, {'a', 'b', '\xF0', '\x81', '\x81', '\x81', 'x', 'y', '\0'}},
115 {FALSE, {'a', 'b', '\xF0', '\xC1', '\x81', '\x81', 'x', 'y', '\0'}},
116 {FALSE, {'a', 'b', '\xF0', '\x91', '\xC1', '\x81', 'x', 'y', '\0'}},
117 {FALSE, {'a', 'b', '\xF0', '\x91', '\x81', '\xC1', 'x', 'y', '\0'}},
119 {FALSE, {'a', 'b', '\xF2', 'x', 'y', '\0'}},
120 {FALSE, {'a', 'b', '\xF2', '\x91', 'x', 'y', '\0'}},
121 {FALSE, {'a', 'b', '\xF2', '\x91', '\x81', 'x', 'y', '\0'}},
122 {TRUE, {'a', 'b', '\xF2', '\x91', '\x81', '\x81', 'x', 'y', '\0'}},
123 {TRUE, {'a', 'b', '\xF2', '\x81', '\x81', '\x81', 'x', 'y', '\0'}},
124 {FALSE, {'a', 'b', '\xF2', '\xC1', '\x81', '\x81', 'x', 'y', '\0'}},
125 {FALSE, {'a', 'b', '\xF2', '\x91', '\xC1', '\x81', 'x', 'y', '\0'}},
126 {FALSE, {'a', 'b', '\xF2', '\x91', '\x81', '\xC1', 'x', 'y', '\0'}},
128 {FALSE, {'a', 'b', '\xF4', 'x', 'y', '\0'}},
129 {FALSE, {'a', 'b', '\xF4', '\x91', 'x', 'y', '\0'}},
130 {FALSE, {'a', 'b', '\xF4', '\x91', '\x81', 'x', 'y', '\0'}},
131 {FALSE, {'a', 'b', '\xF4', '\x91', '\x81', '\x81', 'x', 'y', '\0'}},
132 {TRUE, {'a', 'b', '\xF4', '\x81', '\x81', '\x81', 'x', 'y', '\0'}},
133 {FALSE, {'a', 'b', '\xF4', '\xC1', '\x81', '\x81', 'x', 'y', '\0'}},
134 {FALSE, {'a', 'b', '\xF4', '\x91', '\xC1', '\x81', 'x', 'y', '\0'}},
135 {FALSE, {'a', 'b', '\xF4', '\x91', '\x81', '\xC1', 'x', 'y', '\0'}},
137 {FALSE, {'a', 'b', '\xF5', 'x', 'y', '\0'}},
138 {FALSE, {'a', 'b', '\xF5', '\x81', 'x', 'y', '\0'}},
140 {TRUE, {'a', 'b', '\xF4', '\x81', '\x81', '\x81', 'x', 'y',
141 'a', 'b', '\xF2', '\x91', '\x81', '\x81', 'x', 'y', '\0'}},
142 {FALSE, {'a', 'b', '\xF4', '\x81', '\x81', '\x81', 'x', 'y',
143 'a', 'b', '\xF2', '\x91', '\x81', '\xC1', 'x', 'y', '\0'}},
144 {FALSE, {'a', 'b', '\xF4', '\x81', '\x81', '\x81', 'x', 'y',
145 'a', 'b', '\xF2', '\x91', '\x81', 'x', 'y', '\0'}},
147 {-1},
149 int i = 0;
151 *msg = "test is_valid/last_valid";
153 if (msg_only)
154 return SVN_NO_ERROR;
157 while (tests[i].valid != -1)
159 const char *last = svn_utf__last_valid(tests[i].string,
160 strlen(tests[i].string));
161 apr_size_t len = strlen(tests[i].string);
163 if ((svn_utf__cstring_is_valid(tests[i].string) != tests[i].valid)
165 (svn_utf__is_valid(tests[i].string, len) != tests[i].valid))
166 return svn_error_createf
167 (SVN_ERR_TEST_FAILED, NULL, "is_valid test %d failed", i);
169 if (!svn_utf__is_valid(tests[i].string, last - tests[i].string)
171 (tests[i].valid && *last))
172 return svn_error_createf
173 (SVN_ERR_TEST_FAILED, NULL, "last_valid test %d failed", i);
175 ++i;
178 return SVN_NO_ERROR;
181 /* Compare the two different implementations using random data. */
182 static svn_error_t *
183 utf_validate2(const char **msg,
184 svn_boolean_t msg_only,
185 svn_test_opts_t *opts,
186 apr_pool_t *pool)
188 int i;
190 *msg = apr_psprintf(pool,
191 "test last_valid/last_valid2 (seed:%u)", seed_val());
193 if (msg_only)
194 return SVN_NO_ERROR;
196 /* We want enough iterations so that most runs get both valid and invalid
197 strings. We also want enough iterations such that a deliberate error
198 in one of the implementations will trigger a failure. By experiment
199 the second requirement requires a much larger number of iterations
200 that the first. */
201 for (i = 0; i < 100000; ++i)
203 unsigned int j;
204 char str[64];
205 apr_size_t len;
207 /* A random string; experiment shows that it's occasionally (less
208 than 1%) valid but usually invalid. */
209 for (j = 0; j < sizeof(str) - 1; ++j)
210 str[j] = range_rand(0, 255);
211 str[sizeof(str) - 1] = 0;
212 len = strlen(str);
214 if (svn_utf__last_valid(str, len) != svn_utf__last_valid2(str, len))
216 /* Duplicate calls for easy debugging */
217 svn_utf__last_valid(str, len);
218 svn_utf__last_valid2(str, len);
219 return svn_error_createf
220 (SVN_ERR_TEST_FAILED, NULL, "is_valid2 test %d failed", i);
224 return SVN_NO_ERROR;
227 /* Test conversion from different codepages to utf8. */
228 static svn_error_t *
229 test_utf_cstring_to_utf8_ex2(const char **msg,
230 svn_boolean_t msg_only,
231 svn_test_opts_t *opts,
232 apr_pool_t *pool)
234 apr_size_t i;
235 apr_pool_t *subpool = svn_pool_create(pool);
237 struct data {
238 const char *string;
239 const char *expected_result;
240 const char *from_page;
241 } tests[] = {
242 {"ascii text\n", "ascii text\n", "unexistant-page"},
243 {"Edelwei\xdf", "Edelwei\xc3\x9f", "ISO-8859-1"}
246 *msg = "test svn_utf_cstring_to_utf8_ex2";
248 if (msg_only)
249 return SVN_NO_ERROR;
251 for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++)
253 const char *dest;
255 svn_pool_clear(subpool);
257 SVN_ERR(svn_utf_cstring_to_utf8_ex2(&dest, tests[i].string,
258 tests[i].from_page, pool));
260 if (strcmp(dest, tests[i].expected_result))
262 return svn_error_createf
263 (SVN_ERR_TEST_FAILED, NULL,
264 "svn_utf_cstring_to_utf8_ex2 ('%s', '%s') returned ('%s') "
265 "instead of ('%s')",
266 tests[i].string, tests[i].from_page,
267 dest,
268 tests[i].expected_result);
271 svn_pool_destroy(subpool);
272 return SVN_NO_ERROR;
275 /* Test conversion to different codepages from utf8. */
276 static svn_error_t *
277 test_utf_cstring_from_utf8_ex2(const char **msg,
278 svn_boolean_t msg_only,
279 svn_test_opts_t *opts,
280 apr_pool_t *pool)
282 apr_size_t i;
283 apr_pool_t *subpool = svn_pool_create(pool);
285 struct data {
286 const char *string;
287 const char *expected_result;
288 const char *to_page;
289 } tests[] = {
290 {"ascii text\n", "ascii text\n", "unexistant-page"},
291 {"Edelwei\xc3\x9f", "Edelwei\xdf", "ISO-8859-1"}
294 *msg = "test svn_utf_cstring_from_utf8_ex2";
296 if (msg_only)
297 return SVN_NO_ERROR;
299 for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++)
301 const char *dest;
303 svn_pool_clear(subpool);
305 SVN_ERR(svn_utf_cstring_from_utf8_ex2(&dest, tests[i].string,
306 tests[i].to_page, pool));
308 if (strcmp(dest, tests[i].expected_result))
310 return svn_error_createf
311 (SVN_ERR_TEST_FAILED, NULL,
312 "svn_utf_cstring_from_utf8_ex2 ('%s', '%s') returned ('%s') "
313 "instead of ('%s')",
314 tests[i].string, tests[i].to_page,
315 dest,
316 tests[i].expected_result);
319 svn_pool_destroy(subpool);
320 return SVN_NO_ERROR;
324 /* The test table. */
326 struct svn_test_descriptor_t test_funcs[] =
328 SVN_TEST_NULL,
329 SVN_TEST_PASS(utf_validate),
330 SVN_TEST_PASS(utf_validate2),
331 SVN_TEST_PASS(test_utf_cstring_to_utf8_ex2),
332 SVN_TEST_PASS(test_utf_cstring_from_utf8_ex2),
333 SVN_TEST_NULL