1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
21 #include "rtl/strbuf.hxx"
22 #include "rtl/uri.hxx"
23 #include "rtl/ustrbuf.hxx"
25 #include <cppunit/TestFixture.h>
26 #include <cppunit/extensions/HelperMacros.h>
27 #include <cppunit/plugin/TestPlugIn.h>
34 struct Test
: public CppUnit::TestFixture
{
37 CPPUNIT_TEST_SUITE(Test
);
38 CPPUNIT_TEST(test_Uri
);
39 CPPUNIT_TEST_SUITE_END();
42 void Test::test_Uri() {
43 rtl_UriCharClass
const eFirstCharClass
= rtl_UriCharClassNone
;
44 rtl_UriCharClass
const eLastCharClass
= rtl_UriCharClassUnoParamValue
;
46 rtl::OUStringBuffer aBuffer
;
50 // Check that all characters map back to themselves when encoded/decoded:
52 aText1
= rtl::OUString(
53 RTL_CONSTASCII_USTRINGPARAM(
54 "\x00\x01\x02\x03\x04\x05\x06\x07"
55 "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
56 "\x10\x11\x12\x13\x14\x15\x16\x17"
57 "\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F"
58 "\x20\x21\x22\x23\x24\x25\x26\x27"
59 "\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F"
60 "\x30\x31\x32\x33\x34\x35\x36\x37"
61 "\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F"
62 "\x40\x41\x42\x43\x44\x45\x46\x47"
63 "\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F"
64 "\x50\x51\x52\x53\x54\x55\x56\x57"
65 "\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F"
66 "\x60\x61\x62\x63\x64\x65\x66\x67"
67 "\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F"
68 "\x70\x71\x72\x73\x74\x75\x76\x77"
69 "\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F"));
71 for (rtl_UriCharClass eCharClass
= eFirstCharClass
;
72 eCharClass
<= eLastCharClass
;
73 eCharClass
= static_cast< rtl_UriCharClass
>(eCharClass
+ 1))
75 CPPUNIT_ASSERT_MESSAGE(
79 aText1
, eCharClass
, rtl_UriEncodeKeepEscapes
,
80 RTL_TEXTENCODING_ISO_8859_1
),
81 rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_ASCII_US
)
83 CPPUNIT_ASSERT_MESSAGE(
87 aText1
, eCharClass
, rtl_UriEncodeCheckEscapes
,
88 RTL_TEXTENCODING_ISO_8859_1
),
89 rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_ASCII_US
)
91 CPPUNIT_ASSERT_MESSAGE(
95 aText1
, eCharClass
, rtl_UriEncodeKeepEscapes
,
96 RTL_TEXTENCODING_ISO_8859_1
),
97 rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_ISO_8859_1
)
99 CPPUNIT_ASSERT_MESSAGE(
103 aText1
, eCharClass
, rtl_UriEncodeCheckEscapes
,
104 RTL_TEXTENCODING_ISO_8859_1
),
105 rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_ISO_8859_1
)
107 CPPUNIT_ASSERT_MESSAGE(
111 aText1
, eCharClass
, rtl_UriEncodeKeepEscapes
,
112 RTL_TEXTENCODING_ISO_8859_1
),
113 rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_UTF8
)
115 CPPUNIT_ASSERT_MESSAGE(
119 aText1
, eCharClass
, rtl_UriEncodeCheckEscapes
,
120 RTL_TEXTENCODING_ISO_8859_1
),
121 rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_UTF8
)
125 aText1
= rtl::OUString(
126 RTL_CONSTASCII_USTRINGPARAM(
127 "\x00\x01\x02\x03\x04\x05\x06\x07"
128 "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
129 "\x10\x11\x12\x13\x14\x15\x16\x17"
130 "\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F"
131 "\x20\x21\x22\x23\x24\x25\x26\x27"
132 "\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F"
133 "\x30\x31\x32\x33\x34\x35\x36\x37"
134 "\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F"
135 "\x40\x41\x42\x43\x44\x45\x46\x47"
136 "\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F"
137 "\x50\x51\x52\x53\x54\x55\x56\x57"
138 "\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F"
139 "\x60\x61\x62\x63\x64\x65\x66\x67"
140 "\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F"
141 "\x70\x71\x72\x73\x74\x75\x76\x77"
142 "\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F"
143 "\x80\x81\x82\x83\x84\x85\x86\x87"
144 "\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F"
145 "\x90\x91\x92\x93\x94\x95\x96\x97"
146 "\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F"
147 "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7"
148 "\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF"
149 "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7"
150 "\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF"
151 "\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7"
152 "\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF"
153 "\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7"
154 "\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF"
155 "\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7"
156 "\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF"
157 "\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7"
158 "\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF"));
160 for (rtl_UriCharClass eCharClass
= eFirstCharClass
;
161 eCharClass
<= eLastCharClass
;
162 eCharClass
= static_cast< rtl_UriCharClass
>(eCharClass
+ 1))
164 CPPUNIT_ASSERT_MESSAGE(
168 aText1
, eCharClass
, rtl_UriEncodeKeepEscapes
,
169 RTL_TEXTENCODING_ISO_8859_1
),
170 rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_ISO_8859_1
)
172 CPPUNIT_ASSERT_MESSAGE(
176 aText1
, eCharClass
, rtl_UriEncodeCheckEscapes
,
177 RTL_TEXTENCODING_ISO_8859_1
),
178 rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_ISO_8859_1
)
180 CPPUNIT_ASSERT_MESSAGE(
184 aText1
, eCharClass
, rtl_UriEncodeKeepEscapes
,
185 RTL_TEXTENCODING_UTF8
),
186 rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_UTF8
)
188 CPPUNIT_ASSERT_MESSAGE(
192 aText1
, eCharClass
, rtl_UriEncodeCheckEscapes
,
193 RTL_TEXTENCODING_UTF8
),
194 rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_UTF8
)
198 // Check surrogate handling:
200 aBuffer
.append(static_cast< sal_Unicode
>(0xD800)); // %ED%A0%80
201 aBuffer
.append(static_cast< sal_Unicode
>(0xD800)); // %F0%90%8F%BF
202 aBuffer
.append(static_cast< sal_Unicode
>(0xDFFF));
203 aBuffer
.append(static_cast< sal_Unicode
>(0xDFFF)); // %ED%BF%BF
204 aBuffer
.append(static_cast< sal_Unicode
>('A')); // A
205 aText1
= aBuffer
.makeStringAndClear();
206 aText2
= rtl::OUString(
207 "%ED%A0%80" "%F0%90%8F%BF" "%ED%BF%BF" "A");
208 CPPUNIT_ASSERT_MESSAGE(
211 aText1
, rtl_UriCharClassUric
, rtl_UriEncodeIgnoreEscapes
,
212 RTL_TEXTENCODING_UTF8
)
214 CPPUNIT_ASSERT_MESSAGE(
217 aText1
, rtl_UriCharClassUric
, rtl_UriEncodeKeepEscapes
,
218 RTL_TEXTENCODING_UTF8
)
220 CPPUNIT_ASSERT_MESSAGE(
223 aText1
, rtl_UriCharClassUric
, rtl_UriEncodeCheckEscapes
,
224 RTL_TEXTENCODING_UTF8
)
227 aText1
= rtl::OUString(
228 "%ed%a0%80" "%f0%90%8f%bf" "%ed%bf%bf" "A");
229 aBuffer
.append("%ED%A0%80");
230 aBuffer
.append(static_cast< sal_Unicode
>(0xD800));
231 aBuffer
.append(static_cast< sal_Unicode
>(0xDFFF));
232 aBuffer
.append("%ED%BF%BF");
233 aBuffer
.append(static_cast< sal_Unicode
>('A'));
234 aText2
= aBuffer
.makeStringAndClear();
235 CPPUNIT_ASSERT_MESSAGE(
237 (rtl::Uri::decode(aText1
, rtl_UriDecodeToIuri
, RTL_TEXTENCODING_UTF8
)
239 CPPUNIT_ASSERT_MESSAGE(
242 aText1
, rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_UTF8
)
245 // Check UTF-8 handling:
247 aText1
= rtl::OUString("%E0%83%BF");
248 // U+00FF encoded with three bytes instead of the minimal two (an overlong UTF-8 form)
250 CPPUNIT_ASSERT_MESSAGE(
253 aText1
, rtl_UriCharClassUric
, rtl_UriEncodeCheckEscapes
,
254 RTL_TEXTENCODING_UTF8
)
257 aText1
= rtl::OUString("%EF%BF%BF");
258 // U+FFFF is not a legal character
260 CPPUNIT_ASSERT_MESSAGE(
263 aText1
, rtl_UriCharClassUric
, rtl_UriEncodeCheckEscapes
,
264 RTL_TEXTENCODING_UTF8
)
267 // Check IURI handling:
269 aText1
= rtl::OUString("%30%C3%BF");
270 aBuffer
.append("%30");
271 aBuffer
.append(static_cast< sal_Unicode
>(0x00FF));
272 aText2
= aBuffer
.makeStringAndClear();
273 CPPUNIT_ASSERT_MESSAGE(
275 (rtl::Uri::decode(aText1
, rtl_UriDecodeToIuri
, RTL_TEXTENCODING_UTF8
)
278 // Check modified rtl_UriCharClassUnoParamValue (removed '[' and ']'):
280 aText1
= rtl::OUString("[]%5B%5D");
281 aText2
= rtl::OUString("%5B%5D%5B%5D");
282 CPPUNIT_ASSERT_MESSAGE(
285 aText1
, rtl_UriCharClassUnoParamValue
, rtl_UriEncodeCheckEscapes
,
286 RTL_TEXTENCODING_ASCII_US
)
289 // Check Uri::convertRelToAbs:
297 static RelToAbsTest
const aRelToAbsTest
[]
298 = { // The following tests are taken from RFC 2396:
299 { "http://a/b/c/d;p?q", "g:h", "g:h" },
300 { "http://a/b/c/d;p?q", "g", "http://a/b/c/g" },
301 { "http://a/b/c/d;p?q", "./g", "http://a/b/c/g" },
302 { "http://a/b/c/d;p?q", "g/", "http://a/b/c/g/" },
303 { "http://a/b/c/d;p?q", "/g", "http://a/g" },
304 { "http://a/b/c/d;p?q", "//g", "http://g" },
305 { "http://a/b/c/d;p?q", "?y", "http://a/b/c/?y" },
306 { "http://a/b/c/d;p?q", "g?y", "http://a/b/c/g?y" },
307 { "http://a/b/c/d;p?q", "#s", "http://a/b/c/d;p?q#s" },
308 { "http://a/b/c/d;p?q", "g#s", "http://a/b/c/g#s" },
309 { "http://a/b/c/d;p?q", "g?y#s", "http://a/b/c/g?y#s" },
310 { "http://a/b/c/d;p?q", ";x", "http://a/b/c/;x" },
311 { "http://a/b/c/d;p?q", "g;x", "http://a/b/c/g;x" },
312 { "http://a/b/c/d;p?q", "g;x?y#s", "http://a/b/c/g;x?y#s" },
313 { "http://a/b/c/d;p?q", ".", "http://a/b/c/" },
314 { "http://a/b/c/d;p?q", "./", "http://a/b/c/" },
315 { "http://a/b/c/d;p?q", "..", "http://a/b/" },
316 { "http://a/b/c/d;p?q", "../", "http://a/b/" },
317 { "http://a/b/c/d;p?q", "../g", "http://a/b/g" },
318 { "http://a/b/c/d;p?q", "../..", "http://a/" },
319 { "http://a/b/c/d;p?q", "../../", "http://a/" },
320 { "http://a/b/c/d;p?q", "../../g", "http://a/g" },
321 { "http://a/b/c/d;p?q", "", "http://a/b/c/d;p?q" },
322 { "http://a/b/c/d;p?q", "../../../g", "http://a/../g" },
323 { "http://a/b/c/d;p?q", "../../../../g", "http://a/../../g" },
324 { "http://a/b/c/d;p?q", "/./g", "http://a/./g" },
325 { "http://a/b/c/d;p?q", "/../g", "http://a/../g" },
326 { "http://a/b/c/d;p?q", "g.", "http://a/b/c/g." },
327 { "http://a/b/c/d;p?q", ".g", "http://a/b/c/.g" },
328 { "http://a/b/c/d;p?q", "g..", "http://a/b/c/g.." },
329 { "http://a/b/c/d;p?q", "..g", "http://a/b/c/..g" },
330 { "http://a/b/c/d;p?q", "./../g", "http://a/b/g" },
331 { "http://a/b/c/d;p?q", "./g/.", "http://a/b/c/g/" },
332 { "http://a/b/c/d;p?q", "g/./h", "http://a/b/c/g/h" },
333 { "http://a/b/c/d;p?q", "g/../h", "http://a/b/c/h" },
334 { "http://a/b/c/d;p?q", "g;x=1/./y", "http://a/b/c/g;x=1/y" },
335 { "http://a/b/c/d;p?q", "g;x=1/../y", "http://a/b/c/y" },
336 { "http://a/b/c/d;p?q", "g?y/./x", "http://a/b/c/g?y/./x" },
337 { "http://a/b/c/d;p?q", "g?y/../x", "http://a/b/c/g?y/../x" },
338 { "http://a/b/c/d;p?q", "g#s/./x", "http://a/b/c/g#s/./x" },
339 { "http://a/b/c/d;p?q", "g#s/../x", "http://a/b/c/g#s/../x" },
340 { "http://a/b/c/d;p?q", "http:g", "http:g" },
341 { "http!://a/b/c/d;p?q", "g:h", "g:h" },
342 { "http!://a/b/c/d;p?q", "g", 0 },
343 { "http:b/c/d;p?q", "g:h", "g:h" },
344 { "http:b/c/d;p?q", "g", 0 },
345 { "http://a/b/../", "../c", "http://a/b/../../c" },
346 { "http://a/b/..", "../c", "http://a/c" },
347 { "http://a/./b/", ".././.././../c", "http://a/./../../c" } };
348 for (std::size_t i
= 0; i
< sizeof aRelToAbsTest
/ sizeof (RelToAbsTest
); ++i
)
351 bool bMalformed
= false;
353 aAbs
= rtl::Uri::convertRelToAbs(
354 rtl::OUString::createFromAscii(aRelToAbsTest
[i
].pBase
),
355 rtl::OUString::createFromAscii(aRelToAbsTest
[i
].pRel
));
356 } catch (const rtl::MalformedUriException
&) {
360 ? aRelToAbsTest
[i
].pAbs
!= 0
361 : (aRelToAbsTest
[i
].pAbs
== 0
362 || !aAbs
.equalsAscii(aRelToAbsTest
[i
].pAbs
)))
365 "FAILED convertRelToAbs(%s, %s) -> %s != %s\n",
366 aRelToAbsTest
[i
].pBase
, aRelToAbsTest
[i
].pRel
,
369 : rtl::OUStringToOString(
370 aAbs
, RTL_TEXTENCODING_UTF8
).getStr()),
371 (aRelToAbsTest
[i
].pAbs
== 0
372 ? "<MALFORMED>" : aRelToAbsTest
[i
].pAbs
));
373 CPPUNIT_ASSERT(false);
377 // Check encode with unusual text encodings:
380 sal_Unicode
const aText1U
[] = { ' ', '!', 0x0401, 0x045F, 0 };
381 aText1
= rtl::OUString(aText1U
);
382 aText2
= rtl::OUString("%20!%A1%FF");
383 CPPUNIT_ASSERT_MESSAGE(
386 aText1
, rtl_UriCharClassUric
, rtl_UriEncodeIgnoreEscapes
,
387 RTL_TEXTENCODING_ISO_8859_5
)
389 CPPUNIT_ASSERT_MESSAGE(
392 aText2
, rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_ISO_8859_5
)
396 sal_Unicode
const aText1U
[] = { ' ', '!', 0x0401, 0x0700, 0x045F, 0 };
397 aText1
= rtl::OUString(aText1U
);
398 sal_Unicode
const aText2U
[] = {
399 '%', '2', '0', '!', '%', 'A', '1', 0x0700, '%', 'F', 'F', 0 };
400 aText2
= rtl::OUString(aText2U
);
401 CPPUNIT_ASSERT_MESSAGE(
404 aText1
, rtl_UriCharClassUric
, rtl_UriEncodeIgnoreEscapes
,
405 RTL_TEXTENCODING_ISO_8859_5
)
407 CPPUNIT_ASSERT_MESSAGE(
410 aText2
, rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_ISO_8859_5
)
414 sal_Unicode
const aText1U
[] = { ' ', '!', 0x028A, 0xD849, 0xDD13, 0 };
415 aText1
= rtl::OUString(aText1U
);
416 aText2
= rtl::OUString("%20!%81%30%B1%33%95%39%C5%37");
417 CPPUNIT_ASSERT_MESSAGE(
420 aText1
, rtl_UriCharClassUric
, rtl_UriEncodeIgnoreEscapes
,
421 RTL_TEXTENCODING_GB_18030
)
423 CPPUNIT_ASSERT_MESSAGE(
426 aText2
, rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_GB_18030
)
430 // Check strict mode:
433 sal_Unicode
const aText1U
[] = { ' ', '!', 0x0401, 0x0700, 0x045F, 0 };
434 aText1
= rtl::OUString(aText1U
);
435 aText2
= rtl::OUString();
436 CPPUNIT_ASSERT_MESSAGE(
439 aText1
, rtl_UriCharClassUric
, rtl_UriEncodeStrict
,
440 RTL_TEXTENCODING_ISO_8859_5
)
444 aText1
= rtl::OUString("%20%C4%80%FF");
445 aText2
= rtl::OUString();
446 CPPUNIT_ASSERT_MESSAGE(
449 aText1
, rtl_UriDecodeStrict
, RTL_TEXTENCODING_UTF8
)
453 aText1
= rtl::OUString("%81 ");
454 aText2
= rtl::OUString();
455 CPPUNIT_ASSERT_MESSAGE(
458 aText1
, rtl_UriDecodeStrict
, RTL_TEXTENCODING_GB_18030
)
462 aText1
= rtl::OUString("%81%20");
463 aText2
= rtl::OUString();
464 CPPUNIT_ASSERT_MESSAGE(
467 aText1
, rtl_UriDecodeStrict
, RTL_TEXTENCODING_GB_18030
)
471 aText1
= rtl::OUString("%81%30%B1%33");
472 sal_Unicode
const aText2U
[] = { 0x028A, 0 };
473 aText2
= rtl::OUString(aText2U
);
474 CPPUNIT_ASSERT_MESSAGE(
477 aText1
, rtl_UriDecodeStrict
, RTL_TEXTENCODING_GB_18030
)
481 aText1
= rtl::OUString("%810%B13");
482 sal_Unicode
const aText2U
[] = { 0x028A, 0 };
483 aText2
= rtl::OUString(aText2U
);
484 CPPUNIT_ASSERT_MESSAGE(
487 aText1
, rtl_UriDecodeStrict
, RTL_TEXTENCODING_GB_18030
)
491 // Check rtl_UriEncodeStrictKeepEscapes mode:
494 aText1
= rtl::OUString("%%ea%c3%aa");
495 aText2
= rtl::OUString("%25%EA%C3%AA");
496 CPPUNIT_ASSERT_MESSAGE(
499 aText1
, rtl_UriCharClassUric
, rtl_UriEncodeStrictKeepEscapes
,
500 RTL_TEXTENCODING_UTF8
)
504 sal_Unicode
const aText1U
[] = { 0x00EA, 0 };
505 aText1
= rtl::OUString(aText1U
);
506 aText2
= rtl::OUString("%C3%AA");
507 CPPUNIT_ASSERT_MESSAGE(
510 aText1
, rtl_UriCharClassUric
, rtl_UriEncodeStrictKeepEscapes
,
511 RTL_TEXTENCODING_UTF8
)
515 sal_Unicode
const aText1U
[] = { ' ', '!', 0x0401, 0x0700, 0x045F, 0 };
516 aText1
= rtl::OUString(aText1U
);
517 aText2
= rtl::OUString();
518 CPPUNIT_ASSERT_MESSAGE(
521 aText1
, rtl_UriCharClassUric
, rtl_UriEncodeStrictKeepEscapes
,
522 RTL_TEXTENCODING_ISO_8859_5
)
// Register the Test fixture's suite with CppUnit so the test plug-in runner picks it up.
529 CPPUNIT_TEST_SUITE_REGISTRATION(Test
);
532 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */