1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <config_locales.h>
22 #include <rtl/uri.hxx>
24 #include <cppunit/TestFixture.h>
25 #include <cppunit/extensions/HelperMacros.h>
// CppUnit fixture for the rtl URI tests. The suite macros below declare a
// suite named Test whose only registered case is test_Uri.
// NOTE(review): this listing has lost lines (the test_Uri declaration and the
// struct's closing brace are not visible) -- confirm against the original file.
31 struct Test
: public CppUnit::TestFixture
{
34 CPPUNIT_TEST_SUITE(Test
);
35 CPPUNIT_TEST(test_Uri
);
36 CPPUNIT_TEST_SUITE_END();
// Single test case covering URI encoding, decoding and relative-to-absolute
// conversion. The two constants below bound the rtl_UriCharClass values that
// the loops further down iterate over; both ends are included (the loops use
// `<= eLastCharClass`).
39 void Test::test_Uri() {
40 rtl_UriCharClass
const eFirstCharClass
= rtl_UriCharClassNone
;
41 rtl_UriCharClass
const eLastCharClass
= rtl_UriCharClassUnoParamValue
;
46 // Check that all characters map back to themselves when encoded/decoded:
// Input literal: the 128 code units 0x00-0x7F in ascending order.
// NOTE(review): the statement that receives this literal, and the
// encode/decode call heads inside the assertions below, are not visible in
// this listing -- confirm against the original file.
49 RTL_CONSTASCII_USTRINGPARAM(
50 "\x00\x01\x02\x03\x04\x05\x06\x07"
51 "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
52 "\x10\x11\x12\x13\x14\x15\x16\x17"
53 "\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F"
54 "\x20\x21\x22\x23\x24\x25\x26\x27"
55 "\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F"
56 "\x30\x31\x32\x33\x34\x35\x36\x37"
57 "\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F"
58 "\x40\x41\x42\x43\x44\x45\x46\x47"
59 "\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F"
60 "\x50\x51\x52\x53\x54\x55\x56\x57"
61 "\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F"
62 "\x60\x61\x62\x63\x64\x65\x66\x67"
63 "\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F"
64 "\x70\x71\x72\x73\x74\x75\x76\x77"
65 "\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F"));
// Run the checks once per character class, from eFirstCharClass through
// eLastCharClass inclusive. The cast is needed because adding 1 to the enum
// yields an integer.
67 for (rtl_UriCharClass eCharClass
= eFirstCharClass
;
68 eCharClass
<= eLastCharClass
;
69 eCharClass
= static_cast< rtl_UriCharClass
>(eCharClass
+ 1))
// KeepEscapes + ISO-8859-1 on the encode side, WithCharset + US-ASCII on the
// decode side.
71 CPPUNIT_ASSERT_EQUAL_MESSAGE(
76 aText1
, eCharClass
, rtl_UriEncodeKeepEscapes
,
77 RTL_TEXTENCODING_ISO_8859_1
),
78 rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_ASCII_US
));
// CheckEscapes + ISO-8859-1 encode, US-ASCII decode.
79 CPPUNIT_ASSERT_EQUAL_MESSAGE(
84 aText1
, eCharClass
, rtl_UriEncodeCheckEscapes
,
85 RTL_TEXTENCODING_ISO_8859_1
),
86 rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_ASCII_US
));
// KeepEscapes + ISO-8859-1 encode, ISO-8859-1 decode.
87 CPPUNIT_ASSERT_EQUAL_MESSAGE(
92 aText1
, eCharClass
, rtl_UriEncodeKeepEscapes
,
93 RTL_TEXTENCODING_ISO_8859_1
),
94 rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_ISO_8859_1
));
// CheckEscapes + ISO-8859-1 encode, ISO-8859-1 decode.
95 CPPUNIT_ASSERT_EQUAL_MESSAGE(
100 aText1
, eCharClass
, rtl_UriEncodeCheckEscapes
,
101 RTL_TEXTENCODING_ISO_8859_1
),
102 rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_ISO_8859_1
));
// KeepEscapes + ISO-8859-1 encode, UTF-8 decode.
103 CPPUNIT_ASSERT_EQUAL_MESSAGE(
108 aText1
, eCharClass
, rtl_UriEncodeKeepEscapes
,
109 RTL_TEXTENCODING_ISO_8859_1
),
110 rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_UTF8
));
// CheckEscapes + ISO-8859-1 encode, UTF-8 decode.
111 CPPUNIT_ASSERT_EQUAL_MESSAGE(
116 aText1
, eCharClass
, rtl_UriEncodeCheckEscapes
,
117 RTL_TEXTENCODING_ISO_8859_1
),
118 rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_UTF8
));
// Input literal: all 256 byte values 0x00-0xFF in ascending order, passed
// together with an explicit length of 256 and the ISO-8859-1 text encoding.
// NOTE(review): the constructor/assignment that receives these arguments is
// not visible in this listing -- confirm against the original file.
122 ("\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
123 "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F"
124 "\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F"
125 "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F"
126 "\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F"
127 "\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F"
128 "\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F"
129 "\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F"
130 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F"
131 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F"
132 "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF"
133 "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF"
134 "\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF"
135 "\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF"
136 "\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF"
137 "\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF"),
138 256, RTL_TEXTENCODING_ISO_8859_1
);
// Same per-character-class iteration as the 7-bit section, but here the
// encode and decode sides always use the same text encoding.
140 for (rtl_UriCharClass eCharClass
= eFirstCharClass
;
141 eCharClass
<= eLastCharClass
;
142 eCharClass
= static_cast< rtl_UriCharClass
>(eCharClass
+ 1))
// KeepEscapes, ISO-8859-1 on both sides.
144 CPPUNIT_ASSERT_EQUAL_MESSAGE(
149 aText1
, eCharClass
, rtl_UriEncodeKeepEscapes
,
150 RTL_TEXTENCODING_ISO_8859_1
),
151 rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_ISO_8859_1
));
// CheckEscapes, ISO-8859-1 on both sides.
152 CPPUNIT_ASSERT_EQUAL_MESSAGE(
157 aText1
, eCharClass
, rtl_UriEncodeCheckEscapes
,
158 RTL_TEXTENCODING_ISO_8859_1
),
159 rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_ISO_8859_1
));
// KeepEscapes, UTF-8 on both sides.
160 CPPUNIT_ASSERT_EQUAL_MESSAGE(
165 aText1
, eCharClass
, rtl_UriEncodeKeepEscapes
,
166 RTL_TEXTENCODING_UTF8
),
167 rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_UTF8
));
// CheckEscapes, UTF-8 on both sides.
168 CPPUNIT_ASSERT_EQUAL_MESSAGE(
173 aText1
, eCharClass
, rtl_UriEncodeCheckEscapes
,
174 RTL_TEXTENCODING_UTF8
),
175 rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_UTF8
));
178 // Check surrogate handling:
// Source text: a lone high surrogate (0xD800), the character U+103FF and a
// lone low surrogate (0xDFFF). The trailing comment on each piece gives the
// UTF-8 escape sequence expected for it.
// NOTE(review): aText2 ends in "A" but no matching piece of aText1 is visible
// here; a line of the original appears to be missing from this listing.
180 aText1
= u
"\xD800" // %ED%A0%80
181 u
"\U000103FF" // 0xD800,0xDFFF -> %F0%90%8F%BF
182 u
"\xDFFF" // %ED%BF%BF
// Expected escaped form of the text above.
184 aText2
= "%ED%A0%80" "%F0%90%8F%BF" "%ED%BF%BF" "A";
// The same Uric/UTF-8 encode is exercised under three escape-handling modes:
// IgnoreEscapes, KeepEscapes and CheckEscapes.
185 CPPUNIT_ASSERT_EQUAL_MESSAGE(
189 aText1
, rtl_UriCharClassUric
, rtl_UriEncodeIgnoreEscapes
,
190 RTL_TEXTENCODING_UTF8
));
191 CPPUNIT_ASSERT_EQUAL_MESSAGE(
195 aText1
, rtl_UriCharClassUric
, rtl_UriEncodeKeepEscapes
,
196 RTL_TEXTENCODING_UTF8
));
197 CPPUNIT_ASSERT_EQUAL_MESSAGE(
201 aText1
, rtl_UriCharClassUric
, rtl_UriEncodeCheckEscapes
,
202 RTL_TEXTENCODING_UTF8
));
// Reverse direction, starting from lower-case escapes. In the expected text
// only the four-byte sequence is turned into a character (U+103FF); the two
// lone-surrogate sequences remain as escapes, written in upper case.
204 aText1
= "%ed%a0%80" "%f0%90%8f%bf" "%ed%bf%bf" "A";
205 aText2
= u
"%ED%A0%80" u
"\U000103FF" u
"%ED%BF%BF" u
"A"_ustr
;
// Decode with the ToIuri mechanism ...
206 CPPUNIT_ASSERT_EQUAL_MESSAGE(
209 rtl::Uri::decode(aText1
, rtl_UriDecodeToIuri
, RTL_TEXTENCODING_UTF8
));
// ... and with the WithCharset mechanism.
// NOTE(review): the expected operand of these two assertions is not visible
// in this listing -- presumably aText2; confirm against the original file.
210 CPPUNIT_ASSERT_EQUAL_MESSAGE(
214 aText1
, rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_UTF8
));
216 // Check UTF-8 handling:
// Both inputs below are escape sequences whose bytes are not acceptable
// UTF-8 for the reason given in the comment after each assignment.
// NOTE(review): the expected values (aText2) for this section are not
// visible in this listing -- confirm against the original file.
218 aText1
= "%E0%83%BF";
219 // U+00FF encoded with three instead of two bytes (over-long form)
221 CPPUNIT_ASSERT_EQUAL_MESSAGE(
225 aText1
, rtl_UriCharClassUric
, rtl_UriEncodeCheckEscapes
,
226 RTL_TEXTENCODING_UTF8
));
228 aText1
= "%EF%BF%BF";
229 // U+FFFF is no legal character
231 CPPUNIT_ASSERT_EQUAL_MESSAGE(
235 aText1
, rtl_UriCharClassUric
, rtl_UriEncodeCheckEscapes
,
236 RTL_TEXTENCODING_UTF8
));
238 // Check IURI handling:
// Input mixes an escaped ASCII character (%30 is '0') with the UTF-8 escape
// of U+00FF (%C3%BF). In the expected text %30 is left escaped while the
// non-ASCII character is unescaped.
240 aText1
= "%30%C3%BF";
241 aText2
= u
"%30\u00FF"_ustr
;
// NOTE(review): the expected operand of this assertion is not visible in
// this listing -- presumably aText2.
242 CPPUNIT_ASSERT_EQUAL_MESSAGE(
245 rtl::Uri::decode(aText1
, rtl_UriDecodeToIuri
, RTL_TEXTENCODING_UTF8
));
247 // Check modified rtl_UriCharClassUnoParamValue (removed '[' and ']'):
// Expected output: %5B and %5D are the escapes for '[' and ']'.
// NOTE(review): the assignment of the input aText1 for this check is not
// visible in this listing -- confirm against the original file.
250 aText2
= "%5B%5D%5B%5D";
251 CPPUNIT_ASSERT_EQUAL_MESSAGE(
255 aText1
, rtl_UriCharClassUnoParamValue
, rtl_UriEncodeCheckEscapes
,
256 RTL_TEXTENCODING_ASCII_US
));
258 // Check Uri::convertRelToAbs:
// Table-driven check. Each row is { base URI, relative reference, expected
// absolute URI }. A null expected value means the conversion is expected to
// be reported as malformed (see the failure condition after the loop header).
// NOTE(review): the RelToAbsTest struct definition and the try/braces of the
// loop body are not visible in this listing.
266 static RelToAbsTest
const aRelToAbsTest
[]
267 = { // The following tests are taken from RFC 3986:
268 { "http://a/b/c/d;p?q", "g:h", "g:h" },
269 { "http://a/b/c/d;p?q", "g", "http://a/b/c/g" },
270 { "http://a/b/c/d;p?q", "./g", "http://a/b/c/g" },
271 { "http://a/b/c/d;p?q", "g/", "http://a/b/c/g/" },
272 { "http://a/b/c/d;p?q", "/g", "http://a/g" },
273 { "http://a/b/c/d;p?q", "//g", "http://g" },
274 { "http://a/b/c/d;p?q", "?y", "http://a/b/c/d;p?y" },
275 { "http://a/b/c/d;p?q", "g?y", "http://a/b/c/g?y" },
276 { "http://a/b/c/d;p?q", "#s", "http://a/b/c/d;p?q#s" },
277 { "http://a/b/c/d;p?q", "g#s", "http://a/b/c/g#s" },
278 { "http://a/b/c/d;p?q", "g?y#s", "http://a/b/c/g?y#s" },
279 { "http://a/b/c/d;p?q", ";x", "http://a/b/c/;x" },
280 { "http://a/b/c/d;p?q", "g;x", "http://a/b/c/g;x" },
281 { "http://a/b/c/d;p?q", "g;x?y#s", "http://a/b/c/g;x?y#s" },
282 { "http://a/b/c/d;p?q", "", "http://a/b/c/d;p?q" },
283 { "http://a/b/c/d;p?q", ".", "http://a/b/c/" },
284 { "http://a/b/c/d;p?q", "./", "http://a/b/c/" },
285 { "http://a/b/c/d;p?q", "..", "http://a/b/" },
286 { "http://a/b/c/d;p?q", "../", "http://a/b/" },
287 { "http://a/b/c/d;p?q", "../g", "http://a/b/g" },
288 { "http://a/b/c/d;p?q", "../..", "http://a/" },
289 { "http://a/b/c/d;p?q", "../../", "http://a/" },
290 { "http://a/b/c/d;p?q", "../../g", "http://a/g" },
291 { "http://a/b/c/d;p?q", "../../../g", "http://a/g" },
292 { "http://a/b/c/d;p?q", "../../../../g", "http://a/g" },
293 { "http://a/b/c/d;p?q", "/./g", "http://a/g" },
294 { "http://a/b/c/d;p?q", "/../g", "http://a/g" },
295 { "http://a/b/c/d;p?q", "g.", "http://a/b/c/g." },
296 { "http://a/b/c/d;p?q", ".g", "http://a/b/c/.g" },
297 { "http://a/b/c/d;p?q", "g..", "http://a/b/c/g.." },
298 { "http://a/b/c/d;p?q", "..g", "http://a/b/c/..g" },
299 { "http://a/b/c/d;p?q", "./../g", "http://a/b/g" },
300 { "http://a/b/c/d;p?q", "./g/.", "http://a/b/c/g/" },
301 { "http://a/b/c/d;p?q", "g/./h", "http://a/b/c/g/h" },
302 { "http://a/b/c/d;p?q", "g/../h", "http://a/b/c/h" },
303 { "http://a/b/c/d;p?q", "g;x=1/./y", "http://a/b/c/g;x=1/y" },
304 { "http://a/b/c/d;p?q", "g;x=1/../y", "http://a/b/c/y" },
305 { "http://a/b/c/d;p?q", "g?y/./x", "http://a/b/c/g?y/./x" },
306 { "http://a/b/c/d;p?q", "g?y/../x", "http://a/b/c/g?y/../x" },
307 { "http://a/b/c/d;p?q", "g#s/./x", "http://a/b/c/g#s/./x" },
308 { "http://a/b/c/d;p?q", "g#s/../x", "http://a/b/c/g#s/../x" },
309 { "http://a/b/c/d;p?q", "http:g", "http:g" },
311 { "http!://a/b/c/d;p?q", "g:h", "g:h" },
312 { "http!://a/b/c/d;p?q", "g", nullptr },
313 { "http:b/c/d;p?q", "g:h", "g:h" },
314 { "http:b/c/d;p?q", "g", "http:b/c/g" },
315 { "http://a/b/../", "../c", "http://a/c" },
316 { "http://a/b/..", "../c", "http://a/c" },
317 { "http://a/./b/", ".././.././../c", "http://a/c" },
318 { "http://a", "b", "http://a/b" },
319 { "", "http://a/b/../c", "http://a/c" },
321 { "http://a/b/c", "d", "http://a/b/d" },
322 { "http://a/b/c/", "d", "http://a/b/c/d" },
323 { "http://a/b/c//", "d", "http://a/b/c//d" } };
// Unpack each row into its base, relative and expected-absolute strings.
325 for (auto const[pBase
, pRel
, pAbs
] : aRelToAbsTest
)
// Tracks whether the conversion was reported as malformed for this row.
// NOTE(review): presumably set to true in the catch handler below; the
// handler body is not visible in this listing.
328 bool bMalformed
= false;
331 aAbsResult
= rtl::Uri::convertRelToAbs(OUString::createFromAscii(pBase
),
332 OUString::createFromAscii(pRel
));
334 catch (const rtl::MalformedUriException
&)
// A row fails when a malformed outcome occurred but a result was expected,
// or when a result was produced although none was expected or it differs
// from the expected string.
338 if (bMalformed
? pAbs
!= nullptr : (pAbs
== nullptr || !aAbsResult
.equalsAscii(pAbs
)))
// Print base, relative reference, actual and expected outcome (using
// "<MALFORMED>" as placeholder for either side) before failing the test.
340 printf("FAILED convertRelToAbs(%s, %s) -> %s != %s\n", pBase
, pRel
,
341 (bMalformed
? "<MALFORMED>"
342 : OUStringToOString(aAbsResult
, RTL_TEXTENCODING_UTF8
).getStr()),
343 (pAbs
== nullptr ? "<MALFORMED>" : pAbs
));
344 CPPUNIT_ASSERT(false);
348 // Check encode with unusual text encodings:
// Case 1: ISO-8859-5. Space, '!' and two Cyrillic characters; the expected
// text escapes the space as %20, keeps '!' and escapes the Cyrillic
// characters as the single bytes %A1 and %FF.
351 static constexpr OUStringLiteral aText1U
= u
" !\u0401\u045F";
352 aText1
= OUString(aText1U
);
353 aText2
= "%20!%A1%FF";
// Encode direction (IgnoreEscapes, ISO-8859-5).
354 CPPUNIT_ASSERT_EQUAL_MESSAGE(
358 aText1
, rtl_UriCharClassUric
, rtl_UriEncodeIgnoreEscapes
,
359 RTL_TEXTENCODING_ISO_8859_5
));
// Decode direction (WithCharset, ISO-8859-5), starting from aText2.
360 CPPUNIT_ASSERT_EQUAL_MESSAGE(
364 aText2
, rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_ISO_8859_5
));
// Case 2: as case 1 but with U+0700 inserted between the Cyrillic characters.
// NOTE(review): the initializer of aText2U is missing from this listing, so
// the expected encoding of U+0700 cannot be stated here -- confirm against
// the original file.
367 static constexpr OUStringLiteral aText1U
= u
" !\u0401\u0700\u045F";
368 aText1
= OUString(aText1U
);
369 static constexpr OUStringLiteral aText2U
=
371 aText2
= OUString(aText2U
);
372 CPPUNIT_ASSERT_EQUAL_MESSAGE(
376 aText1
, rtl_UriCharClassUric
, rtl_UriEncodeIgnoreEscapes
,
377 RTL_TEXTENCODING_ISO_8859_5
));
378 CPPUNIT_ASSERT_EQUAL_MESSAGE(
382 aText2
, rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_ISO_8859_5
));
// Case 3: GB 18030, compiled only when all locales or the zh locale are
// configured. U+028A and U+22513 each correspond to a four-byte escape
// sequence in the expected text.
384 #if WITH_LOCALE_ALL || WITH_LOCALE_zh
386 static constexpr OUStringLiteral aText1U
= u
" !\u028A\U00022513";
387 aText1
= OUString(aText1U
);
388 aText2
= "%20!%81%30%B1%33%95%39%C5%37";
389 CPPUNIT_ASSERT_EQUAL_MESSAGE(
393 aText1
, rtl_UriCharClassUric
, rtl_UriEncodeIgnoreEscapes
,
394 RTL_TEXTENCODING_GB_18030
));
395 CPPUNIT_ASSERT_EQUAL_MESSAGE(
399 aText2
, rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_GB_18030
));
402 // Check strict mode:
// NOTE(review): most expected values and several input assignments of this
// section are missing from this listing; the comments below describe only
// what is visible -- confirm details against the original file.
// Strict encode of text containing U+0700 with ISO-8859-5.
405 static constexpr OUStringLiteral aText1U
= u
" !\u0401\u0700\u045F";
406 aText1
= OUString(aText1U
);
408 CPPUNIT_ASSERT_EQUAL_MESSAGE(
412 aText1
, rtl_UriCharClassUric
, rtl_UriEncodeStrict
,
413 RTL_TEXTENCODING_ISO_8859_5
));
// Strict decode as UTF-8 of an input that ends in the byte %FF.
416 aText1
= "%20%C4%80%FF";
418 CPPUNIT_ASSERT_EQUAL_MESSAGE(
422 aText1
, rtl_UriDecodeStrict
, RTL_TEXTENCODING_UTF8
));
// Strict decode checks for GB 18030, compiled only when all locales or the
// zh locale are configured.
424 #if WITH_LOCALE_ALL || WITH_LOCALE_zh
428 CPPUNIT_ASSERT_EQUAL_MESSAGE(
432 aText1
, rtl_UriDecodeStrict
, RTL_TEXTENCODING_GB_18030
));
437 CPPUNIT_ASSERT_EQUAL_MESSAGE(
441 aText1
, rtl_UriDecodeStrict
, RTL_TEXTENCODING_GB_18030
));
// Four-byte GB 18030 escape sequence with U+028A as the expected text.
444 aText1
= "%81%30%B1%33";
445 static constexpr OUStringLiteral aText2U
= u
"\u028A";
446 aText2
= OUString(aText2U
);
447 CPPUNIT_ASSERT_EQUAL_MESSAGE(
451 aText1
, rtl_UriDecodeStrict
, RTL_TEXTENCODING_GB_18030
));
// Same expected text again; the input for this variant is not visible here.
455 static constexpr OUStringLiteral aText2U
= u
"\u028A";
456 aText2
= OUString(aText2U
);
457 CPPUNIT_ASSERT_EQUAL_MESSAGE(
461 aText1
, rtl_UriDecodeStrict
, RTL_TEXTENCODING_GB_18030
));
464 // Check rtl_UriEncodeStrictKeepEscapes mode:
// NOTE(review): the encode call heads and several expected values of this
// section are missing from this listing; the comments below describe only
// what is visible -- confirm details against the original file.
// Input holds a bare '%' followed by lower-case escapes. In the expected text
// the bare '%' appears as %25 and the escapes appear in upper case.
467 aText1
= "%%ea%c3%aa";
468 aText2
= "%25%EA%C3%AA";
469 CPPUNIT_ASSERT_EQUAL_MESSAGE(
473 aText1
, rtl_UriCharClassUric
, rtl_UriEncodeStrictKeepEscapes
,
474 RTL_TEXTENCODING_UTF8
));
// A single non-ASCII character (U+00EA) with UTF-8.
477 static constexpr OUStringLiteral aText1U
= u
"\u00EA";
478 aText1
= OUString(aText1U
);
480 CPPUNIT_ASSERT_EQUAL_MESSAGE(
484 aText1
, rtl_UriCharClassUric
, rtl_UriEncodeStrictKeepEscapes
,
485 RTL_TEXTENCODING_UTF8
));
// Text containing U+0700 with ISO-8859-5 (same input as in the strict-mode
// section).
488 static constexpr OUStringLiteral aText1U
= u
" !\u0401\u0700\u045F";
489 aText1
= OUString(aText1U
);
491 CPPUNIT_ASSERT_EQUAL_MESSAGE(
495 aText1
, rtl_UriCharClassUric
, rtl_UriEncodeStrictKeepEscapes
,
496 RTL_TEXTENCODING_ISO_8859_5
));
// The already-escaped input "%80" is checked under four text encodings:
// US-ASCII, ISO-8859-1, UTF-8 and Shift-JIS.
499 CPPUNIT_ASSERT_EQUAL(
502 u
"%80"_ustr
, rtl_UriCharClassUric
, rtl_UriEncodeStrictKeepEscapes
,
503 RTL_TEXTENCODING_ASCII_US
));
504 CPPUNIT_ASSERT_EQUAL(
507 u
"%80"_ustr
, rtl_UriCharClassUric
, rtl_UriEncodeStrictKeepEscapes
,
508 RTL_TEXTENCODING_ISO_8859_1
));
509 CPPUNIT_ASSERT_EQUAL(
512 u
"%80"_ustr
, rtl_UriCharClassUric
, rtl_UriEncodeStrictKeepEscapes
,
513 RTL_TEXTENCODING_UTF8
));
514 CPPUNIT_ASSERT_EQUAL(
517 u
"%80"_ustr
, rtl_UriCharClassUric
, rtl_UriEncodeStrictKeepEscapes
,
518 RTL_TEXTENCODING_SHIFT_JIS
));
// The same four encodings again with the already-escaped input "%FE".
519 CPPUNIT_ASSERT_EQUAL(
522 u
"%FE"_ustr
, rtl_UriCharClassUric
, rtl_UriEncodeStrictKeepEscapes
,
523 RTL_TEXTENCODING_ASCII_US
));
524 CPPUNIT_ASSERT_EQUAL(
527 u
"%FE"_ustr
, rtl_UriCharClassUric
, rtl_UriEncodeStrictKeepEscapes
,
528 RTL_TEXTENCODING_ISO_8859_1
));
529 CPPUNIT_ASSERT_EQUAL(
532 u
"%FE"_ustr
, rtl_UriCharClassUric
, rtl_UriEncodeStrictKeepEscapes
,
533 RTL_TEXTENCODING_UTF8
));
534 CPPUNIT_ASSERT_EQUAL(
537 u
"%FE"_ustr
, rtl_UriCharClassUric
, rtl_UriEncodeStrictKeepEscapes
,
538 RTL_TEXTENCODING_SHIFT_JIS
));
// Register the Test fixture's suite with CppUnit via its registration macro.
543 CPPUNIT_TEST_SUITE_REGISTRATION(Test
);
545 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */