1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <config_locales.h>
22 #include <rtl/strbuf.hxx>
23 #include <rtl/uri.hxx>
24 #include <rtl/ustrbuf.hxx>
26 #include <cppunit/TestFixture.h>
27 #include <cppunit/extensions/HelperMacros.h>
28 #include <cppunit/plugin/TestPlugIn.h>
35 struct Test
: public CppUnit::TestFixture
{
38 CPPUNIT_TEST_SUITE(Test
);
39 CPPUNIT_TEST(test_Uri
);
40 CPPUNIT_TEST_SUITE_END();
43 void Test::test_Uri() {
44 rtl_UriCharClass
const eFirstCharClass
= rtl_UriCharClassNone
;
45 rtl_UriCharClass
const eLastCharClass
= rtl_UriCharClassUnoParamValue
;
50 // Check that all characters map back to themselves when encoded/decoded:
53 RTL_CONSTASCII_USTRINGPARAM(
54 "\x00\x01\x02\x03\x04\x05\x06\x07"
55 "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
56 "\x10\x11\x12\x13\x14\x15\x16\x17"
57 "\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F"
58 "\x20\x21\x22\x23\x24\x25\x26\x27"
59 "\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F"
60 "\x30\x31\x32\x33\x34\x35\x36\x37"
61 "\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F"
62 "\x40\x41\x42\x43\x44\x45\x46\x47"
63 "\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F"
64 "\x50\x51\x52\x53\x54\x55\x56\x57"
65 "\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F"
66 "\x60\x61\x62\x63\x64\x65\x66\x67"
67 "\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F"
68 "\x70\x71\x72\x73\x74\x75\x76\x77"
69 "\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F"));
71 for (rtl_UriCharClass eCharClass
= eFirstCharClass
;
72 eCharClass
<= eLastCharClass
;
73 eCharClass
= static_cast< rtl_UriCharClass
>(eCharClass
+ 1))
75 CPPUNIT_ASSERT_EQUAL_MESSAGE(
80 aText1
, eCharClass
, rtl_UriEncodeKeepEscapes
,
81 RTL_TEXTENCODING_ISO_8859_1
),
82 rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_ASCII_US
));
83 CPPUNIT_ASSERT_EQUAL_MESSAGE(
88 aText1
, eCharClass
, rtl_UriEncodeCheckEscapes
,
89 RTL_TEXTENCODING_ISO_8859_1
),
90 rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_ASCII_US
));
91 CPPUNIT_ASSERT_EQUAL_MESSAGE(
96 aText1
, eCharClass
, rtl_UriEncodeKeepEscapes
,
97 RTL_TEXTENCODING_ISO_8859_1
),
98 rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_ISO_8859_1
));
99 CPPUNIT_ASSERT_EQUAL_MESSAGE(
104 aText1
, eCharClass
, rtl_UriEncodeCheckEscapes
,
105 RTL_TEXTENCODING_ISO_8859_1
),
106 rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_ISO_8859_1
));
107 CPPUNIT_ASSERT_EQUAL_MESSAGE(
112 aText1
, eCharClass
, rtl_UriEncodeKeepEscapes
,
113 RTL_TEXTENCODING_ISO_8859_1
),
114 rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_UTF8
));
115 CPPUNIT_ASSERT_EQUAL_MESSAGE(
120 aText1
, eCharClass
, rtl_UriEncodeCheckEscapes
,
121 RTL_TEXTENCODING_ISO_8859_1
),
122 rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_UTF8
));
126 ("\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
127 "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F"
128 "\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F"
129 "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F"
130 "\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F"
131 "\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F"
132 "\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F"
133 "\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F"
134 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F"
135 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F"
136 "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF"
137 "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF"
138 "\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF"
139 "\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF"
140 "\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF"
141 "\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF"),
142 256, RTL_TEXTENCODING_ISO_8859_1
);
144 for (rtl_UriCharClass eCharClass
= eFirstCharClass
;
145 eCharClass
<= eLastCharClass
;
146 eCharClass
= static_cast< rtl_UriCharClass
>(eCharClass
+ 1))
148 CPPUNIT_ASSERT_EQUAL_MESSAGE(
153 aText1
, eCharClass
, rtl_UriEncodeKeepEscapes
,
154 RTL_TEXTENCODING_ISO_8859_1
),
155 rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_ISO_8859_1
));
156 CPPUNIT_ASSERT_EQUAL_MESSAGE(
161 aText1
, eCharClass
, rtl_UriEncodeCheckEscapes
,
162 RTL_TEXTENCODING_ISO_8859_1
),
163 rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_ISO_8859_1
));
164 CPPUNIT_ASSERT_EQUAL_MESSAGE(
169 aText1
, eCharClass
, rtl_UriEncodeKeepEscapes
,
170 RTL_TEXTENCODING_UTF8
),
171 rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_UTF8
));
172 CPPUNIT_ASSERT_EQUAL_MESSAGE(
177 aText1
, eCharClass
, rtl_UriEncodeCheckEscapes
,
178 RTL_TEXTENCODING_UTF8
),
179 rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_UTF8
));
182 // Check surrogate handling:
184 aText1
= u
"\xD800" // %ED%A0%80
185 u
"\U000103FF" // 0xD800,0xDFFF -> %F0%90%8F%BF
186 u
"\xDFFF" // %ED%BF%BF
188 aText2
= "%ED%A0%80" "%F0%90%8F%BF" "%ED%BF%BF" "A";
189 CPPUNIT_ASSERT_EQUAL_MESSAGE(
193 aText1
, rtl_UriCharClassUric
, rtl_UriEncodeIgnoreEscapes
,
194 RTL_TEXTENCODING_UTF8
));
195 CPPUNIT_ASSERT_EQUAL_MESSAGE(
199 aText1
, rtl_UriCharClassUric
, rtl_UriEncodeKeepEscapes
,
200 RTL_TEXTENCODING_UTF8
));
201 CPPUNIT_ASSERT_EQUAL_MESSAGE(
205 aText1
, rtl_UriCharClassUric
, rtl_UriEncodeCheckEscapes
,
206 RTL_TEXTENCODING_UTF8
));
208 aText1
= "%ed%a0%80" "%f0%90%8f%bf" "%ed%bf%bf" "A";
209 aText2
= u
"%ED%A0%80" u
"\U000103FF" u
"%ED%BF%BF" u
"A";
210 CPPUNIT_ASSERT_EQUAL_MESSAGE(
213 rtl::Uri::decode(aText1
, rtl_UriDecodeToIuri
, RTL_TEXTENCODING_UTF8
));
214 CPPUNIT_ASSERT_EQUAL_MESSAGE(
218 aText1
, rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_UTF8
));
220 // Check UTF-8 handling:
222 aText1
= "%E0%83%BF";
223 // U+00FF as an overlong UTF-8 sequence (three bytes instead of two)
225 CPPUNIT_ASSERT_EQUAL_MESSAGE(
229 aText1
, rtl_UriCharClassUric
, rtl_UriEncodeCheckEscapes
,
230 RTL_TEXTENCODING_UTF8
));
232 aText1
= "%EF%BF%BF";
233 // U+FFFF is a noncharacter, not a legal character
235 CPPUNIT_ASSERT_EQUAL_MESSAGE(
239 aText1
, rtl_UriCharClassUric
, rtl_UriEncodeCheckEscapes
,
240 RTL_TEXTENCODING_UTF8
));
242 // Check IURI handling:
244 aText1
= "%30%C3%BF";
245 aText2
= u
"%30\u00FF";
246 CPPUNIT_ASSERT_EQUAL_MESSAGE(
249 rtl::Uri::decode(aText1
, rtl_UriDecodeToIuri
, RTL_TEXTENCODING_UTF8
));
251 // Check modified rtl_UriCharClassUnoParamValue (removed '[' and ']'):
254 aText2
= "%5B%5D%5B%5D";
255 CPPUNIT_ASSERT_EQUAL_MESSAGE(
259 aText1
, rtl_UriCharClassUnoParamValue
, rtl_UriEncodeCheckEscapes
,
260 RTL_TEXTENCODING_ASCII_US
));
262 // Check Uri::convertRelToAbs:
270 static RelToAbsTest
const aRelToAbsTest
[]
271 = { // The following tests are taken from RFC 3986:
272 { "http://a/b/c/d;p?q", "g:h", "g:h" },
273 { "http://a/b/c/d;p?q", "g", "http://a/b/c/g" },
274 { "http://a/b/c/d;p?q", "./g", "http://a/b/c/g" },
275 { "http://a/b/c/d;p?q", "g/", "http://a/b/c/g/" },
276 { "http://a/b/c/d;p?q", "/g", "http://a/g" },
277 { "http://a/b/c/d;p?q", "//g", "http://g" },
278 { "http://a/b/c/d;p?q", "?y", "http://a/b/c/d;p?y" },
279 { "http://a/b/c/d;p?q", "g?y", "http://a/b/c/g?y" },
280 { "http://a/b/c/d;p?q", "#s", "http://a/b/c/d;p?q#s" },
281 { "http://a/b/c/d;p?q", "g#s", "http://a/b/c/g#s" },
282 { "http://a/b/c/d;p?q", "g?y#s", "http://a/b/c/g?y#s" },
283 { "http://a/b/c/d;p?q", ";x", "http://a/b/c/;x" },
284 { "http://a/b/c/d;p?q", "g;x", "http://a/b/c/g;x" },
285 { "http://a/b/c/d;p?q", "g;x?y#s", "http://a/b/c/g;x?y#s" },
286 { "http://a/b/c/d;p?q", "", "http://a/b/c/d;p?q" },
287 { "http://a/b/c/d;p?q", ".", "http://a/b/c/" },
288 { "http://a/b/c/d;p?q", "./", "http://a/b/c/" },
289 { "http://a/b/c/d;p?q", "..", "http://a/b/" },
290 { "http://a/b/c/d;p?q", "../", "http://a/b/" },
291 { "http://a/b/c/d;p?q", "../g", "http://a/b/g" },
292 { "http://a/b/c/d;p?q", "../..", "http://a/" },
293 { "http://a/b/c/d;p?q", "../../", "http://a/" },
294 { "http://a/b/c/d;p?q", "../../g", "http://a/g" },
295 { "http://a/b/c/d;p?q", "../../../g", "http://a/g" },
296 { "http://a/b/c/d;p?q", "../../../../g", "http://a/g" },
297 { "http://a/b/c/d;p?q", "/./g", "http://a/g" },
298 { "http://a/b/c/d;p?q", "/../g", "http://a/g" },
299 { "http://a/b/c/d;p?q", "g.", "http://a/b/c/g." },
300 { "http://a/b/c/d;p?q", ".g", "http://a/b/c/.g" },
301 { "http://a/b/c/d;p?q", "g..", "http://a/b/c/g.." },
302 { "http://a/b/c/d;p?q", "..g", "http://a/b/c/..g" },
303 { "http://a/b/c/d;p?q", "./../g", "http://a/b/g" },
304 { "http://a/b/c/d;p?q", "./g/.", "http://a/b/c/g/" },
305 { "http://a/b/c/d;p?q", "g/./h", "http://a/b/c/g/h" },
306 { "http://a/b/c/d;p?q", "g/../h", "http://a/b/c/h" },
307 { "http://a/b/c/d;p?q", "g;x=1/./y", "http://a/b/c/g;x=1/y" },
308 { "http://a/b/c/d;p?q", "g;x=1/../y", "http://a/b/c/y" },
309 { "http://a/b/c/d;p?q", "g?y/./x", "http://a/b/c/g?y/./x" },
310 { "http://a/b/c/d;p?q", "g?y/../x", "http://a/b/c/g?y/../x" },
311 { "http://a/b/c/d;p?q", "g#s/./x", "http://a/b/c/g#s/./x" },
312 { "http://a/b/c/d;p?q", "g#s/../x", "http://a/b/c/g#s/../x" },
313 { "http://a/b/c/d;p?q", "http:g", "http:g" },
315 { "http!://a/b/c/d;p?q", "g:h", "g:h" },
316 { "http!://a/b/c/d;p?q", "g", nullptr },
317 { "http:b/c/d;p?q", "g:h", "g:h" },
318 { "http:b/c/d;p?q", "g", "http:b/c/g" },
319 { "http://a/b/../", "../c", "http://a/c" },
320 { "http://a/b/..", "../c", "http://a/c" },
321 { "http://a/./b/", ".././.././../c", "http://a/c" },
322 { "http://a", "b", "http://a/b" },
323 { "", "http://a/b/../c", "http://a/c" },
325 { "http://a/b/c", "d", "http://a/b/d" },
326 { "http://a/b/c/", "d", "http://a/b/c/d" },
327 { "http://a/b/c//", "d", "http://a/b/c//d" } };
329 for (std::size_t i
= 0; i
< SAL_N_ELEMENTS(aRelToAbsTest
); ++i
)
332 bool bMalformed
= false;
334 aAbs
= rtl::Uri::convertRelToAbs(
335 OUString::createFromAscii(aRelToAbsTest
[i
].pBase
),
336 OUString::createFromAscii(aRelToAbsTest
[i
].pRel
));
337 } catch (const rtl::MalformedUriException
&) {
341 ? aRelToAbsTest
[i
].pAbs
!= nullptr
342 : (aRelToAbsTest
[i
].pAbs
== nullptr
343 || !aAbs
.equalsAscii(aRelToAbsTest
[i
].pAbs
)))
346 "FAILED convertRelToAbs(%s, %s) -> %s != %s\n",
347 aRelToAbsTest
[i
].pBase
, aRelToAbsTest
[i
].pRel
,
351 aAbs
, RTL_TEXTENCODING_UTF8
).getStr()),
352 (aRelToAbsTest
[i
].pAbs
== nullptr
353 ? "<MALFORMED>" : aRelToAbsTest
[i
].pAbs
));
354 CPPUNIT_ASSERT(false);
358 // Check encode with unusual text encodings:
361 sal_Unicode
const aText1U
[] = { ' ', '!', 0x0401, 0x045F, 0 };
362 aText1
= OUString(aText1U
);
363 aText2
= "%20!%A1%FF";
364 CPPUNIT_ASSERT_EQUAL_MESSAGE(
368 aText1
, rtl_UriCharClassUric
, rtl_UriEncodeIgnoreEscapes
,
369 RTL_TEXTENCODING_ISO_8859_5
));
370 CPPUNIT_ASSERT_EQUAL_MESSAGE(
374 aText2
, rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_ISO_8859_5
));
377 sal_Unicode
const aText1U
[] = { ' ', '!', 0x0401, 0x0700, 0x045F, 0 };
378 aText1
= OUString(aText1U
);
379 sal_Unicode
const aText2U
[] = {
380 '%', '2', '0', '!', '%', 'A', '1', 0x0700, '%', 'F', 'F', 0 };
381 aText2
= OUString(aText2U
);
382 CPPUNIT_ASSERT_EQUAL_MESSAGE(
386 aText1
, rtl_UriCharClassUric
, rtl_UriEncodeIgnoreEscapes
,
387 RTL_TEXTENCODING_ISO_8859_5
));
388 CPPUNIT_ASSERT_EQUAL_MESSAGE(
392 aText2
, rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_ISO_8859_5
));
394 #if WITH_LOCALE_ALL || WITH_LOCALE_zh
396 sal_Unicode
const aText1U
[] = { ' ', '!', 0x028A, 0xD849, 0xDD13, 0 };
397 aText1
= OUString(aText1U
);
398 aText2
= "%20!%81%30%B1%33%95%39%C5%37";
399 CPPUNIT_ASSERT_EQUAL_MESSAGE(
403 aText1
, rtl_UriCharClassUric
, rtl_UriEncodeIgnoreEscapes
,
404 RTL_TEXTENCODING_GB_18030
));
405 CPPUNIT_ASSERT_EQUAL_MESSAGE(
409 aText2
, rtl_UriDecodeWithCharset
, RTL_TEXTENCODING_GB_18030
));
412 // Check strict mode:
415 sal_Unicode
const aText1U
[] = { ' ', '!', 0x0401, 0x0700, 0x045F, 0 };
416 aText1
= OUString(aText1U
);
418 CPPUNIT_ASSERT_EQUAL_MESSAGE(
422 aText1
, rtl_UriCharClassUric
, rtl_UriEncodeStrict
,
423 RTL_TEXTENCODING_ISO_8859_5
));
426 aText1
= "%20%C4%80%FF";
428 CPPUNIT_ASSERT_EQUAL_MESSAGE(
432 aText1
, rtl_UriDecodeStrict
, RTL_TEXTENCODING_UTF8
));
434 #if WITH_LOCALE_ALL || WITH_LOCALE_zh
438 CPPUNIT_ASSERT_EQUAL_MESSAGE(
442 aText1
, rtl_UriDecodeStrict
, RTL_TEXTENCODING_GB_18030
));
447 CPPUNIT_ASSERT_EQUAL_MESSAGE(
451 aText1
, rtl_UriDecodeStrict
, RTL_TEXTENCODING_GB_18030
));
454 aText1
= "%81%30%B1%33";
455 sal_Unicode
const aText2U
[] = { 0x028A, 0 };
456 aText2
= OUString(aText2U
);
457 CPPUNIT_ASSERT_EQUAL_MESSAGE(
461 aText1
, rtl_UriDecodeStrict
, RTL_TEXTENCODING_GB_18030
));
465 sal_Unicode
const aText2U
[] = { 0x028A, 0 };
466 aText2
= OUString(aText2U
);
467 CPPUNIT_ASSERT_EQUAL_MESSAGE(
471 aText1
, rtl_UriDecodeStrict
, RTL_TEXTENCODING_GB_18030
));
474 // Check rtl_UriEncodeStrictKeepEscapes mode:
477 aText1
= "%%ea%c3%aa";
478 aText2
= "%25%EA%C3%AA";
479 CPPUNIT_ASSERT_EQUAL_MESSAGE(
483 aText1
, rtl_UriCharClassUric
, rtl_UriEncodeStrictKeepEscapes
,
484 RTL_TEXTENCODING_UTF8
));
487 sal_Unicode
const aText1U
[] = { 0x00EA, 0 };
488 aText1
= OUString(aText1U
);
490 CPPUNIT_ASSERT_EQUAL_MESSAGE(
494 aText1
, rtl_UriCharClassUric
, rtl_UriEncodeStrictKeepEscapes
,
495 RTL_TEXTENCODING_UTF8
));
498 sal_Unicode
const aText1U
[] = { ' ', '!', 0x0401, 0x0700, 0x045F, 0 };
499 aText1
= OUString(aText1U
);
501 CPPUNIT_ASSERT_EQUAL_MESSAGE(
505 aText1
, rtl_UriCharClassUric
, rtl_UriEncodeStrictKeepEscapes
,
506 RTL_TEXTENCODING_ISO_8859_5
));
512 CPPUNIT_TEST_SUITE_REGISTRATION(Test
);
514 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */