// Version 6.4.0.3, tag libreoffice-6.4.0.3
// [LibreOffice.git] / sal / rtl / uri.cxx
// blob 592b92c8852535a6a76e648102e751340e6e157e
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #include <rtl/character.hxx>
21 #include <rtl/strbuf.hxx>
22 #include <rtl/textenc.h>
23 #include <rtl/textcvt.h>
24 #include <rtl/uri.h>
25 #include <rtl/ustrbuf.h>
26 #include <rtl/ustrbuf.hxx>
27 #include <rtl/ustring.h>
28 #include <rtl/ustring.hxx>
29 #include <sal/types.h>
30 #include <sal/macros.h>
32 #include <uri_internal.hxx>
34 #include <algorithm>
35 #include <cstddef>
37 namespace {
39 std::size_t const nCharClassSize = 128;
41 sal_Unicode const cEscapePrefix = 0x25; // '%'
43 int getHexWeight(sal_uInt32 nUtf32)
45 return nUtf32 >= 0x30 && nUtf32 <= 0x39 ? // '0'--'9'
46 static_cast< int >(nUtf32 - 0x30) :
47 nUtf32 >= 0x41 && nUtf32 <= 0x46 ? // 'A'--'F'
48 static_cast< int >(nUtf32 - 0x41 + 10) :
49 nUtf32 >= 0x61 && nUtf32 <= 0x66 ? // 'a'--'f'
50 static_cast< int >(nUtf32 - 0x61 + 10) :
51 -1; // not a hex digit
54 bool isValid(sal_Bool const * pCharClass, sal_uInt32 nUtf32)
56 return nUtf32 < nCharClassSize && pCharClass[nUtf32];
59 void writeUnicode(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
60 sal_Unicode cChar)
62 rtl_uStringbuffer_insert(pBuffer, pCapacity, (*pBuffer)->length, &cChar, 1);
67 namespace rtl::uri::detail {
69 /** Read any of the following:
71 @li sequence of escape sequences representing character from eCharset,
72 translated to single UCS4 character; or
73 @li pair of UTF-16 surrogates, translated to single UCS4 character; or
74 @li single UTF-16 character, extended to UCS4 character.
76 sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
77 bool bEncoded, rtl_TextEncoding eCharset,
78 EscapeType * pType)
80 sal_uInt32 nChar = *(*pBegin)++;
81 int nWeight1;
82 int nWeight2;
83 if (nChar == cEscapePrefix && bEncoded && pEnd - *pBegin >= 2
84 && (nWeight1 = getHexWeight((*pBegin)[0])) >= 0
85 && (nWeight2 = getHexWeight((*pBegin)[1])) >= 0)
87 *pBegin += 2;
88 nChar = static_cast< sal_uInt32 >(nWeight1 << 4 | nWeight2);
89 if (nChar <= 0x7F)
91 *pType = EscapeChar;
93 else if (eCharset == RTL_TEXTENCODING_UTF8)
95 if (nChar >= 0xC0 && nChar <= 0xF4)
97 sal_uInt32 nEncoded;
98 int nShift;
99 sal_uInt32 nMin;
100 if (nChar <= 0xDF)
102 nEncoded = (nChar & 0x1F) << 6;
103 nShift = 0;
104 nMin = 0x80;
106 else if (nChar <= 0xEF)
108 nEncoded = (nChar & 0x0F) << 12;
109 nShift = 6;
110 nMin = 0x800;
112 else
114 nEncoded = (nChar & 0x07) << 18;
115 nShift = 12;
116 nMin = 0x10000;
119 sal_Unicode const * p = *pBegin;
120 bool bUTF8 = true;
122 for (; nShift >= 0; nShift -= 6)
124 if (pEnd - p < 3 || p[0] != cEscapePrefix
125 || (nWeight1 = getHexWeight(p[1])) < 8
126 || nWeight1 > 11
127 || (nWeight2 = getHexWeight(p[2])) < 0)
129 bUTF8 = false;
130 break;
132 p += 3;
133 nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift;
135 if (bUTF8 && rtl::isUnicodeScalarValue(nEncoded)
136 && nEncoded >= nMin)
138 *pBegin = p;
139 *pType = EscapeChar;
140 return nEncoded;
143 *pType = EscapeOctet;
145 else
147 OStringBuffer aBuf;
148 aBuf.append(static_cast< char >(nChar));
149 rtl_TextToUnicodeConverter aConverter
150 = rtl_createTextToUnicodeConverter(eCharset);
151 sal_Unicode const * p = *pBegin;
153 for (;;)
155 sal_Unicode aDst[2];
156 sal_uInt32 nInfo;
157 sal_Size nConverted;
158 sal_Size nDstSize = rtl_convertTextToUnicode(
159 aConverter, nullptr, aBuf.getStr(), aBuf.getLength(), aDst,
160 SAL_N_ELEMENTS( aDst ),
161 (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
162 | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
163 | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR),
164 &nInfo, &nConverted);
166 if (nInfo == 0)
168 assert( nConverted
169 == sal::static_int_cast< sal_uInt32 >(
170 aBuf.getLength()));
172 rtl_destroyTextToUnicodeConverter(aConverter);
173 *pBegin = p;
174 *pType = EscapeChar;
176 assert( nDstSize == 1
177 || (nDstSize == 2 && rtl::isHighSurrogate(aDst[0])
178 && rtl::isLowSurrogate(aDst[1])));
180 return nDstSize == 1
181 ? aDst[0] : rtl::combineSurrogates(aDst[0], aDst[1]);
183 if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL
184 && pEnd - p >= 3 && p[0] == cEscapePrefix
185 && (nWeight1 = getHexWeight(p[1])) >= 0
186 && (nWeight2 = getHexWeight(p[2])) >= 0)
188 p += 3;
189 aBuf.append(static_cast< char >(nWeight1 << 4 | nWeight2));
191 else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL
192 && p != pEnd && *p <= 0x7F)
194 aBuf.append(static_cast< char >(*p++));
196 else
198 assert(
199 (nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL)
200 == 0);
201 break;
204 rtl_destroyTextToUnicodeConverter(aConverter);
205 *pType = EscapeOctet;
207 return nChar;
210 *pType = EscapeNo;
211 return rtl::isHighSurrogate(nChar) && *pBegin < pEnd
212 && rtl::isLowSurrogate(**pBegin) ?
213 rtl::combineSurrogates(nChar, *(*pBegin)++) : nChar;
218 namespace {
220 void writeUcs4(rtl_uString ** pBuffer, sal_Int32 * pCapacity, sal_uInt32 nUtf32)
222 assert(rtl::isUnicodeCodePoint(nUtf32));
223 if (nUtf32 <= 0xFFFF)
225 writeUnicode(pBuffer, pCapacity, static_cast< sal_Unicode >(nUtf32));
227 else
229 nUtf32 -= 0x10000;
230 writeUnicode(
231 pBuffer, pCapacity,
232 static_cast< sal_Unicode >(nUtf32 >> 10 | 0xD800));
233 writeUnicode(
234 pBuffer, pCapacity,
235 static_cast< sal_Unicode >((nUtf32 & 0x3FF) | 0xDC00));
239 void writeEscapeOctet(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
240 sal_uInt32 nOctet)
242 assert(nOctet <= 0xFF); // bad octet
244 static sal_Unicode const aHex[16]
245 = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
246 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }; /* '0'--'9', 'A'--'F' */
248 writeUnicode(pBuffer, pCapacity, cEscapePrefix);
249 writeUnicode(pBuffer, pCapacity, aHex[nOctet >> 4]);
250 writeUnicode(pBuffer, pCapacity, aHex[nOctet & 15]);
253 bool writeEscapeChar(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
254 sal_uInt32 nUtf32, rtl_TextEncoding eCharset, bool bStrict)
256 assert(rtl::isUnicodeCodePoint(nUtf32));
257 if (eCharset == RTL_TEXTENCODING_UTF8)
259 if (nUtf32 < 0x80)
261 writeEscapeOctet(pBuffer, pCapacity, nUtf32);
263 else if (nUtf32 < 0x800)
265 writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 6 | 0xC0);
266 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
268 else if (nUtf32 < 0x10000)
270 writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 12 | 0xE0);
271 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
272 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
274 else
276 writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 18 | 0xF0);
277 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 12 & 0x3F) | 0x80);
278 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
279 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
282 else
284 rtl_UnicodeToTextConverter aConverter
285 = rtl_createUnicodeToTextConverter(eCharset);
286 sal_Unicode aSrc[2];
287 sal_Size nSrcSize;
288 if (nUtf32 <= 0xFFFF)
290 aSrc[0] = static_cast< sal_Unicode >(nUtf32);
291 nSrcSize = 1;
293 else
295 aSrc[0] = static_cast< sal_Unicode >(
296 ((nUtf32 - 0x10000) >> 10) | 0xD800);
297 aSrc[1] = static_cast< sal_Unicode >(
298 ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00);
299 nSrcSize = 2;
302 sal_Char aDst[32]; // FIXME random value
303 sal_uInt32 nInfo;
304 sal_Size nConverted;
305 sal_Size nDstSize = rtl_convertUnicodeToText(
306 aConverter, nullptr, aSrc, nSrcSize, aDst, sizeof aDst,
307 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
308 | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
309 | RTL_UNICODETOTEXT_FLAGS_FLUSH,
310 &nInfo, &nConverted);
311 assert((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0);
312 rtl_destroyUnicodeToTextConverter(aConverter);
314 if (nInfo == 0)
316 assert(nConverted == nSrcSize); // bad rtl_convertUnicodeToText
318 for (sal_Size i = 0; i < nDstSize; ++i)
320 writeEscapeOctet(pBuffer, pCapacity,
321 static_cast< unsigned char >(aDst[i]));
322 // FIXME all octets are escaped, even if there is no need
325 else
327 if (bStrict)
328 return false;
330 writeUcs4(pBuffer, pCapacity, nUtf32);
333 return true;
336 struct Component
338 sal_Unicode const * pBegin;
339 sal_Unicode const * pEnd;
341 Component(): pBegin(nullptr), pEnd(nullptr) {}
343 bool isPresent() const { return pBegin != nullptr; }
345 sal_Int32 getLength() const;
348 sal_Int32 Component::getLength() const
350 assert(isPresent()); // taking length of non-present component
351 return static_cast< sal_Int32 >(pEnd - pBegin);
354 struct Components
356 Component aScheme;
357 Component aAuthority;
358 Component aPath;
359 Component aQuery;
360 Component aFragment;
363 void parseUriRef(rtl_uString const * pUriRef, Components * pComponents)
365 // This algorithm is liberal and accepts various forms of illegal input.
367 sal_Unicode const * pBegin = pUriRef->buffer;
368 sal_Unicode const * pEnd = pBegin + pUriRef->length;
369 sal_Unicode const * pPos = pBegin;
371 if (pPos != pEnd && rtl::isAsciiAlpha(*pPos))
373 for (sal_Unicode const * p = pPos + 1; p != pEnd; ++p)
375 if (*p == ':')
377 pComponents->aScheme.pBegin = pBegin;
378 pComponents->aScheme.pEnd = ++p;
379 pPos = p;
380 break;
383 if (!rtl::isAsciiAlphanumeric(*p) && *p != '+' && *p != '-'
384 && *p != '.')
386 break;
391 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
393 pComponents->aAuthority.pBegin = pPos;
394 pPos += 2;
395 while (pPos != pEnd && *pPos != '/' && *pPos != '?' && *pPos != '#')
397 ++pPos;
400 pComponents->aAuthority.pEnd = pPos;
403 pComponents->aPath.pBegin = pPos;
404 while (pPos != pEnd && *pPos != '?' && * pPos != '#')
406 ++pPos;
409 pComponents->aPath.pEnd = pPos;
411 if (pPos != pEnd && *pPos == '?')
413 pComponents->aQuery.pBegin = pPos++;
414 while (pPos != pEnd && * pPos != '#')
416 ++pPos;
419 pComponents->aQuery.pEnd = pPos;
422 if (pPos != pEnd)
424 assert(*pPos == '#');
425 pComponents->aFragment.pBegin = pPos;
426 pComponents->aFragment.pEnd = pEnd;
430 void appendPath(
431 OUStringBuffer & buffer, sal_Int32 bufferStart, bool precedingSlash,
432 sal_Unicode const * pathBegin, sal_Unicode const * pathEnd)
434 while (precedingSlash || pathBegin != pathEnd)
436 sal_Unicode const * p = pathBegin;
437 while (p != pathEnd && *p != '/')
439 ++p;
442 std::size_t n = p - pathBegin;
443 if (n == 1 && pathBegin[0] == '.')
445 // input begins with "." -> remove from input (and done):
446 // i.e., !precedingSlash -> !precedingSlash
447 // input begins with "./" -> remove from input:
448 // i.e., !precedingSlash -> !precedingSlash
449 // input begins with "/." -> replace with "/" in input (and not yet
450 // done):
451 // i.e., precedingSlash -> precedingSlash
452 // input begins with "/./" -> replace with "/" in input:
453 // i.e., precedingSlash -> precedingSlash
455 else if (n == 2 && pathBegin[0] == '.' && pathBegin[1] == '.')
457 // input begins with ".." -> remove from input (and done):
458 // i.e., !precedingSlash -> !precedingSlash
459 // input begins with "../" -> remove from input
460 // i.e., !precedingSlash -> !precedingSlash
461 // input begins with "/.." -> replace with "/" in input, and shrink
462 // output (not yet done):
463 // i.e., precedingSlash -> precedingSlash
464 // input begins with "/../" -> replace with "/" in input, and shrink
465 // output:
466 // i.e., precedingSlash -> precedingSlash
467 if (precedingSlash)
469 buffer.truncate(
470 bufferStart
471 + std::max<sal_Int32>(
472 rtl_ustr_lastIndexOfChar_WithLength(
473 buffer.getStr() + bufferStart,
474 buffer.getLength() - bufferStart, '/'),
475 0));
478 else
480 if (precedingSlash)
481 buffer.append('/');
483 buffer.append(pathBegin, n);
484 precedingSlash = p != pathEnd;
486 pathBegin = p + (p == pathEnd ? 0 : 1);
492 sal_Bool const * SAL_CALL rtl_getUriCharClass(rtl_UriCharClass eCharClass)
493 SAL_THROW_EXTERN_C()
495 static sal_Bool const aCharClass[][nCharClassSize] = {
496 {false, false, false, false, false, false, false, false,// None
497 false, false, false, false, false, false, false, false,
498 false, false, false, false, false, false, false, false,
499 false, false, false, false, false, false, false, false,
500 false, false, false, false, false, false, false, false, // !"#$%&'
501 false, false, false, false, false, false, false, false, // ()*+,-./
502 false, false, false, false, false, false, false, false, // 01234567
503 false, false, false, false, false, false, false, false, // 89:;<=>?
504 false, false, false, false, false, false, false, false, // @ABCDEFG
505 false, false, false, false, false, false, false, false, // HIJKLMNO
506 false, false, false, false, false, false, false, false, // PQRSTUVW
507 false, false, false, false, false, false, false, false, // XYZ[\]^_
508 false, false, false, false, false, false, false, false, // `abcdefg
509 false, false, false, false, false, false, false, false, // hijklmno
510 false, false, false, false, false, false, false, false, // pqrstuvw
511 false, false, false, false, false, false, false, false}, // xyz{|}~
512 {false, false, false, false, false, false, false, false,// Uric
513 false, false, false, false, false, false, false, false,
514 false, false, false, false, false, false, false, false,
515 false, false, false, false, false, false, false, false,
516 false, true, false, false, true, false, true, true, // !"#$%&'
517 true, true, true, true, true, true, true, true, // ()*+,-./
518 true, true, true, true, true, true, true, true, // 01234567
519 true, true, true, true, false, true, false, true, // 89:;<=>?
520 true, true, true, true, true, true, true, true, // @ABCDEFG
521 true, true, true, true, true, true, true, true, // HIJKLMNO
522 true, true, true, true, true, true, true, true, // PQRSTUVW
523 true, true, true, true, false, true, false, true, // XYZ[\]^_
524 false, true, true, true, true, true, true, true, // `abcdefg
525 true, true, true, true, true, true, true, true, // hijklmno
526 true, true, true, true, true, true, true, true, // pqrstuvw
527 true, true, true, false, false, false, true, false}, // xyz{|}~
528 {false, false, false, false, false, false, false, false,// UricNoSlash
529 false, false, false, false, false, false, false, false,
530 false, false, false, false, false, false, false, false,
531 false, false, false, false, false, false, false, false,
532 false, true, false, false, true, false, true, true, // !"#$%&'
533 true, true, true, true, true, true, true, false, // ()*+,-./
534 true, true, true, true, true, true, true, true, // 01234567
535 true, true, true, true, false, true, false, true, // 89:;<=>?
536 true, true, true, true, true, true, true, true, // @ABCDEFG
537 true, true, true, true, true, true, true, true, // HIJKLMNO
538 true, true, true, true, true, true, true, true, // PQRSTUVW
539 true, true, true, false, false, false, false, true, // XYZ[\]^_
540 false, true, true, true, true, true, true, true, // `abcdefg
541 true, true, true, true, true, true, true, true, // hijklmno
542 true, true, true, true, true, true, true, true, // pqrstuvw
543 true, true, true, false, false, false, true, false}, // xyz{|}~
544 {false, false, false, false, false, false, false, false,// RelSegment
545 false, false, false, false, false, false, false, false,
546 false, false, false, false, false, false, false, false,
547 false, false, false, false, false, false, false, false,
548 false, true, false, false, true, false, true, true, // !"#$%&'
549 true, true, true, true, true, true, true, false, // ()*+,-./
550 true, true, true, true, true, true, true, true, // 01234567
551 true, true, false, true, false, true, false, false, // 89:;<=>?
552 true, true, true, true, true, true, true, true, // @ABCDEFG
553 true, true, true, true, true, true, true, true, // HIJKLMNO
554 true, true, true, true, true, true, true, true, // PQRSTUVW
555 true, true, true, false, false, false, false, true, // XYZ[\]^_
556 false, true, true, true, true, true, true, true, // `abcdefg
557 true, true, true, true, true, true, true, true, // hijklmno
558 true, true, true, true, true, true, true, true, // pqrstuvw
559 true, true, true, false, false, false, true, false}, // xyz{|}~
560 {false, false, false, false, false, false, false, false,// RegName
561 false, false, false, false, false, false, false, false,
562 false, false, false, false, false, false, false, false,
563 false, false, false, false, false, false, false, false,
564 false, true, false, false, true, false, true, true, // !"#$%&'
565 true, true, true, true, true, true, true, false, // ()*+,-./
566 true, true, true, true, true, true, true, true, // 01234567
567 true, true, true, true, false, true, false, false, // 89:;<=>?
568 true, true, true, true, true, true, true, true, // @ABCDEFG
569 true, true, true, true, true, true, true, true, // HIJKLMNO
570 true, true, true, true, true, true, true, true, // PQRSTUVW
571 true, true, true, false, false, false, false, true, // XYZ[\]^_
572 false, true, true, true, true, true, true, true, // `abcdefg
573 true, true, true, true, true, true, true, true, // hijklmno
574 true, true, true, true, true, true, true, true, // pqrstuvw
575 true, true, true, false, false, false, true, false}, // xyz{|}~
576 {false, false, false, false, false, false, false, false,// Userinfo
577 false, false, false, false, false, false, false, false,
578 false, false, false, false, false, false, false, false,
579 false, false, false, false, false, false, false, false,
580 false, true, false, false, true, false, true, true, // !"#$%&'
581 true, true, true, true, true, true, true, false, // ()*+,-./
582 true, true, true, true, true, true, true, true, // 01234567
583 true, true, true, true, false, true, false, false, // 89:;<=>?
584 false, true, true, true, true, true, true, true, // @ABCDEFG
585 true, true, true, true, true, true, true, true, // HIJKLMNO
586 true, true, true, true, true, true, true, true, // PQRSTUVW
587 true, true, true, false, false, false, false, true, // XYZ[\]^_
588 false, true, true, true, true, true, true, true, // `abcdefg
589 true, true, true, true, true, true, true, true, // hijklmno
590 true, true, true, true, true, true, true, true, // pqrstuvw
591 true, true, true, false, false, false, true, false}, // xyz{|}~
592 {false, false, false, false, false, false, false, false,// Pchar
593 false, false, false, false, false, false, false, false,
594 false, false, false, false, false, false, false, false,
595 false, false, false, false, false, false, false, false,
596 false, true, false, false, true, false, true, true, // !"#$%&'
597 true, true, true, true, true, true, true, false, // ()*+,-./
598 true, true, true, true, true, true, true, true, // 01234567
599 true, true, true, false, false, true, false, false, // 89:;<=>?
600 true, true, true, true, true, true, true, true, // @ABCDEFG
601 true, true, true, true, true, true, true, true, // HIJKLMNO
602 true, true, true, true, true, true, true, true, // PQRSTUVW
603 true, true, true, false, false, false, false, true, // XYZ[\]^_
604 false, true, true, true, true, true, true, true, // `abcdefg
605 true, true, true, true, true, true, true, true, // hijklmno
606 true, true, true, true, true, true, true, true, // pqrstuvw
607 true, true, true, false, false, false, true, false}, // xyz{|}~
608 {false, false, false, false, false, false, false, false,// UnoParamValue
609 false, false, false, false, false, false, false, false,
610 false, false, false, false, false, false, false, false,
611 false, false, false, false, false, false, false, false,
612 false, true, false, false, true, false, true, true, // !"#$%&'
613 true, true, true, true, false, true, true, true, // ()*+,-./
614 true, true, true, true, true, true, true, true, // 01234567
615 true, true, true, false, false, false, false, true, // 89:;<=>?
616 true, true, true, true, true, true, true, true, // @ABCDEFG
617 true, true, true, true, true, true, true, true, // HIJKLMNO
618 true, true, true, true, true, true, true, true, // PQRSTUVW
619 true, true, true, false, false, false, false, true, // XYZ[\]^_
620 false, true, true, true, true, true, true, true, // `abcdefg
621 true, true, true, true, true, true, true, true, // hijklmno
622 true, true, true, true, true, true, true, true, // pqrstuvw
623 true, true, true, false, false, false, true, false}}; // xyz{|}~
625 assert(
626 (eCharClass >= 0
627 && (sal::static_int_cast< std::size_t >(eCharClass)
628 < SAL_N_ELEMENTS(aCharClass)))); // bad eCharClass
629 return aCharClass[eCharClass];
632 void SAL_CALL rtl_uriEncode(rtl_uString * pText, sal_Bool const * pCharClass,
633 rtl_UriEncodeMechanism eMechanism,
634 rtl_TextEncoding eCharset, rtl_uString ** pResult)
635 SAL_THROW_EXTERN_C()
637 assert(!pCharClass[0x25]); // make sure the percent sign is encoded...
639 sal_Unicode const * p = pText->buffer;
640 sal_Unicode const * pEnd = p + pText->length;
641 sal_Int32 nCapacity = 256;
642 rtl_uString_new_WithLength(pResult, nCapacity);
644 while (p < pEnd)
646 rtl::uri::detail::EscapeType eType;
647 sal_uInt32 nUtf32 = rtl::uri::detail::readUcs4(
648 &p, pEnd,
649 (eMechanism == rtl_UriEncodeKeepEscapes
650 || eMechanism == rtl_UriEncodeCheckEscapes
651 || eMechanism == rtl_UriEncodeStrictKeepEscapes),
652 eCharset, &eType);
654 switch (eType)
656 case rtl::uri::detail::EscapeNo:
657 if (isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
659 writeUnicode(pResult, &nCapacity,
660 static_cast< sal_Unicode >(nUtf32));
662 else if (!writeEscapeChar(
663 pResult, &nCapacity, nUtf32, eCharset,
664 (eMechanism == rtl_UriEncodeStrict
665 || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
667 rtl_uString_new(pResult);
668 return;
670 break;
672 case rtl::uri::detail::EscapeChar:
673 if (eMechanism == rtl_UriEncodeCheckEscapes
674 && isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
676 writeUnicode(pResult, &nCapacity,
677 static_cast< sal_Unicode >(nUtf32));
679 else if (!writeEscapeChar(
680 pResult, &nCapacity, nUtf32, eCharset,
681 (eMechanism == rtl_UriEncodeStrict
682 || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
684 rtl_uString_new(pResult);
685 return;
687 break;
689 case rtl::uri::detail::EscapeOctet:
690 writeEscapeOctet(pResult, &nCapacity, nUtf32);
691 break;
694 *pResult = rtl_uStringBuffer_makeStringAndClear(pResult, &nCapacity);
697 void SAL_CALL rtl_uriDecode(rtl_uString * pText,
698 rtl_UriDecodeMechanism eMechanism,
699 rtl_TextEncoding eCharset, rtl_uString ** pResult)
700 SAL_THROW_EXTERN_C()
702 switch (eMechanism)
704 case rtl_UriDecodeNone:
705 rtl_uString_assign(pResult, pText);
706 break;
708 case rtl_UriDecodeToIuri:
709 eCharset = RTL_TEXTENCODING_UTF8;
710 [[fallthrough]];
711 default: // rtl_UriDecodeWithCharset, rtl_UriDecodeStrict
713 sal_Unicode const * p = pText->buffer;
714 sal_Unicode const * pEnd = p + pText->length;
715 sal_Int32 nCapacity = pText->length;
716 rtl_uString_new_WithLength(pResult, nCapacity);
718 while (p < pEnd)
720 rtl::uri::detail::EscapeType eType;
721 sal_uInt32 nUtf32 = rtl::uri::detail::readUcs4(&p, pEnd, true, eCharset, &eType);
722 switch (eType)
724 case rtl::uri::detail::EscapeChar:
725 if (nUtf32 <= 0x7F && eMechanism == rtl_UriDecodeToIuri)
727 writeEscapeOctet(pResult, &nCapacity, nUtf32);
728 break;
730 [[fallthrough]];
732 case rtl::uri::detail::EscapeNo:
733 writeUcs4(pResult, &nCapacity, nUtf32);
734 break;
736 case rtl::uri::detail::EscapeOctet:
737 if (eMechanism == rtl_UriDecodeStrict)
739 rtl_uString_new(pResult);
740 return;
742 writeEscapeOctet(pResult, &nCapacity, nUtf32);
743 break;
747 *pResult = rtl_uStringBuffer_makeStringAndClear( pResult, &nCapacity );
749 break;
753 sal_Bool SAL_CALL rtl_uriConvertRelToAbs(rtl_uString * pBaseUriRef,
754 rtl_uString * pRelUriRef,
755 rtl_uString ** pResult,
756 rtl_uString ** pException)
757 SAL_THROW_EXTERN_C()
759 // Use the strict parser algorithm from RFC 3986, section 5.2, to turn the
760 // relative URI into an absolute one:
761 Components aRelComponents;
762 parseUriRef(pRelUriRef, &aRelComponents);
763 OUStringBuffer aBuffer(256);
765 if (aRelComponents.aScheme.isPresent())
767 aBuffer.append(aRelComponents.aScheme.pBegin,
768 aRelComponents.aScheme.getLength());
770 if (aRelComponents.aAuthority.isPresent())
772 aBuffer.append(aRelComponents.aAuthority.pBegin,
773 aRelComponents.aAuthority.getLength());
776 appendPath(
777 aBuffer, aBuffer.getLength(), false, aRelComponents.aPath.pBegin,
778 aRelComponents.aPath.pEnd);
780 if (aRelComponents.aQuery.isPresent())
782 aBuffer.append(aRelComponents.aQuery.pBegin,
783 aRelComponents.aQuery.getLength());
786 else
788 Components aBaseComponents;
789 parseUriRef(pBaseUriRef, &aBaseComponents);
790 if (!aBaseComponents.aScheme.isPresent())
792 rtl_uString_assign(
793 pException,
794 (OUString(
795 "<" + OUString::unacquired(&pBaseUriRef)
796 + "> does not start with a scheme component")
797 .pData));
798 return false;
801 aBuffer.append(aBaseComponents.aScheme.pBegin,
802 aBaseComponents.aScheme.getLength());
803 if (aRelComponents.aAuthority.isPresent())
805 aBuffer.append(aRelComponents.aAuthority.pBegin,
806 aRelComponents.aAuthority.getLength());
807 appendPath(
808 aBuffer, aBuffer.getLength(), false,
809 aRelComponents.aPath.pBegin, aRelComponents.aPath.pEnd);
811 if (aRelComponents.aQuery.isPresent())
813 aBuffer.append(aRelComponents.aQuery.pBegin,
814 aRelComponents.aQuery.getLength());
817 else
819 if (aBaseComponents.aAuthority.isPresent())
821 aBuffer.append(aBaseComponents.aAuthority.pBegin,
822 aBaseComponents.aAuthority.getLength());
825 if (aRelComponents.aPath.pBegin == aRelComponents.aPath.pEnd)
827 aBuffer.append(aBaseComponents.aPath.pBegin,
828 aBaseComponents.aPath.getLength());
829 if (aRelComponents.aQuery.isPresent())
831 aBuffer.append(aRelComponents.aQuery.pBegin,
832 aRelComponents.aQuery.getLength());
834 else if (aBaseComponents.aQuery.isPresent())
836 aBuffer.append(aBaseComponents.aQuery.pBegin,
837 aBaseComponents.aQuery.getLength());
840 else
842 if (*aRelComponents.aPath.pBegin == '/')
844 appendPath(
845 aBuffer, aBuffer.getLength(), false,
846 aRelComponents.aPath.pBegin, aRelComponents.aPath.pEnd);
848 else if (aBaseComponents.aAuthority.isPresent()
849 && aBaseComponents.aPath.pBegin
850 == aBaseComponents.aPath.pEnd)
852 appendPath(
853 aBuffer, aBuffer.getLength(), true,
854 aRelComponents.aPath.pBegin, aRelComponents.aPath.pEnd);
856 else
858 sal_Int32 n = aBuffer.getLength();
859 sal_Int32 i = rtl_ustr_lastIndexOfChar_WithLength(
860 aBaseComponents.aPath.pBegin,
861 aBaseComponents.aPath.getLength(), '/');
863 if (i >= 0)
865 appendPath(
866 aBuffer, n, false, aBaseComponents.aPath.pBegin,
867 aBaseComponents.aPath.pBegin + i);
870 appendPath(
871 aBuffer, n, i >= 0, aRelComponents.aPath.pBegin,
872 aRelComponents.aPath.pEnd);
875 if (aRelComponents.aQuery.isPresent())
877 aBuffer.append(aRelComponents.aQuery.pBegin,
878 aRelComponents.aQuery.getLength());
883 if (aRelComponents.aFragment.isPresent())
885 aBuffer.append(aRelComponents.aFragment.pBegin,
886 aRelComponents.aFragment.getLength());
889 rtl_uString_assign(pResult, aBuffer.makeStringAndClear().pData);
890 return true;
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */