lok: vcl: fix multiple floatwin removal case more robustly.
[LibreOffice.git] / sal / rtl / uri.cxx
blob3bf0e1d34c2fb6f8e5286af5f7d352d0b2abeb7a
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed
 * with this work for additional information regarding copyright
 * ownership. The ASF licenses this file to you under the Apache
 * License, Version 2.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #include <rtl/character.hxx>
21 #include <rtl/strbuf.hxx>
22 #include <rtl/textenc.h>
23 #include <rtl/textcvt.h>
24 #include <rtl/uri.h>
25 #include <rtl/ustrbuf.h>
26 #include <rtl/ustrbuf.hxx>
27 #include <rtl/ustring.h>
28 #include <rtl/ustring.hxx>
29 #include <sal/types.h>
30 #include <sal/macros.h>
32 #include <algorithm>
33 #include <cstddef>
35 namespace {
37 std::size_t const nCharClassSize = 128;
39 sal_Unicode const cEscapePrefix = 0x25; // '%'
41 int getHexWeight(sal_uInt32 nUtf32)
43 return nUtf32 >= 0x30 && nUtf32 <= 0x39 ? // '0'--'9'
44 static_cast< int >(nUtf32 - 0x30) :
45 nUtf32 >= 0x41 && nUtf32 <= 0x46 ? // 'A'--'F'
46 static_cast< int >(nUtf32 - 0x41 + 10) :
47 nUtf32 >= 0x61 && nUtf32 <= 0x66 ? // 'a'--'f'
48 static_cast< int >(nUtf32 - 0x61 + 10) :
49 -1; // not a hex digit
52 bool isValid(sal_Bool const * pCharClass, sal_uInt32 nUtf32)
54 return nUtf32 < nCharClassSize && pCharClass[nUtf32];
57 void writeUnicode(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
58 sal_Unicode cChar)
60 rtl_uStringbuffer_insert(pBuffer, pCapacity, (*pBuffer)->length, &cChar, 1);
/** How the value returned by readUcs4 was represented in the input. */
enum EscapeType
{
    /// Read literally, not from an escape sequence.
    EscapeNo,
    /// Decoded from one or more escape sequences into a character.
    EscapeChar,
    /// A single escaped octet that was not decoded into a character.
    EscapeOctet
};
/** Read any of the following:

   @li sequence of escape sequences representing character from eCharset,
       translated to single UCS4 character; or
   @li pair of UTF-16 surrogates, translated to single UCS4 character; or
   @li single UTF-16 character, extended to UCS4 character.
 */
// Parameters:
//   pBegin    in/out read position; always advanced past at least one UTF-16
//             unit, and further past every unit that was consumed.
//   pEnd      end of the input; the caller must ensure *pBegin != pEnd on
//             entry, because the first unit is read unconditionally.
//   bEncoded  when false, '%' is treated as an ordinary character.
//   eCharset  the encoding that escaped octets >= 0x80 are interpreted in.
//   pType     out: EscapeNo, EscapeChar or EscapeOctet, describing how the
//             returned value was represented in the input.
// Returns the code point read, or for EscapeOctet the value of the single
// escaped octet.
77 sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
78 bool bEncoded, rtl_TextEncoding eCharset,
79 EscapeType * pType)
81 sal_uInt32 nChar = *(*pBegin)++;
82 int nWeight1;
83 int nWeight2;
// An escape is recognized only if '%' is followed by two hex digits.
84 if (nChar == cEscapePrefix && bEncoded && pEnd - *pBegin >= 2
85 && (nWeight1 = getHexWeight((*pBegin)[0])) >= 0
86 && (nWeight2 = getHexWeight((*pBegin)[1])) >= 0)
88 *pBegin += 2;
89 nChar = static_cast< sal_uInt32 >(nWeight1 << 4 | nWeight2);
// Octets up to 0x7F are reported as a character regardless of eCharset.
90 if (nChar <= 0x7F)
92 *pType = EscapeChar;
94 else if (eCharset == RTL_TEXTENCODING_UTF8)
// Only 0xC0--0xF4 are accepted as UTF-8 lead octets; anything else is
// reported as a lone EscapeOctet below.
96 if (nChar >= 0xC0 && nChar <= 0xF4)
98 sal_uInt32 nEncoded;
99 int nShift;
100 sal_uInt32 nMin;
// Two-octet sequence: one continuation octet follows.
101 if (nChar <= 0xDF)
103 nEncoded = (nChar & 0x1F) << 6;
104 nShift = 0;
105 nMin = 0x80;
// Three-octet sequence: two continuation octets follow.
107 else if (nChar <= 0xEF)
109 nEncoded = (nChar & 0x0F) << 12;
110 nShift = 6;
111 nMin = 0x800;
// Four-octet sequence: three continuation octets follow.
113 else
115 nEncoded = (nChar & 0x07) << 18;
116 nShift = 12;
117 nMin = 0x10000;
// Scan ahead with a local pointer so that *pBegin is only moved once the
// whole sequence has been validated.
120 sal_Unicode const * p = *pBegin;
121 bool bUTF8 = true;
123 for (; nShift >= 0; nShift -= 6)
// Each continuation must be "%XY" with X in 8..B, i.e. an octet 0x80--0xBF.
125 if (pEnd - p < 3 || p[0] != cEscapePrefix
126 || (nWeight1 = getHexWeight(p[1])) < 8
127 || nWeight1 > 11
128 || (nWeight2 = getHexWeight(p[2])) < 0)
130 bUTF8 = false;
131 break;
133 p += 3;
// Fold the low six bits of the continuation octet into the result.
134 nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift;
// The nMin comparison rejects results that a shorter sequence could encode.
136 if (bUTF8 && rtl::isUnicodeScalarValue(nEncoded)
137 && nEncoded >= nMin)
139 *pBegin = p;
140 *pType = EscapeChar;
141 return nEncoded;
// Not a valid UTF-8 sequence: report just the first escaped octet.
144 *pType = EscapeOctet;
146 else
// Other charsets: collect octets in aBuf and retry the conversion until it
// succeeds or cannot be continued.
148 OStringBuffer aBuf;
149 aBuf.append(static_cast< char >(nChar));
150 rtl_TextToUnicodeConverter aConverter
151 = rtl_createTextToUnicodeConverter(eCharset);
152 sal_Unicode const * p = *pBegin;
154 for (;;)
156 sal_Unicode aDst[2];
157 sal_uInt32 nInfo;
158 sal_Size nConverted;
159 sal_Size nDstSize = rtl_convertTextToUnicode(
160 aConverter, nullptr, aBuf.getStr(), aBuf.getLength(), aDst,
161 SAL_N_ELEMENTS( aDst ),
162 (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
163 | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
164 | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR),
165 &nInfo, &nConverted);
// nInfo == 0: the collected octets converted cleanly to one character.
167 if (nInfo == 0)
169 assert( nConverted
170 == sal::static_int_cast< sal_uInt32 >(
171 aBuf.getLength()));
173 rtl_destroyTextToUnicodeConverter(aConverter);
174 *pBegin = p;
175 *pType = EscapeChar;
177 assert( nDstSize == 1
178 || (nDstSize == 2 && rtl::isHighSurrogate(aDst[0])
179 && rtl::isLowSurrogate(aDst[1])));
181 return nDstSize == 1
182 ? aDst[0] : rtl::combineSurrogates(aDst[0], aDst[1]);
// The converter wants more input: take the next escaped octet if there is one...
184 if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL
185 && pEnd - p >= 3 && p[0] == cEscapePrefix
186 && (nWeight1 = getHexWeight(p[1])) >= 0
187 && (nWeight2 = getHexWeight(p[2])) >= 0)
189 p += 3;
190 aBuf.append(static_cast< char >(nWeight1 << 4 | nWeight2));
// ...or else an unescaped character up to 0x7F, used as a trailing octet.
192 else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL
193 && p != pEnd && *p <= 0x7F)
195 aBuf.append(static_cast< char >(*p++));
// Otherwise give up on decoding a character.
197 else
199 assert(
200 (nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL)
201 == 0);
202 break;
// Conversion failed: report the first escaped octet on its own.
205 rtl_destroyTextToUnicodeConverter(aConverter);
206 *pType = EscapeOctet;
208 return nChar;
// Not an escape sequence: return the unit itself, combined with a directly
// following low surrogate when it is a high surrogate.
211 *pType = EscapeNo;
212 return rtl::isHighSurrogate(nChar) && *pBegin < pEnd
213 && rtl::isLowSurrogate(**pBegin) ?
214 rtl::combineSurrogates(nChar, *(*pBegin)++) : nChar;
217 void writeUcs4(rtl_uString ** pBuffer, sal_Int32 * pCapacity, sal_uInt32 nUtf32)
219 assert(rtl::isUnicodeCodePoint(nUtf32));
220 if (nUtf32 <= 0xFFFF)
222 writeUnicode(pBuffer, pCapacity, static_cast< sal_Unicode >(nUtf32));
224 else
226 nUtf32 -= 0x10000;
227 writeUnicode(
228 pBuffer, pCapacity,
229 static_cast< sal_Unicode >(nUtf32 >> 10 | 0xD800));
230 writeUnicode(
231 pBuffer, pCapacity,
232 static_cast< sal_Unicode >((nUtf32 & 0x3FF) | 0xDC00));
236 void writeEscapeOctet(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
237 sal_uInt32 nOctet)
239 assert(nOctet <= 0xFF); // bad octet
241 static sal_Unicode const aHex[16]
242 = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
243 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }; /* '0'--'9', 'A'--'F' */
245 writeUnicode(pBuffer, pCapacity, cEscapePrefix);
246 writeUnicode(pBuffer, pCapacity, aHex[nOctet >> 4]);
247 writeUnicode(pBuffer, pCapacity, aHex[nOctet & 15]);
250 bool writeEscapeChar(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
251 sal_uInt32 nUtf32, rtl_TextEncoding eCharset, bool bStrict)
253 assert(rtl::isUnicodeCodePoint(nUtf32));
254 if (eCharset == RTL_TEXTENCODING_UTF8)
256 if (nUtf32 < 0x80)
258 writeEscapeOctet(pBuffer, pCapacity, nUtf32);
260 else if (nUtf32 < 0x800)
262 writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 6 | 0xC0);
263 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
265 else if (nUtf32 < 0x10000)
267 writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 12 | 0xE0);
268 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
269 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
271 else
273 writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 18 | 0xF0);
274 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 12 & 0x3F) | 0x80);
275 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
276 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
279 else
281 rtl_UnicodeToTextConverter aConverter
282 = rtl_createUnicodeToTextConverter(eCharset);
283 sal_Unicode aSrc[2];
284 sal_Size nSrcSize;
285 if (nUtf32 <= 0xFFFF)
287 aSrc[0] = static_cast< sal_Unicode >(nUtf32);
288 nSrcSize = 1;
290 else
292 aSrc[0] = static_cast< sal_Unicode >(
293 ((nUtf32 - 0x10000) >> 10) | 0xD800);
294 aSrc[1] = static_cast< sal_Unicode >(
295 ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00);
296 nSrcSize = 2;
299 sal_Char aDst[32]; // FIXME random value
300 sal_uInt32 nInfo;
301 sal_Size nConverted;
302 sal_Size nDstSize = rtl_convertUnicodeToText(
303 aConverter, nullptr, aSrc, nSrcSize, aDst, sizeof aDst,
304 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
305 | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
306 | RTL_UNICODETOTEXT_FLAGS_FLUSH,
307 &nInfo, &nConverted);
308 assert((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0);
309 rtl_destroyUnicodeToTextConverter(aConverter);
311 if (nInfo == 0)
313 assert(nConverted == nSrcSize); // bad rtl_convertUnicodeToText
315 for (sal_Size i = 0; i < nDstSize; ++i)
317 writeEscapeOctet(pBuffer, pCapacity,
318 static_cast< unsigned char >(aDst[i]));
319 // FIXME all octets are escaped, even if there is no need
322 else
324 if (bStrict)
325 return false;
327 writeUcs4(pBuffer, pCapacity, nUtf32);
330 return true;
333 struct Component
335 sal_Unicode const * pBegin;
336 sal_Unicode const * pEnd;
338 Component(): pBegin(nullptr), pEnd(nullptr) {}
340 bool isPresent() const { return pBegin != nullptr; }
342 sal_Int32 getLength() const;
345 sal_Int32 Component::getLength() const
347 assert(isPresent()); // taking length of non-present component
348 return static_cast< sal_Int32 >(pEnd - pBegin);
351 struct Components
353 Component aScheme;
354 Component aAuthority;
355 Component aPath;
356 Component aQuery;
357 Component aFragment;
360 void parseUriRef(rtl_uString const * pUriRef, Components * pComponents)
362 // This algorithm is liberal and accepts various forms of illegal input.
364 sal_Unicode const * pBegin = pUriRef->buffer;
365 sal_Unicode const * pEnd = pBegin + pUriRef->length;
366 sal_Unicode const * pPos = pBegin;
368 if (pPos != pEnd && rtl::isAsciiAlpha(*pPos))
370 for (sal_Unicode const * p = pPos + 1; p != pEnd; ++p)
372 if (*p == ':')
374 pComponents->aScheme.pBegin = pBegin;
375 pComponents->aScheme.pEnd = ++p;
376 pPos = p;
377 break;
380 if (!rtl::isAsciiAlphanumeric(*p) && *p != '+' && *p != '-'
381 && *p != '.')
383 break;
388 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
390 pComponents->aAuthority.pBegin = pPos;
391 pPos += 2;
392 while (pPos != pEnd && *pPos != '/' && *pPos != '?' && *pPos != '#')
394 ++pPos;
397 pComponents->aAuthority.pEnd = pPos;
400 pComponents->aPath.pBegin = pPos;
401 while (pPos != pEnd && *pPos != '?' && * pPos != '#')
403 ++pPos;
406 pComponents->aPath.pEnd = pPos;
408 if (pPos != pEnd && *pPos == '?')
410 pComponents->aQuery.pBegin = pPos++;
411 while (pPos != pEnd && * pPos != '#')
413 ++pPos;
416 pComponents->aQuery.pEnd = pPos;
419 if (pPos != pEnd)
421 assert(*pPos == '#');
422 pComponents->aFragment.pBegin = pPos;
423 pComponents->aFragment.pEnd = pEnd;
427 void appendPath(
428 OUStringBuffer & buffer, sal_Int32 bufferStart, bool precedingSlash,
429 sal_Unicode const * pathBegin, sal_Unicode const * pathEnd)
431 while (precedingSlash || pathBegin != pathEnd)
433 sal_Unicode const * p = pathBegin;
434 while (p != pathEnd && *p != '/')
436 ++p;
439 std::size_t n = p - pathBegin;
440 if (n == 1 && pathBegin[0] == '.')
442 // input begins with "." -> remove from input (and done):
443 // i.e., !precedingSlash -> !precedingSlash
444 // input begins with "./" -> remove from input:
445 // i.e., !precedingSlash -> !precedingSlash
446 // input begins with "/." -> replace with "/" in input (and not yet
447 // done):
448 // i.e., precedingSlash -> precedingSlash
449 // input begins with "/./" -> replace with "/" in input:
450 // i.e., precedingSlash -> precedingSlash
452 else if (n == 2 && pathBegin[0] == '.' && pathBegin[1] == '.')
454 // input begins with ".." -> remove from input (and done):
455 // i.e., !precedingSlash -> !precedingSlash
456 // input begins with "../" -> remove from input
457 // i.e., !precedingSlash -> !precedingSlash
458 // input begins with "/.." -> replace with "/" in input, and shrink
459 // output (not yet done):
460 // i.e., precedingSlash -> precedingSlash
461 // input begins with "/../" -> replace with "/" in input, and shrink
462 // output:
463 // i.e., precedingSlash -> precedingSlash
464 if (precedingSlash)
466 buffer.truncate(
467 bufferStart
468 + std::max<sal_Int32>(
469 rtl_ustr_lastIndexOfChar_WithLength(
470 buffer.getStr() + bufferStart,
471 buffer.getLength() - bufferStart, '/'),
472 0));
475 else
477 if (precedingSlash)
478 buffer.append('/');
480 buffer.append(pathBegin, n);
481 precedingSlash = p != pathEnd;
483 pathBegin = p + (p == pathEnd ? 0 : 1);
// Returns the character-class table selected by eCharClass.
//
// Each table has nCharClassSize (128) entries indexed by character code; the
// trailing comment on each row of eight lists the characters that row covers.
// A true entry means the character belongs to the class. The tables appear in
// the order None, Uric, UricNoSlash, RelSegment, RegName, Userinfo, Pchar,
// UnoParamValue, and eCharClass is used directly as the index into that list.
//
// An out-of-range eCharClass is caught only by the assert.
489 sal_Bool const * SAL_CALL rtl_getUriCharClass(rtl_UriCharClass eCharClass)
490 SAL_THROW_EXTERN_C()
492 static sal_Bool const aCharClass[][nCharClassSize] = {
493 {false, false, false, false, false, false, false, false,// None
494 false, false, false, false, false, false, false, false,
495 false, false, false, false, false, false, false, false,
496 false, false, false, false, false, false, false, false,
497 false, false, false, false, false, false, false, false, // !"#$%&'
498 false, false, false, false, false, false, false, false, // ()*+,-./
499 false, false, false, false, false, false, false, false, // 01234567
500 false, false, false, false, false, false, false, false, // 89:;<=>?
501 false, false, false, false, false, false, false, false, // @ABCDEFG
502 false, false, false, false, false, false, false, false, // HIJKLMNO
503 false, false, false, false, false, false, false, false, // PQRSTUVW
504 false, false, false, false, false, false, false, false, // XYZ[\]^_
505 false, false, false, false, false, false, false, false, // `abcdefg
506 false, false, false, false, false, false, false, false, // hijklmno
507 false, false, false, false, false, false, false, false, // pqrstuvw
508 false, false, false, false, false, false, false, false}, // xyz{|}~
509 {false, false, false, false, false, false, false, false,// Uric
510 false, false, false, false, false, false, false, false,
511 false, false, false, false, false, false, false, false,
512 false, false, false, false, false, false, false, false,
513 false, true, false, false, true, false, true, true, // !"#$%&'
514 true, true, true, true, true, true, true, true, // ()*+,-./
515 true, true, true, true, true, true, true, true, // 01234567
516 true, true, true, true, false, true, false, true, // 89:;<=>?
517 true, true, true, true, true, true, true, true, // @ABCDEFG
518 true, true, true, true, true, true, true, true, // HIJKLMNO
519 true, true, true, true, true, true, true, true, // PQRSTUVW
520 true, true, true, true, false, true, false, true, // XYZ[\]^_
521 false, true, true, true, true, true, true, true, // `abcdefg
522 true, true, true, true, true, true, true, true, // hijklmno
523 true, true, true, true, true, true, true, true, // pqrstuvw
524 true, true, true, false, false, false, true, false}, // xyz{|}~
525 {false, false, false, false, false, false, false, false,// UricNoSlash
526 false, false, false, false, false, false, false, false,
527 false, false, false, false, false, false, false, false,
528 false, false, false, false, false, false, false, false,
529 false, true, false, false, true, false, true, true, // !"#$%&'
530 true, true, true, true, true, true, true, false, // ()*+,-./
531 true, true, true, true, true, true, true, true, // 01234567
532 true, true, true, true, false, true, false, true, // 89:;<=>?
533 true, true, true, true, true, true, true, true, // @ABCDEFG
534 true, true, true, true, true, true, true, true, // HIJKLMNO
535 true, true, true, true, true, true, true, true, // PQRSTUVW
536 true, true, true, false, false, false, false, true, // XYZ[\]^_
537 false, true, true, true, true, true, true, true, // `abcdefg
538 true, true, true, true, true, true, true, true, // hijklmno
539 true, true, true, true, true, true, true, true, // pqrstuvw
540 true, true, true, false, false, false, true, false}, // xyz{|}~
541 {false, false, false, false, false, false, false, false,// RelSegment
542 false, false, false, false, false, false, false, false,
543 false, false, false, false, false, false, false, false,
544 false, false, false, false, false, false, false, false,
545 false, true, false, false, true, false, true, true, // !"#$%&'
546 true, true, true, true, true, true, true, false, // ()*+,-./
547 true, true, true, true, true, true, true, true, // 01234567
548 true, true, false, true, false, true, false, false, // 89:;<=>?
549 true, true, true, true, true, true, true, true, // @ABCDEFG
550 true, true, true, true, true, true, true, true, // HIJKLMNO
551 true, true, true, true, true, true, true, true, // PQRSTUVW
552 true, true, true, false, false, false, false, true, // XYZ[\]^_
553 false, true, true, true, true, true, true, true, // `abcdefg
554 true, true, true, true, true, true, true, true, // hijklmno
555 true, true, true, true, true, true, true, true, // pqrstuvw
556 true, true, true, false, false, false, true, false}, // xyz{|}~
557 {false, false, false, false, false, false, false, false,// RegName
558 false, false, false, false, false, false, false, false,
559 false, false, false, false, false, false, false, false,
560 false, false, false, false, false, false, false, false,
561 false, true, false, false, true, false, true, true, // !"#$%&'
562 true, true, true, true, true, true, true, false, // ()*+,-./
563 true, true, true, true, true, true, true, true, // 01234567
564 true, true, true, true, false, true, false, false, // 89:;<=>?
565 true, true, true, true, true, true, true, true, // @ABCDEFG
566 true, true, true, true, true, true, true, true, // HIJKLMNO
567 true, true, true, true, true, true, true, true, // PQRSTUVW
568 true, true, true, false, false, false, false, true, // XYZ[\]^_
569 false, true, true, true, true, true, true, true, // `abcdefg
570 true, true, true, true, true, true, true, true, // hijklmno
571 true, true, true, true, true, true, true, true, // pqrstuvw
572 true, true, true, false, false, false, true, false}, // xyz{|}~
573 {false, false, false, false, false, false, false, false,// Userinfo
574 false, false, false, false, false, false, false, false,
575 false, false, false, false, false, false, false, false,
576 false, false, false, false, false, false, false, false,
577 false, true, false, false, true, false, true, true, // !"#$%&'
578 true, true, true, true, true, true, true, false, // ()*+,-./
579 true, true, true, true, true, true, true, true, // 01234567
580 true, true, true, true, false, true, false, false, // 89:;<=>?
581 false, true, true, true, true, true, true, true, // @ABCDEFG
582 true, true, true, true, true, true, true, true, // HIJKLMNO
583 true, true, true, true, true, true, true, true, // PQRSTUVW
584 true, true, true, false, false, false, false, true, // XYZ[\]^_
585 false, true, true, true, true, true, true, true, // `abcdefg
586 true, true, true, true, true, true, true, true, // hijklmno
587 true, true, true, true, true, true, true, true, // pqrstuvw
588 true, true, true, false, false, false, true, false}, // xyz{|}~
589 {false, false, false, false, false, false, false, false,// Pchar
590 false, false, false, false, false, false, false, false,
591 false, false, false, false, false, false, false, false,
592 false, false, false, false, false, false, false, false,
593 false, true, false, false, true, false, true, true, // !"#$%&'
594 true, true, true, true, true, true, true, false, // ()*+,-./
595 true, true, true, true, true, true, true, true, // 01234567
596 true, true, true, false, false, true, false, false, // 89:;<=>?
597 true, true, true, true, true, true, true, true, // @ABCDEFG
598 true, true, true, true, true, true, true, true, // HIJKLMNO
599 true, true, true, true, true, true, true, true, // PQRSTUVW
600 true, true, true, false, false, false, false, true, // XYZ[\]^_
601 false, true, true, true, true, true, true, true, // `abcdefg
602 true, true, true, true, true, true, true, true, // hijklmno
603 true, true, true, true, true, true, true, true, // pqrstuvw
604 true, true, true, false, false, false, true, false}, // xyz{|}~
605 {false, false, false, false, false, false, false, false,// UnoParamValue
606 false, false, false, false, false, false, false, false,
607 false, false, false, false, false, false, false, false,
608 false, false, false, false, false, false, false, false,
609 false, true, false, false, true, false, true, true, // !"#$%&'
610 true, true, true, true, false, true, true, true, // ()*+,-./
611 true, true, true, true, true, true, true, true, // 01234567
612 true, true, true, false, false, false, false, true, // 89:;<=>?
613 true, true, true, true, true, true, true, true, // @ABCDEFG
614 true, true, true, true, true, true, true, true, // HIJKLMNO
615 true, true, true, true, true, true, true, true, // PQRSTUVW
616 true, true, true, false, false, false, false, true, // XYZ[\]^_
617 false, true, true, true, true, true, true, true, // `abcdefg
618 true, true, true, true, true, true, true, true, // hijklmno
619 true, true, true, true, true, true, true, true, // pqrstuvw
620 true, true, true, false, false, false, true, false}}; // xyz{|}~
// eCharClass must be a valid row index of the table above.
622 assert(
623 (eCharClass >= 0
624 && (sal::static_int_cast< std::size_t >(eCharClass)
625 < SAL_N_ELEMENTS(aCharClass)))); // bad eCharClass
626 return aCharClass[eCharClass];
629 void SAL_CALL rtl_uriEncode(rtl_uString * pText, sal_Bool const * pCharClass,
630 rtl_UriEncodeMechanism eMechanism,
631 rtl_TextEncoding eCharset, rtl_uString ** pResult)
632 SAL_THROW_EXTERN_C()
634 assert(!pCharClass[0x25]); // make sure the percent sign is encoded...
636 sal_Unicode const * p = pText->buffer;
637 sal_Unicode const * pEnd = p + pText->length;
638 sal_Int32 nCapacity = pText->length;
639 rtl_uString_new_WithLength(pResult, nCapacity);
641 while (p < pEnd)
643 EscapeType eType;
644 sal_uInt32 nUtf32 = readUcs4(
645 &p, pEnd,
646 (eMechanism == rtl_UriEncodeKeepEscapes
647 || eMechanism == rtl_UriEncodeCheckEscapes
648 || eMechanism == rtl_UriEncodeStrictKeepEscapes),
649 eCharset, &eType);
651 switch (eType)
653 case EscapeNo:
654 if (isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
656 writeUnicode(pResult, &nCapacity,
657 static_cast< sal_Unicode >(nUtf32));
659 else if (!writeEscapeChar(
660 pResult, &nCapacity, nUtf32, eCharset,
661 (eMechanism == rtl_UriEncodeStrict
662 || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
664 rtl_uString_new(pResult);
665 return;
667 break;
669 case EscapeChar:
670 if (eMechanism == rtl_UriEncodeCheckEscapes
671 && isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
673 writeUnicode(pResult, &nCapacity,
674 static_cast< sal_Unicode >(nUtf32));
676 else if (!writeEscapeChar(
677 pResult, &nCapacity, nUtf32, eCharset,
678 (eMechanism == rtl_UriEncodeStrict
679 || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
681 rtl_uString_new(pResult);
682 return;
684 break;
686 case EscapeOctet:
687 writeEscapeOctet(pResult, &nCapacity, nUtf32);
688 break;
691 *pResult = rtl_uStringBuffer_makeStringAndClear(pResult, &nCapacity);
694 void SAL_CALL rtl_uriDecode(rtl_uString * pText,
695 rtl_UriDecodeMechanism eMechanism,
696 rtl_TextEncoding eCharset, rtl_uString ** pResult)
697 SAL_THROW_EXTERN_C()
699 switch (eMechanism)
701 case rtl_UriDecodeNone:
702 rtl_uString_assign(pResult, pText);
703 break;
705 case rtl_UriDecodeToIuri:
706 eCharset = RTL_TEXTENCODING_UTF8;
707 SAL_FALLTHROUGH;
708 default: // rtl_UriDecodeWithCharset, rtl_UriDecodeStrict
710 sal_Unicode const * p = pText->buffer;
711 sal_Unicode const * pEnd = p + pText->length;
712 sal_Int32 nCapacity = pText->length;
713 rtl_uString_new_WithLength(pResult, nCapacity);
715 while (p < pEnd)
717 EscapeType eType;
718 sal_uInt32 nUtf32 = readUcs4(&p, pEnd, true, eCharset, &eType);
719 switch (eType)
721 case EscapeChar:
722 if (nUtf32 <= 0x7F && eMechanism == rtl_UriDecodeToIuri)
724 writeEscapeOctet(pResult, &nCapacity, nUtf32);
725 break;
727 SAL_FALLTHROUGH;
729 case EscapeNo:
730 writeUcs4(pResult, &nCapacity, nUtf32);
731 break;
733 case EscapeOctet:
734 if (eMechanism == rtl_UriDecodeStrict)
736 rtl_uString_new(pResult);
737 return;
739 writeEscapeOctet(pResult, &nCapacity, nUtf32);
740 break;
744 *pResult = rtl_uStringBuffer_makeStringAndClear( pResult, &nCapacity );
746 break;
750 sal_Bool SAL_CALL rtl_uriConvertRelToAbs(rtl_uString * pBaseUriRef,
751 rtl_uString * pRelUriRef,
752 rtl_uString ** pResult,
753 rtl_uString ** pException)
754 SAL_THROW_EXTERN_C()
756 // Use the strict parser algorithm from RFC 3986, section 5.2, to turn the
757 // relative URI into an absolute one:
758 OUStringBuffer aBuffer;
759 Components aRelComponents;
760 parseUriRef(pRelUriRef, &aRelComponents);
762 if (aRelComponents.aScheme.isPresent())
764 aBuffer.append(aRelComponents.aScheme.pBegin,
765 aRelComponents.aScheme.getLength());
767 if (aRelComponents.aAuthority.isPresent())
769 aBuffer.append(aRelComponents.aAuthority.pBegin,
770 aRelComponents.aAuthority.getLength());
773 appendPath(
774 aBuffer, aBuffer.getLength(), false, aRelComponents.aPath.pBegin,
775 aRelComponents.aPath.pEnd);
777 if (aRelComponents.aQuery.isPresent())
779 aBuffer.append(aRelComponents.aQuery.pBegin,
780 aRelComponents.aQuery.getLength());
783 else
785 Components aBaseComponents;
786 parseUriRef(pBaseUriRef, &aBaseComponents);
787 if (!aBaseComponents.aScheme.isPresent())
789 rtl_uString_assign(
790 pException,
791 (OUString(
792 "<" + OUString(pBaseUriRef)
793 + "> does not start with a scheme component")
794 .pData));
795 return false;
798 aBuffer.append(aBaseComponents.aScheme.pBegin,
799 aBaseComponents.aScheme.getLength());
800 if (aRelComponents.aAuthority.isPresent())
802 aBuffer.append(aRelComponents.aAuthority.pBegin,
803 aRelComponents.aAuthority.getLength());
804 appendPath(
805 aBuffer, aBuffer.getLength(), false,
806 aRelComponents.aPath.pBegin, aRelComponents.aPath.pEnd);
808 if (aRelComponents.aQuery.isPresent())
810 aBuffer.append(aRelComponents.aQuery.pBegin,
811 aRelComponents.aQuery.getLength());
814 else
816 if (aBaseComponents.aAuthority.isPresent())
818 aBuffer.append(aBaseComponents.aAuthority.pBegin,
819 aBaseComponents.aAuthority.getLength());
822 if (aRelComponents.aPath.pBegin == aRelComponents.aPath.pEnd)
824 aBuffer.append(aBaseComponents.aPath.pBegin,
825 aBaseComponents.aPath.getLength());
826 if (aRelComponents.aQuery.isPresent())
828 aBuffer.append(aRelComponents.aQuery.pBegin,
829 aRelComponents.aQuery.getLength());
831 else if (aBaseComponents.aQuery.isPresent())
833 aBuffer.append(aBaseComponents.aQuery.pBegin,
834 aBaseComponents.aQuery.getLength());
837 else
839 if (aRelComponents.aPath.pBegin != aRelComponents.aPath.pEnd
840 && *aRelComponents.aPath.pBegin == '/')
842 appendPath(
843 aBuffer, aBuffer.getLength(), false,
844 aRelComponents.aPath.pBegin, aRelComponents.aPath.pEnd);
846 else if (aBaseComponents.aAuthority.isPresent()
847 && aBaseComponents.aPath.pBegin
848 == aBaseComponents.aPath.pEnd)
850 appendPath(
851 aBuffer, aBuffer.getLength(), true,
852 aRelComponents.aPath.pBegin, aRelComponents.aPath.pEnd);
854 else
856 sal_Int32 n = aBuffer.getLength();
857 sal_Int32 i = rtl_ustr_lastIndexOfChar_WithLength(
858 aBaseComponents.aPath.pBegin,
859 aBaseComponents.aPath.getLength(), '/');
861 if (i >= 0)
863 appendPath(
864 aBuffer, n, false, aBaseComponents.aPath.pBegin,
865 aBaseComponents.aPath.pBegin + i);
868 appendPath(
869 aBuffer, n, i >= 0, aRelComponents.aPath.pBegin,
870 aRelComponents.aPath.pEnd);
873 if (aRelComponents.aQuery.isPresent())
875 aBuffer.append(aRelComponents.aQuery.pBegin,
876 aRelComponents.aQuery.getLength());
881 if (aRelComponents.aFragment.isPresent())
883 aBuffer.append(aRelComponents.aFragment.pBegin,
884 aRelComponents.aFragment.getLength());
887 rtl_uString_assign(pResult, aBuffer.makeStringAndClear().pData);
888 return true;
891 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */