merged tag ooo/DEV300_m102
[LibreOffice.git] / sal / rtl / source / uri.cxx
blob551c4f199251b8841af658d7fe911496ea4a9458
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2000, 2010 Oracle and/or its affiliates.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * This file is part of OpenOffice.org.
11 * OpenOffice.org is free software: you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser General Public License version 3
13 * only, as published by the Free Software Foundation.
15 * OpenOffice.org is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Lesser General Public License version 3 for more details
19 * (a copy is included in the LICENSE file that accompanied this code).
21 * You should have received a copy of the GNU Lesser General Public License
22 * version 3 along with OpenOffice.org. If not, see
23 * <http://www.openoffice.org/license.html>
24 * for a copy of the LGPLv3 License.
26 ************************************************************************/
28 // MARKER(update_precomp.py): autogen include statement, do not remove
29 #include "precompiled_sal.hxx"
31 #include "rtl/uri.h"
33 #include "surrogates.h"
35 #include "osl/diagnose.h"
36 #include "rtl/strbuf.hxx"
37 #include "rtl/textenc.h"
38 #include "rtl/textcvt.h"
39 #include "rtl/uri.h"
40 #include "rtl/ustrbuf.h"
41 #include "rtl/ustrbuf.hxx"
42 #include "rtl/ustring.h"
43 #include "rtl/ustring.hxx"
44 #include "sal/types.h"
46 #include <cstddef>
48 namespace {
50 std::size_t const nCharClassSize = 128;
52 sal_Unicode const cEscapePrefix = 0x25; // '%'
54 inline bool isDigit(sal_uInt32 nUtf32)
56 return nUtf32 >= 0x30 && nUtf32 <= 0x39; // '0'--'9'
59 inline bool isAlpha(sal_uInt32 nUtf32)
61 // 'A'--'Z', 'a'--'z'
62 return (
63 (nUtf32 >= 0x41 && nUtf32 <= 0x5A) ||
64 (nUtf32 >= 0x61 && nUtf32 <= 0x7A)
68 inline bool isHighSurrogate(sal_uInt32 nUtf16)
70 return SAL_RTL_IS_HIGH_SURROGATE(nUtf16);
73 inline bool isLowSurrogate(sal_uInt32 nUtf16)
75 return SAL_RTL_IS_LOW_SURROGATE(nUtf16);
78 inline sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low)
80 return SAL_RTL_COMBINE_SURROGATES(high, low);
83 inline int getHexWeight(sal_uInt32 nUtf32)
85 return nUtf32 >= 0x30 && nUtf32 <= 0x39 ? // '0'--'9'
86 static_cast< int >(nUtf32 - 0x30) :
87 nUtf32 >= 0x41 && nUtf32 <= 0x46 ? // 'A'--'F'
88 static_cast< int >(nUtf32 - 0x41 + 10) :
89 nUtf32 >= 0x61 && nUtf32 <= 0x66 ? // 'a'--'f'
90 static_cast< int >(nUtf32 - 0x61 + 10) :
91 -1; // not a hex digit
94 inline bool isValid(sal_Bool const * pCharClass, sal_uInt32 nUtf32)
96 return nUtf32 < nCharClassSize && pCharClass[nUtf32];
99 inline void writeUnicode(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
100 sal_Unicode cChar)
102 rtl_uStringbuffer_insert(pBuffer, pCapacity, (*pBuffer)->length, &cChar, 1);
// Classifies what readUcs4 consumed from its input.
enum EscapeType
{
    // No escape sequence was consumed: the result is a literal character
    // (possibly combined from a surrogate pair).
    EscapeNo,

    // One or more "%XX" escape sequences were consumed and decoded into a
    // character.
    EscapeChar,

    // A single "%XX" escape sequence was consumed whose octet could not be
    // decoded into a character; the result is the raw octet value.
    EscapeOctet
};
112 /* Read any of the following:
114 - sequence of escape sequences representing character from eCharset,
115 translated to single UCS4 character; or
117 - pair of UTF-16 surrogates, translated to single UCS4 character; or
119 _ single UTF-16 character, extended to UCS4 character.
121 sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
122 bool bEncoded, rtl_TextEncoding eCharset,
123 EscapeType * pType)
125 sal_uInt32 nChar = *(*pBegin)++;
126 int nWeight1;
127 int nWeight2;
128 if (nChar == cEscapePrefix && bEncoded && pEnd - *pBegin >= 2
129 && (nWeight1 = getHexWeight((*pBegin)[0])) >= 0
130 && (nWeight2 = getHexWeight((*pBegin)[1])) >= 0)
132 *pBegin += 2;
133 nChar = static_cast< sal_uInt32 >(nWeight1 << 4 | nWeight2);
134 if (nChar <= 0x7F)
135 *pType = EscapeChar;
136 else if (eCharset == RTL_TEXTENCODING_UTF8)
138 if (nChar >= 0xC0 && nChar <= 0xF4)
140 sal_uInt32 nEncoded;
141 int nShift;
142 sal_uInt32 nMin;
143 if (nChar <= 0xDF)
145 nEncoded = (nChar & 0x1F) << 6;
146 nShift = 0;
147 nMin = 0x80;
149 else if (nChar <= 0xEF)
151 nEncoded = (nChar & 0x0F) << 12;
152 nShift = 6;
153 nMin = 0x800;
155 else
157 nEncoded = (nChar & 0x07) << 18;
158 nShift = 12;
159 nMin = 0x10000;
161 sal_Unicode const * p = *pBegin;
162 bool bUTF8 = true;
163 for (; nShift >= 0; nShift -= 6)
165 if (pEnd - p < 3 || p[0] != cEscapePrefix
166 || (nWeight1 = getHexWeight(p[1])) < 8
167 || nWeight1 > 11
168 || (nWeight2 = getHexWeight(p[2])) < 0)
170 bUTF8 = sal_False;
171 break;
173 p += 3;
174 nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift;
176 if (bUTF8 && nEncoded >= nMin && !isHighSurrogate(nEncoded)
177 && !isLowSurrogate(nEncoded) && nEncoded <= 0x10FFFF)
179 *pBegin = p;
180 *pType = EscapeChar;
181 return nEncoded;
184 *pType = EscapeOctet;
186 else
188 rtl::OStringBuffer aBuf;
189 aBuf.append(static_cast< char >(nChar));
190 rtl_TextToUnicodeConverter aConverter
191 = rtl_createTextToUnicodeConverter(eCharset);
192 sal_Unicode const * p = *pBegin;
193 for (;;)
195 sal_Unicode aDst[2];
196 sal_uInt32 nInfo;
197 sal_Size nConverted;
198 sal_Size nDstSize = rtl_convertTextToUnicode(
199 aConverter, 0, aBuf.getStr(), aBuf.getLength(), aDst,
200 sizeof aDst / sizeof aDst[0],
201 (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
202 | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
203 | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR),
204 &nInfo, &nConverted);
205 if (nInfo == 0)
207 OSL_ASSERT(
208 nConverted
209 == sal::static_int_cast< sal_uInt32 >(
210 aBuf.getLength()));
211 rtl_destroyTextToUnicodeConverter(aConverter);
212 *pBegin = p;
213 *pType = EscapeChar;
214 OSL_ASSERT(
215 nDstSize == 1
216 || (nDstSize == 2 && isHighSurrogate(aDst[0])
217 && isLowSurrogate(aDst[1])));
218 return nDstSize == 1
219 ? aDst[0] : combineSurrogates(aDst[0], aDst[1]);
221 else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
222 && pEnd - p >= 3 && p[0] == cEscapePrefix
223 && (nWeight1 = getHexWeight(p[1])) >= 0
224 && (nWeight2 = getHexWeight(p[2])) >= 0)
226 p += 3;
227 aBuf.append(static_cast< char >(nWeight1 << 4 | nWeight2));
229 else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
230 && p != pEnd && *p <= 0x7F)
232 aBuf.append(static_cast< char >(*p++));
234 else
236 OSL_ASSERT(
237 (nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)
238 == 0);
239 break;
242 rtl_destroyTextToUnicodeConverter(aConverter);
243 *pType = EscapeOctet;
245 return nChar;
247 else
249 *pType = EscapeNo;
250 return isHighSurrogate(nChar) && *pBegin < pEnd
251 && isLowSurrogate(**pBegin) ?
252 combineSurrogates(nChar, *(*pBegin)++) : nChar;
256 void writeUcs4(rtl_uString ** pBuffer, sal_Int32 * pCapacity, sal_uInt32 nUtf32)
258 OSL_ENSURE(nUtf32 <= 0x10FFFF, "bad UTF-32 char");
259 if (nUtf32 <= 0xFFFF) {
260 writeUnicode(
261 pBuffer, pCapacity, static_cast< sal_Unicode >(nUtf32));
262 } else {
263 nUtf32 -= 0x10000;
264 writeUnicode(
265 pBuffer, pCapacity,
266 static_cast< sal_Unicode >(nUtf32 >> 10 | 0xD800));
267 writeUnicode(
268 pBuffer, pCapacity,
269 static_cast< sal_Unicode >((nUtf32 & 0x3FF) | 0xDC00));
273 void writeEscapeOctet(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
274 sal_uInt32 nOctet)
276 OSL_ENSURE(nOctet <= 0xFF, "bad octet");
278 static sal_Unicode const aHex[16]
279 = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
280 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }; /* '0'--'9', 'A'--'F' */
282 writeUnicode(pBuffer, pCapacity, cEscapePrefix);
283 writeUnicode(pBuffer, pCapacity, aHex[nOctet >> 4]);
284 writeUnicode(pBuffer, pCapacity, aHex[nOctet & 15]);
287 bool writeEscapeChar(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
288 sal_uInt32 nUtf32, rtl_TextEncoding eCharset, bool bStrict)
290 OSL_ENSURE(nUtf32 <= 0x10FFFF, "bad UTF-32 char");
291 if (eCharset == RTL_TEXTENCODING_UTF8) {
292 if (nUtf32 < 0x80)
293 writeEscapeOctet(pBuffer, pCapacity, nUtf32);
294 else if (nUtf32 < 0x800)
296 writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 6 | 0xC0);
297 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
299 else if (nUtf32 < 0x10000)
301 writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 12 | 0xE0);
302 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
303 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
305 else
307 writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 18 | 0xF0);
308 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 12 & 0x3F) | 0x80);
309 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
310 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
312 } else {
313 rtl_UnicodeToTextConverter aConverter
314 = rtl_createUnicodeToTextConverter(eCharset);
315 sal_Unicode aSrc[2];
316 sal_Size nSrcSize;
317 if (nUtf32 <= 0xFFFF)
319 aSrc[0] = static_cast< sal_Unicode >(nUtf32);
320 nSrcSize = 1;
322 else
324 aSrc[0] = static_cast< sal_Unicode >(
325 ((nUtf32 - 0x10000) >> 10) | 0xD800);
326 aSrc[1] = static_cast< sal_Unicode >(
327 ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00);
328 nSrcSize = 2;
330 sal_Char aDst[32]; // FIXME random value
331 sal_uInt32 nInfo;
332 sal_Size nConverted;
333 sal_Size nDstSize = rtl_convertUnicodeToText(
334 aConverter, 0, aSrc, nSrcSize, aDst, sizeof aDst,
335 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
336 | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
337 | RTL_UNICODETOTEXT_FLAGS_FLUSH,
338 &nInfo, &nConverted);
339 OSL_ASSERT((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0);
340 rtl_destroyUnicodeToTextConverter(aConverter);
341 if (nInfo == 0) {
342 OSL_ENSURE(nConverted == nSrcSize, "bad rtl_convertUnicodeToText");
343 for (sal_Size i = 0; i < nDstSize; ++i)
344 writeEscapeOctet(pBuffer, pCapacity,
345 static_cast< unsigned char >(aDst[i]));
346 // FIXME all octets are escaped, even if there is no need
347 } else {
348 if (bStrict) {
349 return false;
350 } else {
351 writeUcs4(pBuffer, pCapacity, nUtf32);
355 return true;
358 struct Component
360 sal_Unicode const * pBegin;
361 sal_Unicode const * pEnd;
363 inline Component(): pBegin(0) {}
365 inline bool isPresent() const { return pBegin != 0; }
367 inline sal_Int32 getLength() const;
370 inline sal_Int32 Component::getLength() const
372 OSL_ENSURE(isPresent(), "taking length of non-present component");
373 return static_cast< sal_Int32 >(pEnd - pBegin);
376 struct Components
378 Component aScheme;
379 Component aAuthority;
380 Component aPath;
381 Component aQuery;
382 Component aFragment;
385 void parseUriRef(rtl_uString const * pUriRef, Components * pComponents)
387 // This algorithm is liberal and accepts various forms of illegal input.
389 sal_Unicode const * pBegin = pUriRef->buffer;
390 sal_Unicode const * pEnd = pBegin + pUriRef->length;
391 sal_Unicode const * pPos = pBegin;
393 if (pPos != pEnd && isAlpha(*pPos))
394 for (sal_Unicode const * p = pPos + 1; p != pEnd; ++p)
395 if (*p == ':')
397 pComponents->aScheme.pBegin = pBegin;
398 pComponents->aScheme.pEnd = ++p;
399 pPos = p;
400 break;
402 else if (!isAlpha(*p) && !isDigit(*p) && *p != '+' && *p != '-'
403 && *p != '.')
404 break;
406 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
408 pComponents->aAuthority.pBegin = pPos;
409 pPos += 2;
410 while (pPos != pEnd && *pPos != '/' && *pPos != '?' && *pPos != '#')
411 ++pPos;
412 pComponents->aAuthority.pEnd = pPos;
415 pComponents->aPath.pBegin = pPos;
416 while (pPos != pEnd && *pPos != '?' && * pPos != '#')
417 ++pPos;
418 pComponents->aPath.pEnd = pPos;
420 if (pPos != pEnd && *pPos == '?')
422 pComponents->aQuery.pBegin = pPos++;
423 while (pPos != pEnd && * pPos != '#')
424 ++pPos;
425 pComponents->aQuery.pEnd = pPos;
428 if (pPos != pEnd)
430 OSL_ASSERT(*pPos == '#');
431 pComponents->aFragment.pBegin = pPos;
432 pComponents->aFragment.pEnd = pEnd;
436 rtl::OUString joinPaths(Component const & rBasePath, Component const & rRelPath)
438 OSL_ASSERT(rBasePath.isPresent() && *rBasePath.pBegin == '/');
439 OSL_ASSERT(rRelPath.isPresent());
441 // The invariant of aBuffer is that it always starts and ends with a slash
442 // (until probably right at the end of the algorithm, when the last segment
443 // of rRelPath is added, which does not necessarily end in a slash):
444 rtl::OUStringBuffer aBuffer(rBasePath.getLength() + rRelPath.getLength());
445 // XXX numeric overflow
447 // Segments "." and ".." within rBasePath are not conisdered special (but
448 // are also not removed by ".." segments within rRelPath), RFC 2396 seems a
449 // bit unclear about this point:
450 sal_Int32 nFixed = 1;
451 sal_Unicode const * p = rBasePath.pBegin + 1;
452 for (sal_Unicode const * q = p; q != rBasePath.pEnd; ++q)
453 if (*q == '/')
455 if (
456 (q - p == 1 && p[0] == '.') ||
457 (q - p == 2 && p[0] == '.' && p[1] == '.')
460 nFixed = q + 1 - rBasePath.pBegin;
462 p = q + 1;
464 aBuffer.append(rBasePath.pBegin, p - rBasePath.pBegin);
466 p = rRelPath.pBegin;
467 if (p != rRelPath.pEnd)
468 for (;;)
470 sal_Unicode const * q = p;
471 sal_Unicode const * r;
472 for (;;)
474 if (q == rRelPath.pEnd)
476 r = q;
477 break;
479 if (*q == '/')
481 r = q + 1;
482 break;
484 ++q;
486 if (q - p == 2 && p[0] == '.' && p[1] == '.')
488 // Erroneous excess segments ".." within rRelPath are left
489 // intact, as the examples in RFC 2396, section C.2, suggest:
490 sal_Int32 i = aBuffer.getLength() - 1;
491 if (i < nFixed)
493 aBuffer.append(p, r - p);
494 nFixed += 3;
496 else
498 while (aBuffer.charAt(i - 1) != '/')
499 --i;
500 aBuffer.setLength(i);
503 else if (q - p != 1 || *p != '.')
504 aBuffer.append(p, r - p);
505 if (q == rRelPath.pEnd)
506 break;
507 p = q + 1;
510 return aBuffer.makeStringAndClear();
515 sal_Bool const * SAL_CALL rtl_getUriCharClass(rtl_UriCharClass eCharClass)
516 SAL_THROW_EXTERN_C()
518 static sal_Bool const aCharClass[][nCharClassSize]
519 = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* None */
520 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
521 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* !"#$%&'()*+,-./*/
522 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*0123456789:;<=>?*/
523 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*@ABCDEFGHIJKLMNO*/
524 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*PQRSTUVWXYZ[\]^_*/
525 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*`abcdefghijklmno*/
526 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /*pqrstuvwxyz{|}~ */
528 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Uric */
529 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
530 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* !"#$%&'()*+,-./*/
531 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
532 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
533 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*PQRSTUVWXYZ[\]^_*/
534 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
535 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
537 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UricNoSlash */
538 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
539 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
540 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
541 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
542 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
543 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
544 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
546 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RelSegment */
547 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
548 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
549 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
550 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
551 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
552 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
553 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
555 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RegName */
556 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
557 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
558 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
559 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
560 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
561 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
562 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
564 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Userinfo */
565 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
566 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
567 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
568 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
569 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
570 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
571 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
573 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Pchar */
574 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
575 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
576 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, /*0123456789:;<=>?*/
577 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
578 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
579 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
580 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
582 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UnoParamValue */
583 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
584 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, /* !"#$%&'()*+,-./*/
585 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*0123456789:;<=>?*/
586 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
587 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
588 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
589 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
591 OSL_ENSURE(
592 (eCharClass >= 0
593 && (sal::static_int_cast< std::size_t >(eCharClass)
594 < sizeof aCharClass / sizeof aCharClass[0])),
595 "bad eCharClass");
596 return aCharClass[eCharClass];
599 void SAL_CALL rtl_uriEncode(rtl_uString * pText, sal_Bool const * pCharClass,
600 rtl_UriEncodeMechanism eMechanism,
601 rtl_TextEncoding eCharset, rtl_uString ** pResult)
602 SAL_THROW_EXTERN_C()
604 OSL_ENSURE(!pCharClass[0x25], "bad pCharClass");
605 // make sure the percent sign is encoded...
607 sal_Unicode const * p = pText->buffer;
608 sal_Unicode const * pEnd = p + pText->length;
609 sal_Int32 nCapacity = 0;
610 rtl_uString_new(pResult);
611 while (p < pEnd)
613 EscapeType eType;
614 sal_uInt32 nUtf32 = readUcs4(
615 &p, pEnd,
616 (eMechanism == rtl_UriEncodeKeepEscapes
617 || eMechanism == rtl_UriEncodeCheckEscapes
618 || eMechanism == rtl_UriEncodeStrictKeepEscapes),
619 eCharset, &eType);
620 switch (eType)
622 case EscapeNo:
623 if (isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
624 writeUnicode(pResult, &nCapacity,
625 static_cast< sal_Unicode >(nUtf32));
626 else if (!writeEscapeChar(
627 pResult, &nCapacity, nUtf32, eCharset,
628 (eMechanism == rtl_UriEncodeStrict
629 || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
631 rtl_uString_new(pResult);
632 return;
634 break;
636 case EscapeChar:
637 if (eMechanism == rtl_UriEncodeCheckEscapes
638 && isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
639 writeUnicode(pResult, &nCapacity,
640 static_cast< sal_Unicode >(nUtf32));
641 else if (!writeEscapeChar(
642 pResult, &nCapacity, nUtf32, eCharset,
643 (eMechanism == rtl_UriEncodeStrict
644 || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
646 rtl_uString_new(pResult);
647 return;
649 break;
651 case EscapeOctet:
652 writeEscapeOctet(pResult, &nCapacity, nUtf32);
653 break;
658 void SAL_CALL rtl_uriDecode(rtl_uString * pText,
659 rtl_UriDecodeMechanism eMechanism,
660 rtl_TextEncoding eCharset, rtl_uString ** pResult)
661 SAL_THROW_EXTERN_C()
663 switch (eMechanism)
665 case rtl_UriDecodeNone:
666 rtl_uString_assign(pResult, pText);
667 break;
669 case rtl_UriDecodeToIuri:
670 eCharset = RTL_TEXTENCODING_UTF8;
671 default: // rtl_UriDecodeWithCharset, rtl_UriDecodeStrict
673 sal_Unicode const * p = pText->buffer;
674 sal_Unicode const * pEnd = p + pText->length;
675 sal_Int32 nCapacity = 0;
676 rtl_uString_new(pResult);
677 while (p < pEnd)
679 EscapeType eType;
680 sal_uInt32 nUtf32 = readUcs4(&p, pEnd, true, eCharset, &eType);
681 switch (eType)
683 case EscapeChar:
684 if (nUtf32 <= 0x7F && eMechanism == rtl_UriDecodeToIuri)
686 writeEscapeOctet(pResult, &nCapacity, nUtf32);
687 break;
689 case EscapeNo:
690 writeUcs4(pResult, &nCapacity, nUtf32);
691 break;
693 case EscapeOctet:
694 if (eMechanism == rtl_UriDecodeStrict) {
695 rtl_uString_new(pResult);
696 return;
698 writeEscapeOctet(pResult, &nCapacity, nUtf32);
699 break;
703 break;
707 sal_Bool SAL_CALL rtl_uriConvertRelToAbs(rtl_uString * pBaseUriRef,
708 rtl_uString * pRelUriRef,
709 rtl_uString ** pResult,
710 rtl_uString ** pException)
711 SAL_THROW_EXTERN_C()
713 // If pRelUriRef starts with a scheme component it is an absolute URI
714 // reference, and we are done (i.e., this algorithm does not support
715 // backwards-compatible relative URIs starting with a scheme component, see
716 // RFC 2396, section 5.2, step 3):
717 Components aRelComponents;
718 parseUriRef(pRelUriRef, &aRelComponents);
719 if (aRelComponents.aScheme.isPresent())
721 rtl_uString_assign(pResult, pRelUriRef);
722 return true;
725 // Parse pBaseUriRef; if the scheme component is not present or not valid,
726 // or the path component is not empty and starts with anything but a slash,
727 // an exception is raised:
728 Components aBaseComponents;
729 parseUriRef(pBaseUriRef, &aBaseComponents);
730 if (!aBaseComponents.aScheme.isPresent())
732 rtl::OUString aMessage(pBaseUriRef);
733 aMessage += rtl::OUString(
734 RTL_CONSTASCII_USTRINGPARAM(
735 " does not start with a scheme component"));
736 rtl_uString_assign(pException,
737 const_cast< rtl::OUString & >(aMessage).pData);
738 return false;
740 if (aBaseComponents.aPath.pBegin != aBaseComponents.aPath.pEnd
741 && *aBaseComponents.aPath.pBegin != '/')
743 rtl::OUString aMessage(pBaseUriRef);
744 aMessage += rtl::OUString(
745 RTL_CONSTASCII_USTRINGPARAM(
746 "path component does not start with slash"));
747 rtl_uString_assign(pException, aMessage.pData);
748 return false;
751 // Use the algorithm from RFC 2396, section 5.2, to turn the relative URI
752 // into an absolute one (if the relative URI is a reference to the "current
753 // document," the "current document" is here taken to be the base URI):
754 rtl::OUStringBuffer aBuffer;
755 aBuffer.append(aBaseComponents.aScheme.pBegin,
756 aBaseComponents.aScheme.getLength());
757 if (aRelComponents.aAuthority.isPresent())
759 aBuffer.append(aRelComponents.aAuthority.pBegin,
760 aRelComponents.aAuthority.getLength());
761 aBuffer.append(aRelComponents.aPath.pBegin,
762 aRelComponents.aPath.getLength());
763 if (aRelComponents.aQuery.isPresent())
764 aBuffer.append(aRelComponents.aQuery.pBegin,
765 aRelComponents.aQuery.getLength());
767 else
769 if (aBaseComponents.aAuthority.isPresent())
770 aBuffer.append(aBaseComponents.aAuthority.pBegin,
771 aBaseComponents.aAuthority.getLength());
772 if (aRelComponents.aPath.pBegin == aRelComponents.aPath.pEnd
773 && !aRelComponents.aQuery.isPresent())
775 aBuffer.append(aBaseComponents.aPath.pBegin,
776 aBaseComponents.aPath.getLength());
777 if (aBaseComponents.aQuery.isPresent())
778 aBuffer.append(aBaseComponents.aQuery.pBegin,
779 aBaseComponents.aQuery.getLength());
781 else
783 if (*aRelComponents.aPath.pBegin == '/')
784 aBuffer.append(aRelComponents.aPath.pBegin,
785 aRelComponents.aPath.getLength());
786 else
787 aBuffer.append(joinPaths(aBaseComponents.aPath,
788 aRelComponents.aPath));
789 if (aRelComponents.aQuery.isPresent())
790 aBuffer.append(aRelComponents.aQuery.pBegin,
791 aRelComponents.aQuery.getLength());
794 if (aRelComponents.aFragment.isPresent())
795 aBuffer.append(aRelComponents.aFragment.pBegin,
796 aRelComponents.aFragment.getLength());
797 rtl_uString_assign(pResult, aBuffer.makeStringAndClear().pData);
798 return true;