update dev300-m57
[ooovba.git] / sal / rtl / source / uri.cxx
bloba60cce814292e0a6d2d8ad9b2d7227a56012397e
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: uri.cxx,v $
10 * $Revision: 1.12 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_sal.hxx"
34 #include "rtl/uri.h"
36 #include "surrogates.h"
38 #include "osl/diagnose.h"
39 #include "rtl/strbuf.hxx"
40 #include "rtl/textenc.h"
41 #include "rtl/textcvt.h"
42 #include "rtl/uri.h"
43 #include "rtl/ustrbuf.h"
44 #include "rtl/ustrbuf.hxx"
45 #include "rtl/ustring.h"
46 #include "rtl/ustring.hxx"
47 #include "sal/types.h"
49 #include <cstddef>
51 namespace {
53 std::size_t const nCharClassSize = 128;
55 sal_Unicode const cEscapePrefix = 0x25; // '%'
57 inline bool isDigit(sal_uInt32 nUtf32)
59 return nUtf32 >= 0x30 && nUtf32 <= 0x39; // '0'--'9'
62 inline bool isAlpha(sal_uInt32 nUtf32)
64 // 'A'--'Z', 'a'--'z'
65 return (
66 (nUtf32 >= 0x41 && nUtf32 <= 0x5A) ||
67 (nUtf32 >= 0x61 && nUtf32 <= 0x7A)
71 inline bool isHighSurrogate(sal_uInt32 nUtf16)
73 return SAL_RTL_IS_HIGH_SURROGATE(nUtf16);
76 inline bool isLowSurrogate(sal_uInt32 nUtf16)
78 return SAL_RTL_IS_LOW_SURROGATE(nUtf16);
81 inline sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low)
83 return SAL_RTL_COMBINE_SURROGATES(high, low);
86 inline int getHexWeight(sal_uInt32 nUtf32)
88 return nUtf32 >= 0x30 && nUtf32 <= 0x39 ? // '0'--'9'
89 static_cast< int >(nUtf32 - 0x30) :
90 nUtf32 >= 0x41 && nUtf32 <= 0x46 ? // 'A'--'F'
91 static_cast< int >(nUtf32 - 0x41 + 10) :
92 nUtf32 >= 0x61 && nUtf32 <= 0x66 ? // 'a'--'f'
93 static_cast< int >(nUtf32 - 0x61 + 10) :
94 -1; // not a hex digit
97 inline bool isValid(sal_Bool const * pCharClass, sal_uInt32 nUtf32)
99 return nUtf32 < nCharClassSize && pCharClass[nUtf32];
102 inline void writeUnicode(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
103 sal_Unicode cChar)
105 rtl_uStringbuffer_insert(pBuffer, pCapacity, (*pBuffer)->length, &cChar, 1);
// Tells how readUcs4 obtained the value it returned.
enum EscapeType
{
    // The value was read literally from the input (no escape sequence).
    EscapeNo,

    // The value is a character decoded from one or more escape sequences.
    EscapeChar,

    // The value is the raw octet of a single escape sequence that could not
    // be decoded into a character.
    EscapeOctet
};
115 /* Read any of the following:
117 - sequence of escape sequences representing character from eCharset,
118 translated to single UCS4 character; or
120 - pair of UTF-16 surrogates, translated to single UCS4 character; or
122 _ single UTF-16 character, extended to UCS4 character.
124 sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
125 bool bEncoded, rtl_TextEncoding eCharset,
126 EscapeType * pType)
128 sal_uInt32 nChar = *(*pBegin)++;
129 int nWeight1;
130 int nWeight2;
131 if (nChar == cEscapePrefix && bEncoded && pEnd - *pBegin >= 2
132 && (nWeight1 = getHexWeight((*pBegin)[0])) >= 0
133 && (nWeight2 = getHexWeight((*pBegin)[1])) >= 0)
135 *pBegin += 2;
136 nChar = static_cast< sal_uInt32 >(nWeight1 << 4 | nWeight2);
137 if (nChar <= 0x7F)
138 *pType = EscapeChar;
139 else if (eCharset == RTL_TEXTENCODING_UTF8)
141 if (nChar >= 0xC0 && nChar <= 0xF4)
143 sal_uInt32 nEncoded;
144 int nShift;
145 sal_uInt32 nMin;
146 if (nChar <= 0xDF)
148 nEncoded = (nChar & 0x1F) << 6;
149 nShift = 0;
150 nMin = 0x80;
152 else if (nChar <= 0xEF)
154 nEncoded = (nChar & 0x0F) << 12;
155 nShift = 6;
156 nMin = 0x800;
158 else
160 nEncoded = (nChar & 0x07) << 18;
161 nShift = 12;
162 nMin = 0x10000;
164 sal_Unicode const * p = *pBegin;
165 bool bUTF8 = true;
166 for (; nShift >= 0; nShift -= 6)
168 if (pEnd - p < 3 || p[0] != cEscapePrefix
169 || (nWeight1 = getHexWeight(p[1])) < 8
170 || nWeight1 > 11
171 || (nWeight2 = getHexWeight(p[2])) < 0)
173 bUTF8 = sal_False;
174 break;
176 p += 3;
177 nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift;
179 if (bUTF8 && nEncoded >= nMin && !isHighSurrogate(nEncoded)
180 && !isLowSurrogate(nEncoded) && nEncoded <= 0x10FFFF)
182 *pBegin = p;
183 *pType = EscapeChar;
184 return nEncoded;
187 *pType = EscapeOctet;
189 else
191 rtl::OStringBuffer aBuf;
192 aBuf.append(static_cast< char >(nChar));
193 rtl_TextToUnicodeConverter aConverter
194 = rtl_createTextToUnicodeConverter(eCharset);
195 sal_Unicode const * p = *pBegin;
196 for (;;)
198 sal_Unicode aDst[2];
199 sal_uInt32 nInfo;
200 sal_Size nConverted;
201 sal_Size nDstSize = rtl_convertTextToUnicode(
202 aConverter, 0, aBuf.getStr(), aBuf.getLength(), aDst,
203 sizeof aDst / sizeof aDst[0],
204 (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
205 | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
206 | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR),
207 &nInfo, &nConverted);
208 if (nInfo == 0)
210 OSL_ASSERT(
211 nConverted
212 == sal::static_int_cast< sal_uInt32 >(
213 aBuf.getLength()));
214 rtl_destroyTextToUnicodeConverter(aConverter);
215 *pBegin = p;
216 *pType = EscapeChar;
217 OSL_ASSERT(
218 nDstSize == 1
219 || (nDstSize == 2 && isHighSurrogate(aDst[0])
220 && isLowSurrogate(aDst[1])));
221 return nDstSize == 1
222 ? aDst[0] : combineSurrogates(aDst[0], aDst[1]);
224 else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
225 && pEnd - p >= 3 && p[0] == cEscapePrefix
226 && (nWeight1 = getHexWeight(p[1])) >= 0
227 && (nWeight2 = getHexWeight(p[2])) >= 0)
229 p += 3;
230 aBuf.append(static_cast< char >(nWeight1 << 4 | nWeight2));
232 else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
233 && p != pEnd && *p <= 0x7F)
235 aBuf.append(static_cast< char >(*p++));
237 else
239 OSL_ASSERT(
240 (nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)
241 == 0);
242 break;
245 rtl_destroyTextToUnicodeConverter(aConverter);
246 *pType = EscapeOctet;
248 return nChar;
250 else
252 *pType = EscapeNo;
253 return isHighSurrogate(nChar) && *pBegin < pEnd
254 && isLowSurrogate(**pBegin) ?
255 combineSurrogates(nChar, *(*pBegin)++) : nChar;
259 void writeUcs4(rtl_uString ** pBuffer, sal_Int32 * pCapacity, sal_uInt32 nUtf32)
261 OSL_ENSURE(nUtf32 <= 0x10FFFF, "bad UTF-32 char");
262 if (nUtf32 <= 0xFFFF) {
263 writeUnicode(
264 pBuffer, pCapacity, static_cast< sal_Unicode >(nUtf32));
265 } else {
266 nUtf32 -= 0x10000;
267 writeUnicode(
268 pBuffer, pCapacity,
269 static_cast< sal_Unicode >(nUtf32 >> 10 | 0xD800));
270 writeUnicode(
271 pBuffer, pCapacity,
272 static_cast< sal_Unicode >((nUtf32 & 0x3FF) | 0xDC00));
276 void writeEscapeOctet(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
277 sal_uInt32 nOctet)
279 OSL_ENSURE(nOctet <= 0xFF, "bad octet");
281 static sal_Unicode const aHex[16]
282 = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
283 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }; /* '0'--'9', 'A'--'F' */
285 writeUnicode(pBuffer, pCapacity, cEscapePrefix);
286 writeUnicode(pBuffer, pCapacity, aHex[nOctet >> 4]);
287 writeUnicode(pBuffer, pCapacity, aHex[nOctet & 15]);
290 bool writeEscapeChar(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
291 sal_uInt32 nUtf32, rtl_TextEncoding eCharset, bool bStrict)
293 OSL_ENSURE(nUtf32 <= 0x10FFFF, "bad UTF-32 char");
294 if (eCharset == RTL_TEXTENCODING_UTF8) {
295 if (nUtf32 < 0x80)
296 writeEscapeOctet(pBuffer, pCapacity, nUtf32);
297 else if (nUtf32 < 0x800)
299 writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 6 | 0xC0);
300 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
302 else if (nUtf32 < 0x10000)
304 writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 12 | 0xE0);
305 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
306 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
308 else
310 writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 18 | 0xF0);
311 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 12 & 0x3F) | 0x80);
312 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
313 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
315 } else {
316 rtl_UnicodeToTextConverter aConverter
317 = rtl_createUnicodeToTextConverter(eCharset);
318 sal_Unicode aSrc[2];
319 sal_Size nSrcSize;
320 if (nUtf32 <= 0xFFFF)
322 aSrc[0] = static_cast< sal_Unicode >(nUtf32);
323 nSrcSize = 1;
325 else
327 aSrc[0] = static_cast< sal_Unicode >(
328 ((nUtf32 - 0x10000) >> 10) | 0xD800);
329 aSrc[1] = static_cast< sal_Unicode >(
330 ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00);
331 nSrcSize = 2;
333 sal_Char aDst[32]; // FIXME random value
334 sal_uInt32 nInfo;
335 sal_Size nConverted;
336 sal_Size nDstSize = rtl_convertUnicodeToText(
337 aConverter, 0, aSrc, nSrcSize, aDst, sizeof aDst,
338 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
339 | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
340 | RTL_UNICODETOTEXT_FLAGS_FLUSH,
341 &nInfo, &nConverted);
342 OSL_ASSERT((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0);
343 rtl_destroyUnicodeToTextConverter(aConverter);
344 if (nInfo == 0) {
345 OSL_ENSURE(nConverted == nSrcSize, "bad rtl_convertUnicodeToText");
346 for (sal_Size i = 0; i < nDstSize; ++i)
347 writeEscapeOctet(pBuffer, pCapacity,
348 static_cast< unsigned char >(aDst[i]));
349 // FIXME all octets are escaped, even if there is no need
350 } else {
351 if (bStrict) {
352 return false;
353 } else {
354 writeUcs4(pBuffer, pCapacity, nUtf32);
358 return true;
361 struct Component
363 sal_Unicode const * pBegin;
364 sal_Unicode const * pEnd;
366 inline Component(): pBegin(0) {}
368 inline bool isPresent() const { return pBegin != 0; }
370 inline sal_Int32 getLength() const;
373 inline sal_Int32 Component::getLength() const
375 OSL_ENSURE(isPresent(), "taking length of non-present component");
376 return static_cast< sal_Int32 >(pEnd - pBegin);
379 struct Components
381 Component aScheme;
382 Component aAuthority;
383 Component aPath;
384 Component aQuery;
385 Component aFragment;
388 void parseUriRef(rtl_uString const * pUriRef, Components * pComponents)
390 // This algorithm is liberal and accepts various forms of illegal input.
392 sal_Unicode const * pBegin = pUriRef->buffer;
393 sal_Unicode const * pEnd = pBegin + pUriRef->length;
394 sal_Unicode const * pPos = pBegin;
396 if (pPos != pEnd && isAlpha(*pPos))
397 for (sal_Unicode const * p = pPos + 1; p != pEnd; ++p)
398 if (*p == ':')
400 pComponents->aScheme.pBegin = pBegin;
401 pComponents->aScheme.pEnd = ++p;
402 pPos = p;
403 break;
405 else if (!isAlpha(*p) && !isDigit(*p) && *p != '+' && *p != '-'
406 && *p != '.')
407 break;
409 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
411 pComponents->aAuthority.pBegin = pPos;
412 pPos += 2;
413 while (pPos != pEnd && *pPos != '/' && *pPos != '?' && *pPos != '#')
414 ++pPos;
415 pComponents->aAuthority.pEnd = pPos;
418 pComponents->aPath.pBegin = pPos;
419 while (pPos != pEnd && *pPos != '?' && * pPos != '#')
420 ++pPos;
421 pComponents->aPath.pEnd = pPos;
423 if (pPos != pEnd && *pPos == '?')
425 pComponents->aQuery.pBegin = pPos++;
426 while (pPos != pEnd && * pPos != '#')
427 ++pPos;
428 pComponents->aQuery.pEnd = pPos;
431 if (pPos != pEnd)
433 OSL_ASSERT(*pPos == '#');
434 pComponents->aFragment.pBegin = pPos;
435 pComponents->aFragment.pEnd = pEnd;
439 rtl::OUString joinPaths(Component const & rBasePath, Component const & rRelPath)
441 OSL_ASSERT(rBasePath.isPresent() && *rBasePath.pBegin == '/');
442 OSL_ASSERT(rRelPath.isPresent());
444 // The invariant of aBuffer is that it always starts and ends with a slash
445 // (until probably right at the end of the algorithm, when the last segment
446 // of rRelPath is added, which does not necessarily end in a slash):
447 rtl::OUStringBuffer aBuffer(rBasePath.getLength() + rRelPath.getLength());
448 // XXX numeric overflow
450 // Segments "." and ".." within rBasePath are not conisdered special (but
451 // are also not removed by ".." segments within rRelPath), RFC 2396 seems a
452 // bit unclear about this point:
453 sal_Int32 nFixed = 1;
454 sal_Unicode const * p = rBasePath.pBegin + 1;
455 for (sal_Unicode const * q = p; q != rBasePath.pEnd; ++q)
456 if (*q == '/')
458 if (
459 (q - p == 1 && p[0] == '.') ||
460 (q - p == 2 && p[0] == '.' && p[1] == '.')
463 nFixed = q + 1 - rBasePath.pBegin;
465 p = q + 1;
467 aBuffer.append(rBasePath.pBegin, p - rBasePath.pBegin);
469 p = rRelPath.pBegin;
470 if (p != rRelPath.pEnd)
471 for (;;)
473 sal_Unicode const * q = p;
474 sal_Unicode const * r;
475 for (;;)
477 if (q == rRelPath.pEnd)
479 r = q;
480 break;
482 if (*q == '/')
484 r = q + 1;
485 break;
487 ++q;
489 if (q - p == 2 && p[0] == '.' && p[1] == '.')
491 // Erroneous excess segments ".." within rRelPath are left
492 // intact, as the examples in RFC 2396, section C.2, suggest:
493 sal_Int32 i = aBuffer.getLength() - 1;
494 if (i < nFixed)
496 aBuffer.append(p, r - p);
497 nFixed += 3;
499 else
501 while (aBuffer.charAt(i - 1) != '/')
502 --i;
503 aBuffer.setLength(i);
506 else if (q - p != 1 || *p != '.')
507 aBuffer.append(p, r - p);
508 if (q == rRelPath.pEnd)
509 break;
510 p = q + 1;
513 return aBuffer.makeStringAndClear();
518 sal_Bool const * SAL_CALL rtl_getUriCharClass(rtl_UriCharClass eCharClass)
519 SAL_THROW_EXTERN_C()
521 static sal_Bool const aCharClass[][nCharClassSize]
522 = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* None */
523 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
524 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* !"#$%&'()*+,-./*/
525 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*0123456789:;<=>?*/
526 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*@ABCDEFGHIJKLMNO*/
527 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*PQRSTUVWXYZ[\]^_*/
528 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*`abcdefghijklmno*/
529 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /*pqrstuvwxyz{|}~ */
531 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Uric */
532 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
533 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* !"#$%&'()*+,-./*/
534 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
535 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
536 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*PQRSTUVWXYZ[\]^_*/
537 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
538 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
540 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UricNoSlash */
541 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
542 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
543 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
544 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
545 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
546 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
547 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
549 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RelSegment */
550 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
551 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
552 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
553 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
554 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
555 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
556 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
558 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RegName */
559 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
560 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
561 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
562 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
563 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
564 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
565 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
567 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Userinfo */
568 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
569 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
570 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
571 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
572 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
573 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
574 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
576 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Pchar */
577 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
578 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
579 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, /*0123456789:;<=>?*/
580 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
581 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
582 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
583 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
585 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UnoParamValue */
586 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
587 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, /* !"#$%&'()*+,-./*/
588 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*0123456789:;<=>?*/
589 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
590 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
591 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
592 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
594 OSL_ENSURE(
595 (eCharClass >= 0
596 && (sal::static_int_cast< std::size_t >(eCharClass)
597 < sizeof aCharClass / sizeof aCharClass[0])),
598 "bad eCharClass");
599 return aCharClass[eCharClass];
602 void SAL_CALL rtl_uriEncode(rtl_uString * pText, sal_Bool const * pCharClass,
603 rtl_UriEncodeMechanism eMechanism,
604 rtl_TextEncoding eCharset, rtl_uString ** pResult)
605 SAL_THROW_EXTERN_C()
607 OSL_ENSURE(!pCharClass[0x25], "bad pCharClass");
608 // make sure the percent sign is encoded...
610 sal_Unicode const * p = pText->buffer;
611 sal_Unicode const * pEnd = p + pText->length;
612 sal_Int32 nCapacity = 0;
613 rtl_uString_new(pResult);
614 while (p < pEnd)
616 EscapeType eType;
617 sal_uInt32 nUtf32 = readUcs4(
618 &p, pEnd,
619 (eMechanism == rtl_UriEncodeKeepEscapes
620 || eMechanism == rtl_UriEncodeCheckEscapes
621 || eMechanism == rtl_UriEncodeStrictKeepEscapes),
622 eCharset, &eType);
623 switch (eType)
625 case EscapeNo:
626 if (isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
627 writeUnicode(pResult, &nCapacity,
628 static_cast< sal_Unicode >(nUtf32));
629 else if (!writeEscapeChar(
630 pResult, &nCapacity, nUtf32, eCharset,
631 (eMechanism == rtl_UriEncodeStrict
632 || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
634 rtl_uString_new(pResult);
635 return;
637 break;
639 case EscapeChar:
640 if (eMechanism == rtl_UriEncodeCheckEscapes
641 && isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
642 writeUnicode(pResult, &nCapacity,
643 static_cast< sal_Unicode >(nUtf32));
644 else if (!writeEscapeChar(
645 pResult, &nCapacity, nUtf32, eCharset,
646 (eMechanism == rtl_UriEncodeStrict
647 || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
649 rtl_uString_new(pResult);
650 return;
652 break;
654 case EscapeOctet:
655 writeEscapeOctet(pResult, &nCapacity, nUtf32);
656 break;
661 void SAL_CALL rtl_uriDecode(rtl_uString * pText,
662 rtl_UriDecodeMechanism eMechanism,
663 rtl_TextEncoding eCharset, rtl_uString ** pResult)
664 SAL_THROW_EXTERN_C()
666 switch (eMechanism)
668 case rtl_UriDecodeNone:
669 rtl_uString_assign(pResult, pText);
670 break;
672 case rtl_UriDecodeToIuri:
673 eCharset = RTL_TEXTENCODING_UTF8;
674 default: // rtl_UriDecodeWithCharset, rtl_UriDecodeStrict
676 sal_Unicode const * p = pText->buffer;
677 sal_Unicode const * pEnd = p + pText->length;
678 sal_Int32 nCapacity = 0;
679 rtl_uString_new(pResult);
680 while (p < pEnd)
682 EscapeType eType;
683 sal_uInt32 nUtf32 = readUcs4(&p, pEnd, true, eCharset, &eType);
684 switch (eType)
686 case EscapeChar:
687 if (nUtf32 <= 0x7F && eMechanism == rtl_UriDecodeToIuri)
689 writeEscapeOctet(pResult, &nCapacity, nUtf32);
690 break;
692 case EscapeNo:
693 writeUcs4(pResult, &nCapacity, nUtf32);
694 break;
696 case EscapeOctet:
697 if (eMechanism == rtl_UriDecodeStrict) {
698 rtl_uString_new(pResult);
699 return;
701 writeEscapeOctet(pResult, &nCapacity, nUtf32);
702 break;
706 break;
710 sal_Bool SAL_CALL rtl_uriConvertRelToAbs(rtl_uString * pBaseUriRef,
711 rtl_uString * pRelUriRef,
712 rtl_uString ** pResult,
713 rtl_uString ** pException)
714 SAL_THROW_EXTERN_C()
716 // If pRelUriRef starts with a scheme component it is an absolute URI
717 // reference, and we are done (i.e., this algorithm does not support
718 // backwards-compatible relative URIs starting with a scheme component, see
719 // RFC 2396, section 5.2, step 3):
720 Components aRelComponents;
721 parseUriRef(pRelUriRef, &aRelComponents);
722 if (aRelComponents.aScheme.isPresent())
724 rtl_uString_assign(pResult, pRelUriRef);
725 return true;
728 // Parse pBaseUriRef; if the scheme component is not present or not valid,
729 // or the path component is not empty and starts with anything but a slash,
730 // an exception is raised:
731 Components aBaseComponents;
732 parseUriRef(pBaseUriRef, &aBaseComponents);
733 if (!aBaseComponents.aScheme.isPresent())
735 rtl::OUString aMessage(pBaseUriRef);
736 aMessage += rtl::OUString(
737 RTL_CONSTASCII_USTRINGPARAM(
738 " does not start with a scheme component"));
739 rtl_uString_assign(pException,
740 const_cast< rtl::OUString & >(aMessage).pData);
741 return false;
743 if (aBaseComponents.aPath.pBegin != aBaseComponents.aPath.pEnd
744 && *aBaseComponents.aPath.pBegin != '/')
746 rtl::OUString aMessage(pBaseUriRef);
747 aMessage += rtl::OUString(
748 RTL_CONSTASCII_USTRINGPARAM(
749 "path component does not start with slash"));
750 rtl_uString_assign(pException, aMessage.pData);
751 return false;
754 // Use the algorithm from RFC 2396, section 5.2, to turn the relative URI
755 // into an absolute one (if the relative URI is a reference to the "current
756 // document," the "current document" is here taken to be the base URI):
757 rtl::OUStringBuffer aBuffer;
758 aBuffer.append(aBaseComponents.aScheme.pBegin,
759 aBaseComponents.aScheme.getLength());
760 if (aRelComponents.aAuthority.isPresent())
762 aBuffer.append(aRelComponents.aAuthority.pBegin,
763 aRelComponents.aAuthority.getLength());
764 aBuffer.append(aRelComponents.aPath.pBegin,
765 aRelComponents.aPath.getLength());
766 if (aRelComponents.aQuery.isPresent())
767 aBuffer.append(aRelComponents.aQuery.pBegin,
768 aRelComponents.aQuery.getLength());
770 else
772 if (aBaseComponents.aAuthority.isPresent())
773 aBuffer.append(aBaseComponents.aAuthority.pBegin,
774 aBaseComponents.aAuthority.getLength());
775 if (aRelComponents.aPath.pBegin == aRelComponents.aPath.pEnd
776 && !aRelComponents.aQuery.isPresent())
778 aBuffer.append(aBaseComponents.aPath.pBegin,
779 aBaseComponents.aPath.getLength());
780 if (aBaseComponents.aQuery.isPresent())
781 aBuffer.append(aBaseComponents.aQuery.pBegin,
782 aBaseComponents.aQuery.getLength());
784 else
786 if (*aRelComponents.aPath.pBegin == '/')
787 aBuffer.append(aRelComponents.aPath.pBegin,
788 aRelComponents.aPath.getLength());
789 else
790 aBuffer.append(joinPaths(aBaseComponents.aPath,
791 aRelComponents.aPath));
792 if (aRelComponents.aQuery.isPresent())
793 aBuffer.append(aRelComponents.aQuery.pBegin,
794 aRelComponents.aQuery.getLength());
797 if (aRelComponents.aFragment.isPresent())
798 aBuffer.append(aRelComponents.aFragment.pBegin,
799 aRelComponents.aFragment.getLength());
800 rtl_uString_assign(pResult, aBuffer.makeStringAndClear().pData);
801 return true;