Bump version to 4.1-6
[LibreOffice.git] / sal / rtl / uri.cxx
blob7e4402f785c8299279a36051d732d70ae217460c
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include "surrogates.hxx"
22 #include "osl/diagnose.h"
23 #include "rtl/character.hxx"
24 #include "rtl/strbuf.hxx"
25 #include "rtl/textenc.h"
26 #include "rtl/textcvt.h"
27 #include "rtl/uri.h"
28 #include "rtl/ustrbuf.h"
29 #include "rtl/ustrbuf.hxx"
30 #include "rtl/ustring.h"
31 #include "rtl/ustring.hxx"
32 #include "sal/types.h"
33 #include "sal/macros.h"
35 #include <cstddef>
37 namespace {
39 std::size_t const nCharClassSize = 128;
41 sal_Unicode const cEscapePrefix = 0x25; // '%'
43 inline bool isHighSurrogate(sal_uInt32 nUtf16)
45 return SAL_RTL_IS_HIGH_SURROGATE(nUtf16);
48 inline bool isLowSurrogate(sal_uInt32 nUtf16)
50 return SAL_RTL_IS_LOW_SURROGATE(nUtf16);
53 inline sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low)
55 return SAL_RTL_COMBINE_SURROGATES(high, low);
58 inline int getHexWeight(sal_uInt32 nUtf32)
60 return nUtf32 >= 0x30 && nUtf32 <= 0x39 ? // '0'--'9'
61 static_cast< int >(nUtf32 - 0x30) :
62 nUtf32 >= 0x41 && nUtf32 <= 0x46 ? // 'A'--'F'
63 static_cast< int >(nUtf32 - 0x41 + 10) :
64 nUtf32 >= 0x61 && nUtf32 <= 0x66 ? // 'a'--'f'
65 static_cast< int >(nUtf32 - 0x61 + 10) :
66 -1; // not a hex digit
69 inline bool isValid(sal_Bool const * pCharClass, sal_uInt32 nUtf32)
71 return nUtf32 < nCharClassSize && pCharClass[nUtf32];
74 inline void writeUnicode(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
75 sal_Unicode cChar)
77 rtl_uStringbuffer_insert(pBuffer, pCapacity, (*pBuffer)->length, &cChar, 1);
// Classification of what readUcs4 consumed from its input.
enum EscapeType
{
    // No escape sequence was read; the value is a plain character.
    EscapeNo,
    // One or more escape sequences were read and decoded into a character.
    EscapeChar,
    // A single escape sequence was read that could not be decoded into a
    // character; the value is the raw octet.
    EscapeOctet
};
87 /* Read any of the following:
89 - sequence of escape sequences representing character from eCharset,
90 translated to single UCS4 character; or
92 - pair of UTF-16 surrogates, translated to single UCS4 character; or
94 _ single UTF-16 character, extended to UCS4 character.
96 sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
97 bool bEncoded, rtl_TextEncoding eCharset,
98 EscapeType * pType)
100 sal_uInt32 nChar = *(*pBegin)++;
101 int nWeight1;
102 int nWeight2;
103 if (nChar == cEscapePrefix && bEncoded && pEnd - *pBegin >= 2
104 && (nWeight1 = getHexWeight((*pBegin)[0])) >= 0
105 && (nWeight2 = getHexWeight((*pBegin)[1])) >= 0)
107 *pBegin += 2;
108 nChar = static_cast< sal_uInt32 >(nWeight1 << 4 | nWeight2);
109 if (nChar <= 0x7F)
110 *pType = EscapeChar;
111 else if (eCharset == RTL_TEXTENCODING_UTF8)
113 if (nChar >= 0xC0 && nChar <= 0xF4)
115 sal_uInt32 nEncoded;
116 int nShift;
117 sal_uInt32 nMin;
118 if (nChar <= 0xDF)
120 nEncoded = (nChar & 0x1F) << 6;
121 nShift = 0;
122 nMin = 0x80;
124 else if (nChar <= 0xEF)
126 nEncoded = (nChar & 0x0F) << 12;
127 nShift = 6;
128 nMin = 0x800;
130 else
132 nEncoded = (nChar & 0x07) << 18;
133 nShift = 12;
134 nMin = 0x10000;
136 sal_Unicode const * p = *pBegin;
137 bool bUTF8 = true;
138 for (; nShift >= 0; nShift -= 6)
140 if (pEnd - p < 3 || p[0] != cEscapePrefix
141 || (nWeight1 = getHexWeight(p[1])) < 8
142 || nWeight1 > 11
143 || (nWeight2 = getHexWeight(p[2])) < 0)
145 bUTF8 = sal_False;
146 break;
148 p += 3;
149 nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift;
151 if (bUTF8 && nEncoded >= nMin && !isHighSurrogate(nEncoded)
152 && !isLowSurrogate(nEncoded) && nEncoded <= 0x10FFFF)
154 *pBegin = p;
155 *pType = EscapeChar;
156 return nEncoded;
159 *pType = EscapeOctet;
161 else
163 rtl::OStringBuffer aBuf;
164 aBuf.append(static_cast< char >(nChar));
165 rtl_TextToUnicodeConverter aConverter
166 = rtl_createTextToUnicodeConverter(eCharset);
167 sal_Unicode const * p = *pBegin;
168 for (;;)
170 sal_Unicode aDst[2];
171 sal_uInt32 nInfo;
172 sal_Size nConverted;
173 sal_Size nDstSize = rtl_convertTextToUnicode(
174 aConverter, 0, aBuf.getStr(), aBuf.getLength(), aDst,
175 SAL_N_ELEMENTS( aDst ),
176 (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
177 | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
178 | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR),
179 &nInfo, &nConverted);
180 if (nInfo == 0)
182 assert( nConverted
183 == sal::static_int_cast< sal_uInt32 >(
184 aBuf.getLength()));
185 rtl_destroyTextToUnicodeConverter(aConverter);
186 *pBegin = p;
187 *pType = EscapeChar;
188 assert( nDstSize == 1
189 || (nDstSize == 2 && isHighSurrogate(aDst[0])
190 && isLowSurrogate(aDst[1])));
191 return nDstSize == 1
192 ? aDst[0] : combineSurrogates(aDst[0], aDst[1]);
194 else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
195 && pEnd - p >= 3 && p[0] == cEscapePrefix
196 && (nWeight1 = getHexWeight(p[1])) >= 0
197 && (nWeight2 = getHexWeight(p[2])) >= 0)
199 p += 3;
200 aBuf.append(static_cast< char >(nWeight1 << 4 | nWeight2));
202 else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
203 && p != pEnd && *p <= 0x7F)
205 aBuf.append(static_cast< char >(*p++));
207 else
209 assert(
210 (nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)
211 == 0);
212 break;
215 rtl_destroyTextToUnicodeConverter(aConverter);
216 *pType = EscapeOctet;
218 return nChar;
220 else
222 *pType = EscapeNo;
223 return isHighSurrogate(nChar) && *pBegin < pEnd
224 && isLowSurrogate(**pBegin) ?
225 combineSurrogates(nChar, *(*pBegin)++) : nChar;
229 void writeUcs4(rtl_uString ** pBuffer, sal_Int32 * pCapacity, sal_uInt32 nUtf32)
231 assert(nUtf32 <= 0x10FFFF); // bad UTF-32 char
232 if (nUtf32 <= 0xFFFF) {
233 writeUnicode(
234 pBuffer, pCapacity, static_cast< sal_Unicode >(nUtf32));
235 } else {
236 nUtf32 -= 0x10000;
237 writeUnicode(
238 pBuffer, pCapacity,
239 static_cast< sal_Unicode >(nUtf32 >> 10 | 0xD800));
240 writeUnicode(
241 pBuffer, pCapacity,
242 static_cast< sal_Unicode >((nUtf32 & 0x3FF) | 0xDC00));
246 void writeEscapeOctet(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
247 sal_uInt32 nOctet)
249 assert(nOctet <= 0xFF); // bad octet
251 static sal_Unicode const aHex[16]
252 = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
253 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }; /* '0'--'9', 'A'--'F' */
255 writeUnicode(pBuffer, pCapacity, cEscapePrefix);
256 writeUnicode(pBuffer, pCapacity, aHex[nOctet >> 4]);
257 writeUnicode(pBuffer, pCapacity, aHex[nOctet & 15]);
260 bool writeEscapeChar(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
261 sal_uInt32 nUtf32, rtl_TextEncoding eCharset, bool bStrict)
263 assert(nUtf32 <= 0x10FFFF); // bad UTF-32 char
264 if (eCharset == RTL_TEXTENCODING_UTF8) {
265 if (nUtf32 < 0x80)
266 writeEscapeOctet(pBuffer, pCapacity, nUtf32);
267 else if (nUtf32 < 0x800)
269 writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 6 | 0xC0);
270 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
272 else if (nUtf32 < 0x10000)
274 writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 12 | 0xE0);
275 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
276 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
278 else
280 writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 18 | 0xF0);
281 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 12 & 0x3F) | 0x80);
282 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
283 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
285 } else {
286 rtl_UnicodeToTextConverter aConverter
287 = rtl_createUnicodeToTextConverter(eCharset);
288 sal_Unicode aSrc[2];
289 sal_Size nSrcSize;
290 if (nUtf32 <= 0xFFFF)
292 aSrc[0] = static_cast< sal_Unicode >(nUtf32);
293 nSrcSize = 1;
295 else
297 aSrc[0] = static_cast< sal_Unicode >(
298 ((nUtf32 - 0x10000) >> 10) | 0xD800);
299 aSrc[1] = static_cast< sal_Unicode >(
300 ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00);
301 nSrcSize = 2;
303 sal_Char aDst[32]; // FIXME random value
304 sal_uInt32 nInfo;
305 sal_Size nConverted;
306 sal_Size nDstSize = rtl_convertUnicodeToText(
307 aConverter, 0, aSrc, nSrcSize, aDst, sizeof aDst,
308 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
309 | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
310 | RTL_UNICODETOTEXT_FLAGS_FLUSH,
311 &nInfo, &nConverted);
312 assert((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0);
313 rtl_destroyUnicodeToTextConverter(aConverter);
314 if (nInfo == 0) {
315 assert(nConverted == nSrcSize); // bad rtl_convertUnicodeToText
316 for (sal_Size i = 0; i < nDstSize; ++i)
317 writeEscapeOctet(pBuffer, pCapacity,
318 static_cast< unsigned char >(aDst[i]));
319 // FIXME all octets are escaped, even if there is no need
320 } else {
321 if (bStrict) {
322 return false;
323 } else {
324 writeUcs4(pBuffer, pCapacity, nUtf32);
328 return true;
331 struct Component
333 sal_Unicode const * pBegin;
334 sal_Unicode const * pEnd;
336 inline Component(): pBegin(0), pEnd(0) {}
338 inline bool isPresent() const { return pBegin != 0; }
340 inline sal_Int32 getLength() const;
343 inline sal_Int32 Component::getLength() const
345 assert(isPresent()); // taking length of non-present component
346 return static_cast< sal_Int32 >(pEnd - pBegin);
349 struct Components
351 Component aScheme;
352 Component aAuthority;
353 Component aPath;
354 Component aQuery;
355 Component aFragment;
358 void parseUriRef(rtl_uString const * pUriRef, Components * pComponents)
360 // This algorithm is liberal and accepts various forms of illegal input.
362 sal_Unicode const * pBegin = pUriRef->buffer;
363 sal_Unicode const * pEnd = pBegin + pUriRef->length;
364 sal_Unicode const * pPos = pBegin;
366 if (pPos != pEnd && rtl::isAsciiAlpha(*pPos))
368 for (sal_Unicode const * p = pPos + 1; p != pEnd; ++p)
370 if (*p == ':')
372 pComponents->aScheme.pBegin = pBegin;
373 pComponents->aScheme.pEnd = ++p;
374 pPos = p;
375 break;
377 else if (!rtl::isAsciiAlphanumeric(*p) && *p != '+' && *p != '-'
378 && *p != '.')
380 break;
385 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
387 pComponents->aAuthority.pBegin = pPos;
388 pPos += 2;
389 while (pPos != pEnd && *pPos != '/' && *pPos != '?' && *pPos != '#')
390 ++pPos;
391 pComponents->aAuthority.pEnd = pPos;
394 pComponents->aPath.pBegin = pPos;
395 while (pPos != pEnd && *pPos != '?' && * pPos != '#')
396 ++pPos;
397 pComponents->aPath.pEnd = pPos;
399 if (pPos != pEnd && *pPos == '?')
401 pComponents->aQuery.pBegin = pPos++;
402 while (pPos != pEnd && * pPos != '#')
403 ++pPos;
404 pComponents->aQuery.pEnd = pPos;
407 if (pPos != pEnd)
409 assert(*pPos == '#');
410 pComponents->aFragment.pBegin = pPos;
411 pComponents->aFragment.pEnd = pEnd;
415 rtl::OUString joinPaths(Component const & rBasePath, Component const & rRelPath)
417 assert(rBasePath.isPresent() && *rBasePath.pBegin == '/');
418 assert(rRelPath.isPresent());
420 // The invariant of aBuffer is that it always starts and ends with a slash
421 // (until probably right at the end of the algorithm, when the last segment
422 // of rRelPath is added, which does not necessarily end in a slash):
423 rtl::OUStringBuffer aBuffer(rBasePath.getLength() + rRelPath.getLength());
424 // XXX numeric overflow
426 // Segments "." and ".." within rBasePath are not conisdered special (but
427 // are also not removed by ".." segments within rRelPath), RFC 2396 seems a
428 // bit unclear about this point:
429 sal_Int32 nFixed = 1;
430 sal_Unicode const * p = rBasePath.pBegin + 1;
431 for (sal_Unicode const * q = p; q != rBasePath.pEnd; ++q)
432 if (*q == '/')
434 if (
435 (q - p == 1 && p[0] == '.') ||
436 (q - p == 2 && p[0] == '.' && p[1] == '.')
439 nFixed = q + 1 - rBasePath.pBegin;
441 p = q + 1;
443 aBuffer.append(rBasePath.pBegin, p - rBasePath.pBegin);
445 p = rRelPath.pBegin;
446 if (p != rRelPath.pEnd)
447 for (;;)
449 sal_Unicode const * q = p;
450 sal_Unicode const * r;
451 for (;;)
453 if (q == rRelPath.pEnd)
455 r = q;
456 break;
458 if (*q == '/')
460 r = q + 1;
461 break;
463 ++q;
465 if (q - p == 2 && p[0] == '.' && p[1] == '.')
467 // Erroneous excess segments ".." within rRelPath are left
468 // intact, as the examples in RFC 2396, section C.2, suggest:
469 sal_Int32 i = aBuffer.getLength() - 1;
470 if (i < nFixed)
472 aBuffer.append(p, r - p);
473 nFixed += 3;
475 else
477 while (i > 0 && aBuffer[i - 1] != '/')
478 --i;
479 aBuffer.setLength(i);
482 else if (q - p != 1 || *p != '.')
483 aBuffer.append(p, r - p);
484 if (q == rRelPath.pEnd)
485 break;
486 p = q + 1;
489 return aBuffer.makeStringAndClear();
494 sal_Bool const * SAL_CALL rtl_getUriCharClass(rtl_UriCharClass eCharClass)
495 SAL_THROW_EXTERN_C()
497 static sal_Bool const aCharClass[][nCharClassSize]
498 = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* None */
499 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
500 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* !"#$%&'()*+,-./*/
501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*0123456789:;<=>?*/
502 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*@ABCDEFGHIJKLMNO*/
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*PQRSTUVWXYZ[\]^_*/
504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*`abcdefghijklmno*/
505 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /*pqrstuvwxyz{|}~ */
507 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Uric */
508 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
509 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* !"#$%&'()*+,-./*/
510 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
511 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
512 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*PQRSTUVWXYZ[\]^_*/
513 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
514 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
516 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UricNoSlash */
517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
518 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
519 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
520 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
521 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
522 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
523 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
525 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RelSegment */
526 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
527 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
528 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
529 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
530 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
531 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
532 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
534 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RegName */
535 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
536 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
537 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
538 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
539 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
540 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
541 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
543 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Userinfo */
544 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
545 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
546 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
547 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
548 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
549 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
550 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
552 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Pchar */
553 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
554 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
555 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, /*0123456789:;<=>?*/
556 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
557 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
558 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
559 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
561 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UnoParamValue */
562 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
563 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, /* !"#$%&'()*+,-./*/
564 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*0123456789:;<=>?*/
565 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
566 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
567 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
568 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
570 assert(
571 (eCharClass >= 0
572 && (sal::static_int_cast< std::size_t >(eCharClass)
573 < SAL_N_ELEMENTS(aCharClass)))); // bad eCharClass
574 return aCharClass[eCharClass];
577 void SAL_CALL rtl_uriEncode(rtl_uString * pText, sal_Bool const * pCharClass,
578 rtl_UriEncodeMechanism eMechanism,
579 rtl_TextEncoding eCharset, rtl_uString ** pResult)
580 SAL_THROW_EXTERN_C()
582 assert(!pCharClass[0x25]); // make sure the percent sign is encoded...
584 sal_Unicode const * p = pText->buffer;
585 sal_Unicode const * pEnd = p + pText->length;
586 sal_Int32 nCapacity = pText->length;
587 rtl_uString_new_WithLength(pResult, nCapacity);
588 while (p < pEnd)
590 EscapeType eType;
591 sal_uInt32 nUtf32 = readUcs4(
592 &p, pEnd,
593 (eMechanism == rtl_UriEncodeKeepEscapes
594 || eMechanism == rtl_UriEncodeCheckEscapes
595 || eMechanism == rtl_UriEncodeStrictKeepEscapes),
596 eCharset, &eType);
597 switch (eType)
599 case EscapeNo:
600 if (isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
601 writeUnicode(pResult, &nCapacity,
602 static_cast< sal_Unicode >(nUtf32));
603 else if (!writeEscapeChar(
604 pResult, &nCapacity, nUtf32, eCharset,
605 (eMechanism == rtl_UriEncodeStrict
606 || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
608 rtl_uString_new(pResult);
609 return;
611 break;
613 case EscapeChar:
614 if (eMechanism == rtl_UriEncodeCheckEscapes
615 && isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
616 writeUnicode(pResult, &nCapacity,
617 static_cast< sal_Unicode >(nUtf32));
618 else if (!writeEscapeChar(
619 pResult, &nCapacity, nUtf32, eCharset,
620 (eMechanism == rtl_UriEncodeStrict
621 || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
623 rtl_uString_new(pResult);
624 return;
626 break;
628 case EscapeOctet:
629 writeEscapeOctet(pResult, &nCapacity, nUtf32);
630 break;
633 *pResult = rtl_uStringBuffer_makeStringAndClear( pResult, &nCapacity );
636 void SAL_CALL rtl_uriDecode(rtl_uString * pText,
637 rtl_UriDecodeMechanism eMechanism,
638 rtl_TextEncoding eCharset, rtl_uString ** pResult)
639 SAL_THROW_EXTERN_C()
641 switch (eMechanism)
643 case rtl_UriDecodeNone:
644 rtl_uString_assign(pResult, pText);
645 break;
647 case rtl_UriDecodeToIuri:
648 eCharset = RTL_TEXTENCODING_UTF8;
649 default: // rtl_UriDecodeWithCharset, rtl_UriDecodeStrict
651 sal_Unicode const * p = pText->buffer;
652 sal_Unicode const * pEnd = p + pText->length;
653 sal_Int32 nCapacity = pText->length;
654 rtl_uString_new_WithLength(pResult, nCapacity);
655 while (p < pEnd)
657 EscapeType eType;
658 sal_uInt32 nUtf32 = readUcs4(&p, pEnd, true, eCharset, &eType);
659 switch (eType)
661 case EscapeChar:
662 if (nUtf32 <= 0x7F && eMechanism == rtl_UriDecodeToIuri)
664 writeEscapeOctet(pResult, &nCapacity, nUtf32);
665 break;
667 case EscapeNo:
668 writeUcs4(pResult, &nCapacity, nUtf32);
669 break;
671 case EscapeOctet:
672 if (eMechanism == rtl_UriDecodeStrict) {
673 rtl_uString_new(pResult);
674 return;
676 writeEscapeOctet(pResult, &nCapacity, nUtf32);
677 break;
680 *pResult = rtl_uStringBuffer_makeStringAndClear( pResult, &nCapacity );
682 break;
686 sal_Bool SAL_CALL rtl_uriConvertRelToAbs(rtl_uString * pBaseUriRef,
687 rtl_uString * pRelUriRef,
688 rtl_uString ** pResult,
689 rtl_uString ** pException)
690 SAL_THROW_EXTERN_C()
692 // If pRelUriRef starts with a scheme component it is an absolute URI
693 // reference, and we are done (i.e., this algorithm does not support
694 // backwards-compatible relative URIs starting with a scheme component, see
695 // RFC 2396, section 5.2, step 3):
696 Components aRelComponents;
697 parseUriRef(pRelUriRef, &aRelComponents);
698 if (aRelComponents.aScheme.isPresent())
700 rtl_uString_assign(pResult, pRelUriRef);
701 return true;
704 // Parse pBaseUriRef; if the scheme component is not present or not valid,
705 // or the path component is not empty and starts with anything but a slash,
706 // an exception is raised:
707 Components aBaseComponents;
708 parseUriRef(pBaseUriRef, &aBaseComponents);
709 if (!aBaseComponents.aScheme.isPresent())
711 rtl::OUString aMessage(pBaseUriRef);
712 aMessage += rtl::OUString(
713 " does not start with a scheme component");
714 rtl_uString_assign(pException,
715 const_cast< rtl::OUString & >(aMessage).pData);
716 return false;
718 if (aBaseComponents.aPath.pBegin != aBaseComponents.aPath.pEnd
719 && *aBaseComponents.aPath.pBegin != '/')
721 rtl::OUString aMessage(pBaseUriRef);
722 aMessage += rtl::OUString(
723 "path component does not start with slash");
724 rtl_uString_assign(pException, aMessage.pData);
725 return false;
728 // Use the algorithm from RFC 2396, section 5.2, to turn the relative URI
729 // into an absolute one (if the relative URI is a reference to the "current
730 // document," the "current document" is here taken to be the base URI):
731 rtl::OUStringBuffer aBuffer;
732 aBuffer.append(aBaseComponents.aScheme.pBegin,
733 aBaseComponents.aScheme.getLength());
734 if (aRelComponents.aAuthority.isPresent())
736 aBuffer.append(aRelComponents.aAuthority.pBegin,
737 aRelComponents.aAuthority.getLength());
738 aBuffer.append(aRelComponents.aPath.pBegin,
739 aRelComponents.aPath.getLength());
740 if (aRelComponents.aQuery.isPresent())
741 aBuffer.append(aRelComponents.aQuery.pBegin,
742 aRelComponents.aQuery.getLength());
744 else
746 if (aBaseComponents.aAuthority.isPresent())
747 aBuffer.append(aBaseComponents.aAuthority.pBegin,
748 aBaseComponents.aAuthority.getLength());
749 if (aRelComponents.aPath.pBegin == aRelComponents.aPath.pEnd
750 && !aRelComponents.aQuery.isPresent())
752 aBuffer.append(aBaseComponents.aPath.pBegin,
753 aBaseComponents.aPath.getLength());
754 if (aBaseComponents.aQuery.isPresent())
755 aBuffer.append(aBaseComponents.aQuery.pBegin,
756 aBaseComponents.aQuery.getLength());
758 else
760 if (*aRelComponents.aPath.pBegin == '/')
761 aBuffer.append(aRelComponents.aPath.pBegin,
762 aRelComponents.aPath.getLength());
763 else
764 aBuffer.append(joinPaths(aBaseComponents.aPath,
765 aRelComponents.aPath));
766 if (aRelComponents.aQuery.isPresent())
767 aBuffer.append(aRelComponents.aQuery.pBegin,
768 aRelComponents.aQuery.getLength());
771 if (aRelComponents.aFragment.isPresent())
772 aBuffer.append(aRelComponents.aFragment.pBegin,
773 aRelComponents.aFragment.getLength());
774 rtl_uString_assign(pResult, aBuffer.makeStringAndClear().pData);
775 return true;
778 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */