Version 4.3.0.0.beta1, tag libreoffice-4.3.0.0.beta1
[LibreOffice.git] / sal / rtl / uri.cxx
blob20daac36024c8a7410d18008a76217adb88b2f70
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include "surrogates.hxx"
22 #include "osl/diagnose.h"
23 #include "rtl/character.hxx"
24 #include "rtl/strbuf.hxx"
25 #include "rtl/textenc.h"
26 #include "rtl/textcvt.h"
27 #include "rtl/uri.h"
28 #include "rtl/ustrbuf.h"
29 #include "rtl/ustrbuf.hxx"
30 #include "rtl/ustring.h"
31 #include "rtl/ustring.hxx"
32 #include "sal/types.h"
33 #include "sal/macros.h"
35 #include <algorithm>
36 #include <cstddef>
38 namespace {
40 std::size_t const nCharClassSize = 128;
42 sal_Unicode const cEscapePrefix = 0x25; // '%'
44 inline bool isHighSurrogate(sal_uInt32 nUtf16)
46 return SAL_RTL_IS_HIGH_SURROGATE(nUtf16);
49 inline bool isLowSurrogate(sal_uInt32 nUtf16)
51 return SAL_RTL_IS_LOW_SURROGATE(nUtf16);
54 inline sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low)
56 return SAL_RTL_COMBINE_SURROGATES(high, low);
59 inline int getHexWeight(sal_uInt32 nUtf32)
61 return nUtf32 >= 0x30 && nUtf32 <= 0x39 ? // '0'--'9'
62 static_cast< int >(nUtf32 - 0x30) :
63 nUtf32 >= 0x41 && nUtf32 <= 0x46 ? // 'A'--'F'
64 static_cast< int >(nUtf32 - 0x41 + 10) :
65 nUtf32 >= 0x61 && nUtf32 <= 0x66 ? // 'a'--'f'
66 static_cast< int >(nUtf32 - 0x61 + 10) :
67 -1; // not a hex digit
70 inline bool isValid(sal_Bool const * pCharClass, sal_uInt32 nUtf32)
72 return nUtf32 < nCharClassSize && pCharClass[nUtf32];
75 inline void writeUnicode(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
76 sal_Unicode cChar)
78 rtl_uStringbuffer_insert(pBuffer, pCapacity, (*pBuffer)->length, &cChar, 1);
/** Tells how readUcs4 obtained the value it returned. */
enum EscapeType
{
    // The value was read literally (a single UTF-16 code unit or a
    // surrogate pair); no "%XX" escape was consumed.
    EscapeNo,

    // One or more "%XX" escapes were consumed and decoded into a character.
    EscapeChar,

    // A single "%XX" escape was consumed, but it could not be decoded into a
    // character; the returned value is the raw octet.
    EscapeOctet
};
88 /* Read any of the following:
90 - sequence of escape sequences representing character from eCharset,
91 translated to single UCS4 character; or
93 - pair of UTF-16 surrogates, translated to single UCS4 character; or
95 _ single UTF-16 character, extended to UCS4 character.
97 sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
98 bool bEncoded, rtl_TextEncoding eCharset,
99 EscapeType * pType)
101 sal_uInt32 nChar = *(*pBegin)++;
102 int nWeight1;
103 int nWeight2;
104 if (nChar == cEscapePrefix && bEncoded && pEnd - *pBegin >= 2
105 && (nWeight1 = getHexWeight((*pBegin)[0])) >= 0
106 && (nWeight2 = getHexWeight((*pBegin)[1])) >= 0)
108 *pBegin += 2;
109 nChar = static_cast< sal_uInt32 >(nWeight1 << 4 | nWeight2);
110 if (nChar <= 0x7F)
111 *pType = EscapeChar;
112 else if (eCharset == RTL_TEXTENCODING_UTF8)
114 if (nChar >= 0xC0 && nChar <= 0xF4)
116 sal_uInt32 nEncoded;
117 int nShift;
118 sal_uInt32 nMin;
119 if (nChar <= 0xDF)
121 nEncoded = (nChar & 0x1F) << 6;
122 nShift = 0;
123 nMin = 0x80;
125 else if (nChar <= 0xEF)
127 nEncoded = (nChar & 0x0F) << 12;
128 nShift = 6;
129 nMin = 0x800;
131 else
133 nEncoded = (nChar & 0x07) << 18;
134 nShift = 12;
135 nMin = 0x10000;
137 sal_Unicode const * p = *pBegin;
138 bool bUTF8 = true;
139 for (; nShift >= 0; nShift -= 6)
141 if (pEnd - p < 3 || p[0] != cEscapePrefix
142 || (nWeight1 = getHexWeight(p[1])) < 8
143 || nWeight1 > 11
144 || (nWeight2 = getHexWeight(p[2])) < 0)
146 bUTF8 = false;
147 break;
149 p += 3;
150 nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift;
152 if (bUTF8 && nEncoded >= nMin && !isHighSurrogate(nEncoded)
153 && !isLowSurrogate(nEncoded) && nEncoded <= 0x10FFFF)
155 *pBegin = p;
156 *pType = EscapeChar;
157 return nEncoded;
160 *pType = EscapeOctet;
162 else
164 rtl::OStringBuffer aBuf;
165 aBuf.append(static_cast< char >(nChar));
166 rtl_TextToUnicodeConverter aConverter
167 = rtl_createTextToUnicodeConverter(eCharset);
168 sal_Unicode const * p = *pBegin;
169 for (;;)
171 sal_Unicode aDst[2];
172 sal_uInt32 nInfo;
173 sal_Size nConverted;
174 sal_Size nDstSize = rtl_convertTextToUnicode(
175 aConverter, 0, aBuf.getStr(), aBuf.getLength(), aDst,
176 SAL_N_ELEMENTS( aDst ),
177 (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
178 | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
179 | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR),
180 &nInfo, &nConverted);
181 if (nInfo == 0)
183 assert( nConverted
184 == sal::static_int_cast< sal_uInt32 >(
185 aBuf.getLength()));
186 rtl_destroyTextToUnicodeConverter(aConverter);
187 *pBegin = p;
188 *pType = EscapeChar;
189 assert( nDstSize == 1
190 || (nDstSize == 2 && isHighSurrogate(aDst[0])
191 && isLowSurrogate(aDst[1])));
192 return nDstSize == 1
193 ? aDst[0] : combineSurrogates(aDst[0], aDst[1]);
195 else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
196 && pEnd - p >= 3 && p[0] == cEscapePrefix
197 && (nWeight1 = getHexWeight(p[1])) >= 0
198 && (nWeight2 = getHexWeight(p[2])) >= 0)
200 p += 3;
201 aBuf.append(static_cast< char >(nWeight1 << 4 | nWeight2));
203 else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
204 && p != pEnd && *p <= 0x7F)
206 aBuf.append(static_cast< char >(*p++));
208 else
210 assert(
211 (nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)
212 == 0);
213 break;
216 rtl_destroyTextToUnicodeConverter(aConverter);
217 *pType = EscapeOctet;
219 return nChar;
221 else
223 *pType = EscapeNo;
224 return isHighSurrogate(nChar) && *pBegin < pEnd
225 && isLowSurrogate(**pBegin) ?
226 combineSurrogates(nChar, *(*pBegin)++) : nChar;
230 void writeUcs4(rtl_uString ** pBuffer, sal_Int32 * pCapacity, sal_uInt32 nUtf32)
232 assert(nUtf32 <= 0x10FFFF); // bad UTF-32 char
233 if (nUtf32 <= 0xFFFF) {
234 writeUnicode(
235 pBuffer, pCapacity, static_cast< sal_Unicode >(nUtf32));
236 } else {
237 nUtf32 -= 0x10000;
238 writeUnicode(
239 pBuffer, pCapacity,
240 static_cast< sal_Unicode >(nUtf32 >> 10 | 0xD800));
241 writeUnicode(
242 pBuffer, pCapacity,
243 static_cast< sal_Unicode >((nUtf32 & 0x3FF) | 0xDC00));
247 void writeEscapeOctet(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
248 sal_uInt32 nOctet)
250 assert(nOctet <= 0xFF); // bad octet
252 static sal_Unicode const aHex[16]
253 = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
254 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }; /* '0'--'9', 'A'--'F' */
256 writeUnicode(pBuffer, pCapacity, cEscapePrefix);
257 writeUnicode(pBuffer, pCapacity, aHex[nOctet >> 4]);
258 writeUnicode(pBuffer, pCapacity, aHex[nOctet & 15]);
261 bool writeEscapeChar(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
262 sal_uInt32 nUtf32, rtl_TextEncoding eCharset, bool bStrict)
264 assert(nUtf32 <= 0x10FFFF); // bad UTF-32 char
265 if (eCharset == RTL_TEXTENCODING_UTF8) {
266 if (nUtf32 < 0x80)
267 writeEscapeOctet(pBuffer, pCapacity, nUtf32);
268 else if (nUtf32 < 0x800)
270 writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 6 | 0xC0);
271 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
273 else if (nUtf32 < 0x10000)
275 writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 12 | 0xE0);
276 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
277 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
279 else
281 writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 18 | 0xF0);
282 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 12 & 0x3F) | 0x80);
283 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
284 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
286 } else {
287 rtl_UnicodeToTextConverter aConverter
288 = rtl_createUnicodeToTextConverter(eCharset);
289 sal_Unicode aSrc[2];
290 sal_Size nSrcSize;
291 if (nUtf32 <= 0xFFFF)
293 aSrc[0] = static_cast< sal_Unicode >(nUtf32);
294 nSrcSize = 1;
296 else
298 aSrc[0] = static_cast< sal_Unicode >(
299 ((nUtf32 - 0x10000) >> 10) | 0xD800);
300 aSrc[1] = static_cast< sal_Unicode >(
301 ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00);
302 nSrcSize = 2;
304 sal_Char aDst[32]; // FIXME random value
305 sal_uInt32 nInfo;
306 sal_Size nConverted;
307 sal_Size nDstSize = rtl_convertUnicodeToText(
308 aConverter, 0, aSrc, nSrcSize, aDst, sizeof aDst,
309 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
310 | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
311 | RTL_UNICODETOTEXT_FLAGS_FLUSH,
312 &nInfo, &nConverted);
313 assert((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0);
314 rtl_destroyUnicodeToTextConverter(aConverter);
315 if (nInfo == 0) {
316 assert(nConverted == nSrcSize); // bad rtl_convertUnicodeToText
317 for (sal_Size i = 0; i < nDstSize; ++i)
318 writeEscapeOctet(pBuffer, pCapacity,
319 static_cast< unsigned char >(aDst[i]));
320 // FIXME all octets are escaped, even if there is no need
321 } else {
322 if (bStrict) {
323 return false;
324 } else {
325 writeUcs4(pBuffer, pCapacity, nUtf32);
329 return true;
332 struct Component
334 sal_Unicode const * pBegin;
335 sal_Unicode const * pEnd;
337 inline Component(): pBegin(0), pEnd(0) {}
339 inline bool isPresent() const { return pBegin != 0; }
341 inline sal_Int32 getLength() const;
344 inline sal_Int32 Component::getLength() const
346 assert(isPresent()); // taking length of non-present component
347 return static_cast< sal_Int32 >(pEnd - pBegin);
350 struct Components
352 Component aScheme;
353 Component aAuthority;
354 Component aPath;
355 Component aQuery;
356 Component aFragment;
359 void parseUriRef(rtl_uString const * pUriRef, Components * pComponents)
361 // This algorithm is liberal and accepts various forms of illegal input.
363 sal_Unicode const * pBegin = pUriRef->buffer;
364 sal_Unicode const * pEnd = pBegin + pUriRef->length;
365 sal_Unicode const * pPos = pBegin;
367 if (pPos != pEnd && rtl::isAsciiAlpha(*pPos))
369 for (sal_Unicode const * p = pPos + 1; p != pEnd; ++p)
371 if (*p == ':')
373 pComponents->aScheme.pBegin = pBegin;
374 pComponents->aScheme.pEnd = ++p;
375 pPos = p;
376 break;
378 else if (!rtl::isAsciiAlphanumeric(*p) && *p != '+' && *p != '-'
379 && *p != '.')
381 break;
386 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
388 pComponents->aAuthority.pBegin = pPos;
389 pPos += 2;
390 while (pPos != pEnd && *pPos != '/' && *pPos != '?' && *pPos != '#')
391 ++pPos;
392 pComponents->aAuthority.pEnd = pPos;
395 pComponents->aPath.pBegin = pPos;
396 while (pPos != pEnd && *pPos != '?' && * pPos != '#')
397 ++pPos;
398 pComponents->aPath.pEnd = pPos;
400 if (pPos != pEnd && *pPos == '?')
402 pComponents->aQuery.pBegin = pPos++;
403 while (pPos != pEnd && * pPos != '#')
404 ++pPos;
405 pComponents->aQuery.pEnd = pPos;
408 if (pPos != pEnd)
410 assert(*pPos == '#');
411 pComponents->aFragment.pBegin = pPos;
412 pComponents->aFragment.pEnd = pEnd;
416 void appendPath(
417 rtl::OUStringBuffer & buffer, sal_Int32 bufferStart, bool precedingSlash,
418 sal_Unicode const * pathBegin, sal_Unicode const * pathEnd)
420 while (precedingSlash || pathBegin != pathEnd) {
421 sal_Unicode const * p = pathBegin;
422 while (p != pathEnd && *p != '/') {
423 ++p;
425 std::size_t n = p - pathBegin;
426 if (n == 1 && pathBegin[0] == '.') {
427 // input begins with "." -> remove from input (and done):
428 // i.e., !precedingSlash -> !precedingSlash
429 // input begins with "./" -> remove from input:
430 // i.e., !precedingSlash -> !precedingSlash
431 // input begins with "/." -> replace with "/" in input (and not yet
432 // done):
433 // i.e., precedingSlash -> precedingSlash
434 // input begins with "/./" -> replace with "/" in input:
435 // i.e., precedingSlash -> precedingSlash
436 } else if (n == 2 && pathBegin[0] == '.' && pathBegin[1] == '.') {
437 // input begins with ".." -> remove from input (and done):
438 // i.e., !precedingSlash -> !precedingSlash
439 // input begins with "../" -> remove from input
440 // i.e., !precedingSlash -> !precedingSlash
441 // input begins with "/.." -> replace with "/" in input, and shrink
442 // output (not not yet done):
443 // i.e., precedingSlash -> precedingSlash
444 // input begins with "/../" -> replace with "/" in input, and shrink
445 // output:
446 // i.e., precedingSlash -> precedingSlash
447 if (precedingSlash) {
448 buffer.truncate(
449 bufferStart
450 + std::max<sal_Int32>(
451 rtl_ustr_lastIndexOfChar_WithLength(
452 buffer.getStr() + bufferStart,
453 buffer.getLength() - bufferStart, '/'),
454 0));
456 } else {
457 if (precedingSlash) {
458 buffer.append('/');
460 buffer.append(pathBegin, n);
461 precedingSlash = p != pathEnd;
463 pathBegin = p + (p == pathEnd ? 0 : 1);
469 sal_Bool const * SAL_CALL rtl_getUriCharClass(rtl_UriCharClass eCharClass)
470 SAL_THROW_EXTERN_C()
472 static sal_Bool const aCharClass[][nCharClassSize]
473 = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* None */
474 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
475 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* !"#$%&'()*+,-./*/
476 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*0123456789:;<=>?*/
477 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*@ABCDEFGHIJKLMNO*/
478 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*PQRSTUVWXYZ[\]^_*/
479 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*`abcdefghijklmno*/
480 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /*pqrstuvwxyz{|}~ */
482 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Uric */
483 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
484 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* !"#$%&'()*+,-./*/
485 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
486 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
487 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*PQRSTUVWXYZ[\]^_*/
488 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
489 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
491 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UricNoSlash */
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
494 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
495 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
496 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
497 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
498 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
500 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RelSegment */
501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
503 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
504 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
505 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
506 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
507 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
509 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RegName */
510 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
511 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
512 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
513 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
514 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
515 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
516 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
518 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Userinfo */
519 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
520 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
521 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
522 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
523 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
524 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
525 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
527 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Pchar */
528 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
529 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
530 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, /*0123456789:;<=>?*/
531 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
532 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
533 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
534 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
536 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UnoParamValue */
537 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
538 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, /* !"#$%&'()*+,-./*/
539 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*0123456789:;<=>?*/
540 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
541 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
542 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
543 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
545 assert(
546 (eCharClass >= 0
547 && (sal::static_int_cast< std::size_t >(eCharClass)
548 < SAL_N_ELEMENTS(aCharClass)))); // bad eCharClass
549 return aCharClass[eCharClass];
552 void SAL_CALL rtl_uriEncode(rtl_uString * pText, sal_Bool const * pCharClass,
553 rtl_UriEncodeMechanism eMechanism,
554 rtl_TextEncoding eCharset, rtl_uString ** pResult)
555 SAL_THROW_EXTERN_C()
557 assert(!pCharClass[0x25]); // make sure the percent sign is encoded...
559 sal_Unicode const * p = pText->buffer;
560 sal_Unicode const * pEnd = p + pText->length;
561 sal_Int32 nCapacity = pText->length;
562 rtl_uString_new_WithLength(pResult, nCapacity);
563 while (p < pEnd)
565 EscapeType eType;
566 sal_uInt32 nUtf32 = readUcs4(
567 &p, pEnd,
568 (eMechanism == rtl_UriEncodeKeepEscapes
569 || eMechanism == rtl_UriEncodeCheckEscapes
570 || eMechanism == rtl_UriEncodeStrictKeepEscapes),
571 eCharset, &eType);
572 switch (eType)
574 case EscapeNo:
575 if (isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
576 writeUnicode(pResult, &nCapacity,
577 static_cast< sal_Unicode >(nUtf32));
578 else if (!writeEscapeChar(
579 pResult, &nCapacity, nUtf32, eCharset,
580 (eMechanism == rtl_UriEncodeStrict
581 || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
583 rtl_uString_new(pResult);
584 return;
586 break;
588 case EscapeChar:
589 if (eMechanism == rtl_UriEncodeCheckEscapes
590 && isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
591 writeUnicode(pResult, &nCapacity,
592 static_cast< sal_Unicode >(nUtf32));
593 else if (!writeEscapeChar(
594 pResult, &nCapacity, nUtf32, eCharset,
595 (eMechanism == rtl_UriEncodeStrict
596 || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
598 rtl_uString_new(pResult);
599 return;
601 break;
603 case EscapeOctet:
604 writeEscapeOctet(pResult, &nCapacity, nUtf32);
605 break;
608 *pResult = rtl_uStringBuffer_makeStringAndClear( pResult, &nCapacity );
611 void SAL_CALL rtl_uriDecode(rtl_uString * pText,
612 rtl_UriDecodeMechanism eMechanism,
613 rtl_TextEncoding eCharset, rtl_uString ** pResult)
614 SAL_THROW_EXTERN_C()
616 switch (eMechanism)
618 case rtl_UriDecodeNone:
619 rtl_uString_assign(pResult, pText);
620 break;
622 case rtl_UriDecodeToIuri:
623 eCharset = RTL_TEXTENCODING_UTF8;
624 //fall-through
625 default: // rtl_UriDecodeWithCharset, rtl_UriDecodeStrict
627 sal_Unicode const * p = pText->buffer;
628 sal_Unicode const * pEnd = p + pText->length;
629 sal_Int32 nCapacity = pText->length;
630 rtl_uString_new_WithLength(pResult, nCapacity);
631 while (p < pEnd)
633 EscapeType eType;
634 sal_uInt32 nUtf32 = readUcs4(&p, pEnd, true, eCharset, &eType);
635 switch (eType)
637 case EscapeChar:
638 if (nUtf32 <= 0x7F && eMechanism == rtl_UriDecodeToIuri)
640 writeEscapeOctet(pResult, &nCapacity, nUtf32);
641 break;
643 case EscapeNo:
644 writeUcs4(pResult, &nCapacity, nUtf32);
645 break;
647 case EscapeOctet:
648 if (eMechanism == rtl_UriDecodeStrict) {
649 rtl_uString_new(pResult);
650 return;
652 writeEscapeOctet(pResult, &nCapacity, nUtf32);
653 break;
656 *pResult = rtl_uStringBuffer_makeStringAndClear( pResult, &nCapacity );
658 break;
662 sal_Bool SAL_CALL rtl_uriConvertRelToAbs(rtl_uString * pBaseUriRef,
663 rtl_uString * pRelUriRef,
664 rtl_uString ** pResult,
665 rtl_uString ** pException)
666 SAL_THROW_EXTERN_C()
668 // Use the strict parser algorithm from RFC 3986, section 5.2, to turn the
669 // relative URI into an absolute one:
670 rtl::OUStringBuffer aBuffer;
671 Components aRelComponents;
672 parseUriRef(pRelUriRef, &aRelComponents);
673 if (aRelComponents.aScheme.isPresent())
675 aBuffer.append(aRelComponents.aScheme.pBegin,
676 aRelComponents.aScheme.getLength());
677 if (aRelComponents.aAuthority.isPresent())
678 aBuffer.append(aRelComponents.aAuthority.pBegin,
679 aRelComponents.aAuthority.getLength());
680 appendPath(
681 aBuffer, aBuffer.getLength(), false, aRelComponents.aPath.pBegin,
682 aRelComponents.aPath.pEnd);
683 if (aRelComponents.aQuery.isPresent())
684 aBuffer.append(aRelComponents.aQuery.pBegin,
685 aRelComponents.aQuery.getLength());
687 else
689 Components aBaseComponents;
690 parseUriRef(pBaseUriRef, &aBaseComponents);
691 if (!aBaseComponents.aScheme.isPresent())
693 rtl_uString_assign(
694 pException,
695 (rtl::OUString(
696 "<" + rtl::OUString(pBaseUriRef)
697 + "> does not start with a scheme component")
698 .pData));
699 return false;
701 aBuffer.append(aBaseComponents.aScheme.pBegin,
702 aBaseComponents.aScheme.getLength());
703 if (aRelComponents.aAuthority.isPresent())
705 aBuffer.append(aRelComponents.aAuthority.pBegin,
706 aRelComponents.aAuthority.getLength());
707 appendPath(
708 aBuffer, aBuffer.getLength(), false,
709 aRelComponents.aPath.pBegin, aRelComponents.aPath.pEnd);
710 if (aRelComponents.aQuery.isPresent())
711 aBuffer.append(aRelComponents.aQuery.pBegin,
712 aRelComponents.aQuery.getLength());
714 else
716 if (aBaseComponents.aAuthority.isPresent())
717 aBuffer.append(aBaseComponents.aAuthority.pBegin,
718 aBaseComponents.aAuthority.getLength());
719 if (aRelComponents.aPath.pBegin == aRelComponents.aPath.pEnd)
721 aBuffer.append(aBaseComponents.aPath.pBegin,
722 aBaseComponents.aPath.getLength());
723 if (aRelComponents.aQuery.isPresent())
724 aBuffer.append(aRelComponents.aQuery.pBegin,
725 aRelComponents.aQuery.getLength());
726 else if (aBaseComponents.aQuery.isPresent())
727 aBuffer.append(aBaseComponents.aQuery.pBegin,
728 aBaseComponents.aQuery.getLength());
730 else
732 if (aRelComponents.aPath.pBegin != aRelComponents.aPath.pEnd
733 && *aRelComponents.aPath.pBegin == '/')
734 appendPath(
735 aBuffer, aBuffer.getLength(), false,
736 aRelComponents.aPath.pBegin, aRelComponents.aPath.pEnd);
737 else if (aBaseComponents.aAuthority.isPresent()
738 && aBaseComponents.aPath.pBegin
739 == aBaseComponents.aPath.pEnd)
740 appendPath(
741 aBuffer, aBuffer.getLength(), true,
742 aRelComponents.aPath.pBegin, aRelComponents.aPath.pEnd);
743 else
745 sal_Int32 n = aBuffer.getLength();
746 sal_Int32 i = rtl_ustr_lastIndexOfChar_WithLength(
747 aBaseComponents.aPath.pBegin,
748 aBaseComponents.aPath.getLength(), '/');
749 if (i >= 0) {
750 appendPath(
751 aBuffer, n, false, aBaseComponents.aPath.pBegin,
752 aBaseComponents.aPath.pBegin + i);
754 appendPath(
755 aBuffer, n, i >= 0, aRelComponents.aPath.pBegin,
756 aRelComponents.aPath.pEnd);
758 if (aRelComponents.aQuery.isPresent())
759 aBuffer.append(aRelComponents.aQuery.pBegin,
760 aRelComponents.aQuery.getLength());
764 if (aRelComponents.aFragment.isPresent())
765 aBuffer.append(aRelComponents.aFragment.pBegin,
766 aRelComponents.aFragment.getLength());
767 rtl_uString_assign(pResult, aBuffer.makeStringAndClear().pData);
768 return true;
771 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */