1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #include "surrogates.hxx"
22 #include "osl/diagnose.h"
23 #include "rtl/character.hxx"
24 #include "rtl/strbuf.hxx"
25 #include "rtl/textenc.h"
26 #include "rtl/textcvt.h"
28 #include "rtl/ustrbuf.h"
29 #include "rtl/ustrbuf.hxx"
30 #include "rtl/ustring.h"
31 #include "rtl/ustring.hxx"
32 #include "sal/types.h"
33 #include "sal/macros.h"
/// Number of entries in each URI character-class table: one flag per code
/// in the range 0..127.
std::size_t const nCharClassSize = 128;
42 sal_Unicode
const cEscapePrefix
= 0x25; // '%'
44 inline bool isHighSurrogate(sal_uInt32 nUtf16
)
46 return SAL_RTL_IS_HIGH_SURROGATE(nUtf16
);
49 inline bool isLowSurrogate(sal_uInt32 nUtf16
)
51 return SAL_RTL_IS_LOW_SURROGATE(nUtf16
);
54 inline sal_uInt32
combineSurrogates(sal_uInt32 high
, sal_uInt32 low
)
56 return SAL_RTL_COMBINE_SURROGATES(high
, low
);
59 inline int getHexWeight(sal_uInt32 nUtf32
)
61 return nUtf32
>= 0x30 && nUtf32
<= 0x39 ? // '0'--'9'
62 static_cast< int >(nUtf32
- 0x30) :
63 nUtf32
>= 0x41 && nUtf32
<= 0x46 ? // 'A'--'F'
64 static_cast< int >(nUtf32
- 0x41 + 10) :
65 nUtf32
>= 0x61 && nUtf32
<= 0x66 ? // 'a'--'f'
66 static_cast< int >(nUtf32
- 0x61 + 10) :
67 -1; // not a hex digit
70 inline bool isValid(sal_Bool
const * pCharClass
, sal_uInt32 nUtf32
)
72 return nUtf32
< nCharClassSize
&& pCharClass
[nUtf32
];
75 inline void writeUnicode(rtl_uString
** pBuffer
, sal_Int32
* pCapacity
,
78 rtl_uStringbuffer_insert(pBuffer
, pCapacity
, (*pBuffer
)->length
, &cChar
, 1);
/* Read any of the following:

   - sequence of escape sequences representing character from eCharset,
     translated to single UCS4 character; or

   - pair of UTF-16 surrogates, translated to single UCS4 character; or

   - single UTF-16 character, extended to UCS4 character.
 */
// Reads the next character from [*pBegin, pEnd) and returns it as one code
// point, advancing *pBegin past whatever was consumed.
//
// Paths visible in this definition:
//  - '%' followed by two hex digits (only when bEncoded is set) yields the
//    octet value nWeight1 << 4 | nWeight2;
//  - for RTL_TEXTENCODING_UTF8, a lead octet in 0xC0..0xF4 is combined with
//    following "%XX" continuation octets into nEncoded, which is accepted
//    only if it is >= nMin, not a surrogate and <= 0x10FFFF;
//  - for other charsets, octets are collected in aBuf and fed to an rtl
//    text-to-unicode converter until it yields one character (one UTF-16
//    unit or a surrogate pair);
//  - otherwise the final return merges a high/low surrogate pair into one
//    code point, or returns the single unit.
//
// *pType is set to EscapeOctet on the paths shown below.
// NOTE(review): the last parameter, several declarations, braces and some
// statements of this definition are not visible here -- confirm details
// (including the other *pType values) against the complete file.
97 sal_uInt32
readUcs4(sal_Unicode
const ** pBegin
, sal_Unicode
const * pEnd
,
98 bool bEncoded
, rtl_TextEncoding eCharset
,
// Fetch one UTF-16 unit and step the caller's cursor past it.
101 sal_uInt32 nChar
= *(*pBegin
)++;
// Escape sequence: needs bEncoded, at least two more units, both hex digits.
104 if (nChar
== cEscapePrefix
&& bEncoded
&& pEnd
- *pBegin
>= 2
105 && (nWeight1
= getHexWeight((*pBegin
)[0])) >= 0
106 && (nWeight2
= getHexWeight((*pBegin
)[1])) >= 0)
109 nChar
= static_cast< sal_uInt32
>(nWeight1
<< 4 | nWeight2
);
112 else if (eCharset
== RTL_TEXTENCODING_UTF8
)
// Only octets in this range are treated as the start of a multi-octet
// UTF-8 sequence.
114 if (nChar
>= 0xC0 && nChar
<= 0xF4)
// Payload bits of the lead octet, positioned for a 2-, 3- or 4-octet form.
121 nEncoded
= (nChar
& 0x1F) << 6;
125 else if (nChar
<= 0xEF)
127 nEncoded
= (nChar
& 0x0F) << 12;
133 nEncoded
= (nChar
& 0x07) << 18;
137 sal_Unicode
const * p
= *pBegin
;
// One iteration per expected continuation octet, highest bits first.
139 for (; nShift
>= 0; nShift
-= 6)
// A continuation must be "%XX" whose first hex digit is at least 8.
141 if (pEnd
- p
< 3 || p
[0] != cEscapePrefix
142 || (nWeight1
= getHexWeight(p
[1])) < 8
144 || (nWeight2
= getHexWeight(p
[2])) < 0)
// Merge the six payload bits of this continuation octet.
150 nEncoded
|= ((nWeight1
& 3) << 4 | nWeight2
) << nShift
;
// Reject overlong forms (< nMin), surrogates and values beyond U+10FFFF.
152 if (bUTF8
&& nEncoded
>= nMin
&& !isHighSurrogate(nEncoded
)
153 && !isLowSurrogate(nEncoded
) && nEncoded
<= 0x10FFFF)
160 *pType
= EscapeOctet
;
// Non-UTF-8 charset: start the octet buffer with the octet just decoded.
164 rtl::OStringBuffer aBuf
;
165 aBuf
.append(static_cast< char >(nChar
));
166 rtl_TextToUnicodeConverter aConverter
167 = rtl_createTextToUnicodeConverter(eCharset
);
168 sal_Unicode
const * p
= *pBegin
;
174 sal_Size nDstSize
= rtl_convertTextToUnicode(
175 aConverter
, 0, aBuf
.getStr(), aBuf
.getLength(), aDst
,
176 SAL_N_ELEMENTS( aDst
),
177 (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
178 | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
179 | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR
),
180 &nInfo
, &nConverted
);
184 == sal::static_int_cast
< sal_uInt32
>(
186 rtl_destroyTextToUnicodeConverter(aConverter
);
// A successful conversion yields one unit or a well-formed surrogate pair.
189 assert( nDstSize
== 1
190 || (nDstSize
== 2 && isHighSurrogate(aDst
[0])
191 && isLowSurrogate(aDst
[1])));
193 ? aDst
[0] : combineSurrogates(aDst
[0], aDst
[1]);
// Converter wants more input and another "%XX" escape follows: add its octet.
195 else if (nInfo
== RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
196 && pEnd
- p
>= 3 && p
[0] == cEscapePrefix
197 && (nWeight1
= getHexWeight(p
[1])) >= 0
198 && (nWeight2
= getHexWeight(p
[2])) >= 0)
201 aBuf
.append(static_cast< char >(nWeight1
<< 4 | nWeight2
));
// Converter wants more input and a plain character <= 0x7F follows: add it.
203 else if (nInfo
== RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
204 && p
!= pEnd
&& *p
<= 0x7F)
206 aBuf
.append(static_cast< char >(*p
++));
211 (nInfo
& RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
)
216 rtl_destroyTextToUnicodeConverter(aConverter
);
217 *pType
= EscapeOctet
;
// No escape handling applied: merge a surrogate pair if one is present.
224 return isHighSurrogate(nChar
) && *pBegin
< pEnd
225 && isLowSurrogate(**pBegin
) ?
226 combineSurrogates(nChar
, *(*pBegin
)++) : nChar
;
230 void writeUcs4(rtl_uString
** pBuffer
, sal_Int32
* pCapacity
, sal_uInt32 nUtf32
)
232 assert(nUtf32
<= 0x10FFFF); // bad UTF-32 char
233 if (nUtf32
<= 0xFFFF) {
235 pBuffer
, pCapacity
, static_cast< sal_Unicode
>(nUtf32
));
240 static_cast< sal_Unicode
>(nUtf32
>> 10 | 0xD800));
243 static_cast< sal_Unicode
>((nUtf32
& 0x3FF) | 0xDC00));
247 void writeEscapeOctet(rtl_uString
** pBuffer
, sal_Int32
* pCapacity
,
250 assert(nOctet
<= 0xFF); // bad octet
252 static sal_Unicode
const aHex
[16]
253 = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
254 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }; /* '0'--'9', 'A'--'F' */
256 writeUnicode(pBuffer
, pCapacity
, cEscapePrefix
);
257 writeUnicode(pBuffer
, pCapacity
, aHex
[nOctet
>> 4]);
258 writeUnicode(pBuffer
, pCapacity
, aHex
[nOctet
& 15]);
// Appends the percent-escaped form of code point nUtf32 to the string
// buffer and returns a bool (rtl_uriEncode resets its result when this
// returns false).
//
// For RTL_TEXTENCODING_UTF8 the code point is split into UTF-8 octets with
// the shifts and masks below, each written via writeEscapeOctet.  For other
// charsets the code point is placed in aSrc as one UTF-16 unit or a
// surrogate pair, converted with rtl_convertUnicodeToText, and every
// resulting octet is escaped.
//
// NOTE(review): several lines (branch conditions, declarations, braces and
// the return statements) are not visible here.  The writeUcs4 call near the
// end presumably is the non-strict fallback when the character cannot be
// converted -- confirm, along with the role of bStrict, in the full file.
261 bool writeEscapeChar(rtl_uString
** pBuffer
, sal_Int32
* pCapacity
,
262 sal_uInt32 nUtf32
, rtl_TextEncoding eCharset
, bool bStrict
)
264 assert(nUtf32
<= 0x10FFFF); // bad UTF-32 char
265 if (eCharset
== RTL_TEXTENCODING_UTF8
) {
// Single octet written as is.
267 writeEscapeOctet(pBuffer
, pCapacity
, nUtf32
);
// Two octets: 110xxxxx 10xxxxxx.
268 else if (nUtf32
< 0x800)
270 writeEscapeOctet(pBuffer
, pCapacity
, nUtf32
>> 6 | 0xC0);
271 writeEscapeOctet(pBuffer
, pCapacity
, (nUtf32
& 0x3F) | 0x80);
// Three octets: 1110xxxx 10xxxxxx 10xxxxxx.
273 else if (nUtf32
< 0x10000)
275 writeEscapeOctet(pBuffer
, pCapacity
, nUtf32
>> 12 | 0xE0);
276 writeEscapeOctet(pBuffer
, pCapacity
, (nUtf32
>> 6 & 0x3F) | 0x80);
277 writeEscapeOctet(pBuffer
, pCapacity
, (nUtf32
& 0x3F) | 0x80);
// Four octets: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
281 writeEscapeOctet(pBuffer
, pCapacity
, nUtf32
>> 18 | 0xF0);
282 writeEscapeOctet(pBuffer
, pCapacity
, (nUtf32
>> 12 & 0x3F) | 0x80);
283 writeEscapeOctet(pBuffer
, pCapacity
, (nUtf32
>> 6 & 0x3F) | 0x80);
284 writeEscapeOctet(pBuffer
, pCapacity
, (nUtf32
& 0x3F) | 0x80);
// Any other charset: go through a unicode-to-text converter.
287 rtl_UnicodeToTextConverter aConverter
288 = rtl_createUnicodeToTextConverter(eCharset
);
// Code points up to 0xFFFF fit in one UTF-16 unit.
291 if (nUtf32
<= 0xFFFF)
293 aSrc
[0] = static_cast< sal_Unicode
>(nUtf32
);
// Larger code points become a surrogate pair built from the offset above
// 0x10000.
298 aSrc
[0] = static_cast< sal_Unicode
>(
299 ((nUtf32
- 0x10000) >> 10) | 0xD800);
300 aSrc
[1] = static_cast< sal_Unicode
>(
301 ((nUtf32
- 0x10000) & 0x3FF) | 0xDC00);
304 sal_Char aDst
[32]; // FIXME random value
307 sal_Size nDstSize
= rtl_convertUnicodeToText(
308 aConverter
, 0, aSrc
, nSrcSize
, aDst
, sizeof aDst
,
309 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
310 | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
311 | RTL_UNICODETOTEXT_FLAGS_FLUSH
,
312 &nInfo
, &nConverted
);
// aDst is expected to be large enough for any single character.
313 assert((nInfo
& RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
) == 0);
314 rtl_destroyUnicodeToTextConverter(aConverter
);
316 assert(nConverted
== nSrcSize
); // bad rtl_convertUnicodeToText
// Escape every octet the converter produced.
317 for (sal_Size i
= 0; i
< nDstSize
; ++i
)
318 writeEscapeOctet(pBuffer
, pCapacity
,
319 static_cast< unsigned char >(aDst
[i
]));
320 // FIXME all octets are escaped, even if there is no need
325 writeUcs4(pBuffer
, pCapacity
, nUtf32
);
334 sal_Unicode
const * pBegin
;
335 sal_Unicode
const * pEnd
;
337 inline Component(): pBegin(0), pEnd(0) {}
339 inline bool isPresent() const { return pBegin
!= 0; }
341 inline sal_Int32
getLength() const;
344 inline sal_Int32
Component::getLength() const
346 assert(isPresent()); // taking length of non-present component
347 return static_cast< sal_Int32
>(pEnd
- pBegin
);
// Authority component of a parsed URI reference; parseUriRef fills it in
// when the reference contains "//".
// NOTE(review): the enclosing declaration and its other members are not
// visible here.
353 Component aAuthority
;
// Splits the URI reference pUriRef into components and records each one in
// *pComponents as a pair of pointers into pUriRef's own buffer.
//
// Steps visible below, in order:
//  - scheme: only looked for when the first character is an ASCII letter;
//    it starts at the beginning of the buffer;
//  - authority: present when the next two characters are "//"; the stored
//    range starts at the "//" and ends before the next '/', '?' or '#';
//  - path: always set; runs up to the next '?' or '#';
//  - query: present when a '?' follows; the stored range starts at the '?'
//    and ends before the next '#';
//  - fragment: starts at the '#' and runs to the end of the buffer.
//
// NOTE(review): braces, some conditions and the statements that advance
// pPos are not visible here.
359 void parseUriRef(rtl_uString
const * pUriRef
, Components
* pComponents
)
361 // This algorithm is liberal and accepts various forms of illegal input.
363 sal_Unicode
const * pBegin
= pUriRef
->buffer
;
364 sal_Unicode
const * pEnd
= pBegin
+ pUriRef
->length
;
365 sal_Unicode
const * pPos
= pBegin
;
// A scheme can only start with an ASCII letter.
367 if (pPos
!= pEnd
&& rtl::isAsciiAlpha(*pPos
))
369 for (sal_Unicode
const * p
= pPos
+ 1; p
!= pEnd
; ++p
)
// The scheme range ends one past p (presumably one past the ':' -- the
// test on *p is not visible).
373 pComponents
->aScheme
.pBegin
= pBegin
;
374 pComponents
->aScheme
.pEnd
= ++p
;
378 else if (!rtl::isAsciiAlphanumeric(*p
) && *p
!= '+' && *p
!= '-'
// Authority: introduced by "//".
386 if (pEnd
- pPos
>= 2 && pPos
[0] == '/' && pPos
[1] == '/')
388 pComponents
->aAuthority
.pBegin
= pPos
;
390 while (pPos
!= pEnd
&& *pPos
!= '/' && *pPos
!= '?' && *pPos
!= '#')
392 pComponents
->aAuthority
.pEnd
= pPos
;
// Path: everything up to a query or fragment delimiter (may be empty).
395 pComponents
->aPath
.pBegin
= pPos
;
396 while (pPos
!= pEnd
&& *pPos
!= '?' && * pPos
!= '#')
398 pComponents
->aPath
.pEnd
= pPos
;
// Query: the stored range includes the leading '?'.
400 if (pPos
!= pEnd
&& *pPos
== '?')
402 pComponents
->aQuery
.pBegin
= pPos
++;
403 while (pPos
!= pEnd
&& * pPos
!= '#')
405 pComponents
->aQuery
.pEnd
= pPos
;
// Fragment: the stored range includes the leading '#'.
410 assert(*pPos
== '#');
411 pComponents
->aFragment
.pBegin
= pPos
;
412 pComponents
->aFragment
.pEnd
= pEnd
;
// Path-appending helper (the line carrying the function's return type and
// name is not visible here).
//
// Walks the path [pathBegin, pathEnd) one '/'-separated segment at a time
// and appends it to buffer, treating "." and ".." segments specially as
// described in the case comments below.
//
//  buffer          output buffer being extended
//  bufferStart     offset into buffer from which the search for the last
//                  '/' starts when a ".." segment is handled
//  precedingSlash  whether a '/' precedes the segment about to be read;
//                  updated after each ordinary segment to "a '/' followed it"
//  pathBegin/End   the input path
//
// NOTE(review): braces and several statements (including the buffer
// truncation for "..") are not visible here.
417 rtl::OUStringBuffer
& buffer
, sal_Int32 bufferStart
, bool precedingSlash
,
418 sal_Unicode
const * pathBegin
, sal_Unicode
const * pathEnd
)
420 while (precedingSlash
|| pathBegin
!= pathEnd
) {
// Find the end of the current segment.
421 sal_Unicode
const * p
= pathBegin
;
422 while (p
!= pathEnd
&& *p
!= '/') {
// n is the length of the current segment.
425 std::size_t n
= p
- pathBegin
;
426 if (n
== 1 && pathBegin
[0] == '.') {
427 // input begins with "." -> remove from input (and done):
428 // i.e., !precedingSlash -> !precedingSlash
429 // input begins with "./" -> remove from input:
430 // i.e., !precedingSlash -> !precedingSlash
431 // input begins with "/." -> replace with "/" in input (and not yet
// done):
433 // i.e., precedingSlash -> precedingSlash
434 // input begins with "/./" -> replace with "/" in input:
435 // i.e., precedingSlash -> precedingSlash
436 } else if (n
== 2 && pathBegin
[0] == '.' && pathBegin
[1] == '.') {
437 // input begins with ".." -> remove from input (and done):
438 // i.e., !precedingSlash -> !precedingSlash
439 // input begins with "../" -> remove from input
440 // i.e., !precedingSlash -> !precedingSlash
441 // input begins with "/.." -> replace with "/" in input, and shrink
442 // output (and not yet done):
443 // i.e., precedingSlash -> precedingSlash
444 // input begins with "/../" -> replace with "/" in input, and shrink
// output:
446 // i.e., precedingSlash -> precedingSlash
447 if (precedingSlash
) {
// Locate the last '/' in the part of the output written since bufferStart.
450 + std::max
<sal_Int32
>(
451 rtl_ustr_lastIndexOfChar_WithLength(
452 buffer
.getStr() + bufferStart
,
453 buffer
.getLength() - bufferStart
, '/'),
// Ordinary segment.
457 if (precedingSlash
) {
460 buffer
.append(pathBegin
, n
);
461 precedingSlash
= p
!= pathEnd
;
// Skip the segment and, if present, the '/' that terminated it.
463 pathBegin
= p
+ (p
== pathEnd
? 0 : 1);
// Returns the character-class table selected by eCharClass.
//
// aCharClass holds one row of nCharClassSize flags per class, indexed by
// character code 0..127; a non-zero flag marks the character as a member
// of the class.  Each group of 16 flags is annotated with the characters
// it covers, and the first line of each row names the class.
469 sal_Bool
const * SAL_CALL
rtl_getUriCharClass(rtl_UriCharClass eCharClass
)
472 static sal_Bool
const aCharClass
[][nCharClassSize
]
473 = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* None */
474 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
475 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* !"#$%&'()*+,-./*/
476 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*0123456789:;<=>?*/
477 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*@ABCDEFGHIJKLMNO*/
478 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*PQRSTUVWXYZ[\]^_*/
479 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*`abcdefghijklmno*/
480 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /*pqrstuvwxyz{|}~ */
482 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Uric */
483 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
484 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* !"#$%&'()*+,-./*/
485 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
486 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
487 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*PQRSTUVWXYZ[\]^_*/
488 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
489 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
491 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UricNoSlash */
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
494 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
495 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
496 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
497 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
498 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
500 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RelSegment */
501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
503 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
504 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
505 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
506 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
507 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
509 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RegName */
510 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
511 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
512 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
513 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
514 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
515 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
516 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
518 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Userinfo */
519 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
520 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
521 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
522 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
523 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
524 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
525 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
527 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Pchar */
528 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
529 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
530 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, /*0123456789:;<=>?*/
531 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
532 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
533 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
534 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
536 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UnoParamValue */
537 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
538 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, /* !"#$%&'()*+,-./*/
539 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*0123456789:;<=>?*/
540 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
541 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
542 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
543 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
// Range check on eCharClass (the opening of this expression is not
// visible here), then select the row.
547 && (sal::static_int_cast
< std::size_t >(eCharClass
)
548 < SAL_N_ELEMENTS(aCharClass
)))); // bad eCharClass
549 return aCharClass
[eCharClass
];
// Percent-encodes pText into *pResult.
//
//  pText       the input string
//  pCharClass  table of characters that may be written unescaped; it must
//              not mark '%' (0x25) as a member (asserted)
//  eMechanism  selects escape handling: existing "%XX" sequences are
//              recognised on input for KeepEscapes, CheckEscapes and
//              StrictKeepEscapes; writeEscapeChar is called in strict mode
//              for Strict and StrictKeepEscapes
//  eCharset    text encoding passed to readUcs4 / writeEscapeChar
//  pResult     receives the encoded string
//
// Characters are read one at a time with readUcs4.  A character accepted by
// pCharClass is copied; otherwise writeEscapeChar is used, and if that
// returns false rtl_uString_new(pResult) is called.
// NOTE(review): the loop header, the switch on the escape type returned by
// readUcs4 and the early-return statements are not visible here.
552 void SAL_CALL
rtl_uriEncode(rtl_uString
* pText
, sal_Bool
const * pCharClass
,
553 rtl_UriEncodeMechanism eMechanism
,
554 rtl_TextEncoding eCharset
, rtl_uString
** pResult
)
557 assert(!pCharClass
[0x25]); // make sure the percent sign is encoded...
559 sal_Unicode
const * p
= pText
->buffer
;
560 sal_Unicode
const * pEnd
= p
+ pText
->length
;
// Start with room for the unencoded length.
561 sal_Int32 nCapacity
= pText
->length
;
562 rtl_uString_new_WithLength(pResult
, nCapacity
);
566 sal_uInt32 nUtf32
= readUcs4(
// Escape sequences in the input are only decoded for these mechanisms.
568 (eMechanism
== rtl_UriEncodeKeepEscapes
569 || eMechanism
== rtl_UriEncodeCheckEscapes
570 || eMechanism
== rtl_UriEncodeStrictKeepEscapes
),
// First handling branch: copy if allowed, else escape.
575 if (isValid(pCharClass
, nUtf32
)) // implies nUtf32 <= 0x7F
576 writeUnicode(pResult
, &nCapacity
,
577 static_cast< sal_Unicode
>(nUtf32
));
578 else if (!writeEscapeChar(
579 pResult
, &nCapacity
, nUtf32
, eCharset
,
580 (eMechanism
== rtl_UriEncodeStrict
581 || eMechanism
== rtl_UriEncodeStrictKeepEscapes
)))
583 rtl_uString_new(pResult
);
// Second handling branch: copy unescaped only for CheckEscapes and when
// allowed by pCharClass, else escape.
589 if (eMechanism
== rtl_UriEncodeCheckEscapes
590 && isValid(pCharClass
, nUtf32
)) // implies nUtf32 <= 0x7F
591 writeUnicode(pResult
, &nCapacity
,
592 static_cast< sal_Unicode
>(nUtf32
));
593 else if (!writeEscapeChar(
594 pResult
, &nCapacity
, nUtf32
, eCharset
,
595 (eMechanism
== rtl_UriEncodeStrict
596 || eMechanism
== rtl_UriEncodeStrictKeepEscapes
)))
598 rtl_uString_new(pResult
);
// Third handling branch: write the value as a single escaped octet.
604 writeEscapeOctet(pResult
, &nCapacity
, nUtf32
);
608 *pResult
= rtl_uStringBuffer_makeStringAndClear( pResult
, &nCapacity
);
// Decodes percent-escapes in pText and stores the outcome in *pResult.
//
//  eMechanism  rtl_UriDecodeNone assigns pText to *pResult as is;
//              rtl_UriDecodeToIuri sets eCharset to UTF-8 before decoding;
//              the default branch covers rtl_UriDecodeWithCharset and
//              rtl_UriDecodeStrict
//  eCharset    text encoding of the escaped octets
//
// Characters are read with readUcs4 (escape recognition always on).  In
// ToIuri mode a decoded value <= 0x7F is written back as an escape; other
// decoded characters are written with writeUcs4.  In the last branch shown,
// rtl_UriDecodeStrict calls rtl_uString_new(pResult) while the other modes
// re-emit the value as an escape.
// NOTE(review): the switch/loop headers, the case labels on the escape type
// and the break/return statements are not visible here.
611 void SAL_CALL
rtl_uriDecode(rtl_uString
* pText
,
612 rtl_UriDecodeMechanism eMechanism
,
613 rtl_TextEncoding eCharset
, rtl_uString
** pResult
)
618 case rtl_UriDecodeNone
:
619 rtl_uString_assign(pResult
, pText
);
622 case rtl_UriDecodeToIuri
:
623 eCharset
= RTL_TEXTENCODING_UTF8
;
625 default: // rtl_UriDecodeWithCharset, rtl_UriDecodeStrict
627 sal_Unicode
const * p
= pText
->buffer
;
628 sal_Unicode
const * pEnd
= p
+ pText
->length
;
629 sal_Int32 nCapacity
= pText
->length
;
630 rtl_uString_new_WithLength(pResult
, nCapacity
);
634 sal_uInt32 nUtf32
= readUcs4(&p
, pEnd
, true, eCharset
, &eType
);
// When producing an IURI, values up to 0x7F are written back as escapes.
638 if (nUtf32
<= 0x7F && eMechanism
== rtl_UriDecodeToIuri
)
640 writeEscapeOctet(pResult
, &nCapacity
, nUtf32
);
644 writeUcs4(pResult
, &nCapacity
, nUtf32
);
648 if (eMechanism
== rtl_UriDecodeStrict
) {
649 rtl_uString_new(pResult
);
652 writeEscapeOctet(pResult
, &nCapacity
, nUtf32
);
656 *pResult
= rtl_uStringBuffer_makeStringAndClear( pResult
, &nCapacity
);
// Resolves the relative URI reference pRelUriRef against the base
// pBaseUriRef and assigns the result to *pResult.
//
// Both references are split with parseUriRef and the result is assembled
// in aBuffer:
//  - if the relative reference has a scheme, its own scheme, authority,
//    path and query are used;
//  - otherwise the base must have a scheme; a message
//    "<base> does not start with a scheme component" is built when it has
//    none (presumably stored via pException with a false return -- those
//    lines are not visible here);
//  - with a base scheme, the authority/path/query are chosen from the
//    relative or base reference by the cases commented below;
//  - the fragment always comes from the relative reference.
//
// NOTE(review): braces, else keywords, the names of the path-appending
// calls and the return statements are not visible here.
662 sal_Bool SAL_CALL
rtl_uriConvertRelToAbs(rtl_uString
* pBaseUriRef
,
663 rtl_uString
* pRelUriRef
,
664 rtl_uString
** pResult
,
665 rtl_uString
** pException
)
668 // Use the strict parser algorithm from RFC 3986, section 5.2, to turn the
669 // relative URI into an absolute one:
670 rtl::OUStringBuffer aBuffer
;
671 Components aRelComponents
;
672 parseUriRef(pRelUriRef
, &aRelComponents
);
// Relative reference carries its own scheme: take everything from it.
673 if (aRelComponents
.aScheme
.isPresent())
675 aBuffer
.append(aRelComponents
.aScheme
.pBegin
,
676 aRelComponents
.aScheme
.getLength());
677 if (aRelComponents
.aAuthority
.isPresent())
678 aBuffer
.append(aRelComponents
.aAuthority
.pBegin
,
679 aRelComponents
.aAuthority
.getLength());
681 aBuffer
, aBuffer
.getLength(), false, aRelComponents
.aPath
.pBegin
,
682 aRelComponents
.aPath
.pEnd
);
683 if (aRelComponents
.aQuery
.isPresent())
684 aBuffer
.append(aRelComponents
.aQuery
.pBegin
,
685 aRelComponents
.aQuery
.getLength());
// No scheme in the relative reference: the base has to provide one.
689 Components aBaseComponents
;
690 parseUriRef(pBaseUriRef
, &aBaseComponents
);
691 if (!aBaseComponents
.aScheme
.isPresent())
696 "<" + rtl::OUString(pBaseUriRef
)
697 + "> does not start with a scheme component")
701 aBuffer
.append(aBaseComponents
.aScheme
.pBegin
,
702 aBaseComponents
.aScheme
.getLength());
// Relative reference has an authority: authority, path and query come
// from it.
703 if (aRelComponents
.aAuthority
.isPresent())
705 aBuffer
.append(aRelComponents
.aAuthority
.pBegin
,
706 aRelComponents
.aAuthority
.getLength());
708 aBuffer
, aBuffer
.getLength(), false,
709 aRelComponents
.aPath
.pBegin
, aRelComponents
.aPath
.pEnd
);
710 if (aRelComponents
.aQuery
.isPresent())
711 aBuffer
.append(aRelComponents
.aQuery
.pBegin
,
712 aRelComponents
.aQuery
.getLength());
// Otherwise the base's authority (if any) is used.
716 if (aBaseComponents
.aAuthority
.isPresent())
717 aBuffer
.append(aBaseComponents
.aAuthority
.pBegin
,
718 aBaseComponents
.aAuthority
.getLength());
// Empty relative path: keep the base path; the query comes from the
// relative reference if it has one, else from the base.
719 if (aRelComponents
.aPath
.pBegin
== aRelComponents
.aPath
.pEnd
)
721 aBuffer
.append(aBaseComponents
.aPath
.pBegin
,
722 aBaseComponents
.aPath
.getLength());
723 if (aRelComponents
.aQuery
.isPresent())
724 aBuffer
.append(aRelComponents
.aQuery
.pBegin
,
725 aRelComponents
.aQuery
.getLength());
726 else if (aBaseComponents
.aQuery
.isPresent())
727 aBuffer
.append(aBaseComponents
.aQuery
.pBegin
,
728 aBaseComponents
.aQuery
.getLength());
// Relative path starts with '/': it replaces the base path.
732 if (aRelComponents
.aPath
.pBegin
!= aRelComponents
.aPath
.pEnd
733 && *aRelComponents
.aPath
.pBegin
== '/')
735 aBuffer
, aBuffer
.getLength(), false,
736 aRelComponents
.aPath
.pBegin
, aRelComponents
.aPath
.pEnd
);
// Base has an authority but an empty path: the relative path is appended
// with the preceding-slash argument set to true.
737 else if (aBaseComponents
.aAuthority
.isPresent()
738 && aBaseComponents
.aPath
.pBegin
739 == aBaseComponents
.aPath
.pEnd
)
741 aBuffer
, aBuffer
.getLength(), true,
742 aRelComponents
.aPath
.pBegin
, aRelComponents
.aPath
.pEnd
);
// General merge: base path up to its last '/' (index i), followed by the
// relative path; n marks where the path starts in aBuffer.
745 sal_Int32 n
= aBuffer
.getLength();
746 sal_Int32 i
= rtl_ustr_lastIndexOfChar_WithLength(
747 aBaseComponents
.aPath
.pBegin
,
748 aBaseComponents
.aPath
.getLength(), '/');
751 aBuffer
, n
, false, aBaseComponents
.aPath
.pBegin
,
752 aBaseComponents
.aPath
.pBegin
+ i
);
// A slash precedes the relative path only if the base path contained one.
755 aBuffer
, n
, i
>= 0, aRelComponents
.aPath
.pBegin
,
756 aRelComponents
.aPath
.pEnd
);
758 if (aRelComponents
.aQuery
.isPresent())
759 aBuffer
.append(aRelComponents
.aQuery
.pBegin
,
760 aRelComponents
.aQuery
.getLength());
// The fragment is always taken from the relative reference.
764 if (aRelComponents
.aFragment
.isPresent())
765 aBuffer
.append(aRelComponents
.aFragment
.pBegin
,
766 aRelComponents
.aFragment
.getLength());
767 rtl_uString_assign(pResult
, aBuffer
.makeStringAndClear().pData
);
771 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */