1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2000, 2010 Oracle and/or its affiliates.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * This file is part of OpenOffice.org.
11 * OpenOffice.org is free software: you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser General Public License version 3
13 * only, as published by the Free Software Foundation.
15 * OpenOffice.org is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Lesser General Public License version 3 for more details
19 * (a copy is included in the LICENSE file that accompanied this code).
21 * You should have received a copy of the GNU Lesser General Public License
22 * version 3 along with OpenOffice.org. If not, see
23 * <http://www.openoffice.org/license.html>
24 * for a copy of the LGPLv3 License.
26 ************************************************************************/
28 // MARKER(update_precomp.py): autogen include statement, do not remove
29 #include "precompiled_sal.hxx"
33 #include "surrogates.h"
35 #include "osl/diagnose.h"
36 #include "rtl/strbuf.hxx"
37 #include "rtl/textenc.h"
38 #include "rtl/textcvt.h"
40 #include "rtl/ustrbuf.h"
41 #include "rtl/ustrbuf.hxx"
42 #include "rtl/ustring.h"
43 #include "rtl/ustring.hxx"
44 #include "sal/types.h"
// Number of entries in every character-class table; isValid() rejects any
// code point that is not below this bound.
const std::size_t nCharClassSize = 128;
52 sal_Unicode
const cEscapePrefix
= 0x25; // '%'
54 inline bool isDigit(sal_uInt32 nUtf32
)
56 return nUtf32
>= 0x30 && nUtf32
<= 0x39; // '0'--'9'
59 inline bool isAlpha(sal_uInt32 nUtf32
)
63 (nUtf32
>= 0x41 && nUtf32
<= 0x5A) ||
64 (nUtf32
>= 0x61 && nUtf32
<= 0x7A)
68 inline bool isHighSurrogate(sal_uInt32 nUtf16
)
70 return SAL_RTL_IS_HIGH_SURROGATE(nUtf16
);
73 inline bool isLowSurrogate(sal_uInt32 nUtf16
)
75 return SAL_RTL_IS_LOW_SURROGATE(nUtf16
);
78 inline sal_uInt32
combineSurrogates(sal_uInt32 high
, sal_uInt32 low
)
80 return SAL_RTL_COMBINE_SURROGATES(high
, low
);
83 inline int getHexWeight(sal_uInt32 nUtf32
)
85 return nUtf32
>= 0x30 && nUtf32
<= 0x39 ? // '0'--'9'
86 static_cast< int >(nUtf32
- 0x30) :
87 nUtf32
>= 0x41 && nUtf32
<= 0x46 ? // 'A'--'F'
88 static_cast< int >(nUtf32
- 0x41 + 10) :
89 nUtf32
>= 0x61 && nUtf32
<= 0x66 ? // 'a'--'f'
90 static_cast< int >(nUtf32
- 0x61 + 10) :
91 -1; // not a hex digit
94 inline bool isValid(sal_Bool
const * pCharClass
, sal_uInt32 nUtf32
)
96 return nUtf32
< nCharClassSize
&& pCharClass
[nUtf32
];
// Append a single UTF-16 code unit to a growable rtl_uString buffer: exactly
// one unit, taken from cChar, is inserted at offset (*pBuffer)->length, i.e.
// at the current end of the buffer.
// NOTE(review): the declaration of the last parameter (cChar) is not visible
// in this listing; presumably it is a sal_Unicode -- confirm.
99 inline void writeUnicode(rtl_uString
** pBuffer
, sal_Int32
* pCapacity
,
102 rtl_uStringbuffer_insert(pBuffer
, pCapacity
, (*pBuffer
)->length
, &cChar
, 1);
112 /* Read any of the following:
114 - sequence of escape sequences representing character from eCharset,
115 translated to single UCS4 character; or
117 - pair of UTF-16 surrogates, translated to single UCS4 character; or
119 - single UTF-16 character, extended to UCS4 character.
// Reads one logical character starting at *pBegin and returns it as a UCS4
// value.  *pBegin is advanced past the first UTF-16 unit right away; further
// advancing for multi-unit input is only partly visible in this listing.
// *pType is an out-parameter; the visible code sets it to EscapeOctet when an
// escaped octet could not be interpreted as (part of) a character in eCharset.
// NOTE(review): several statements of this function (local declarations, the
// remaining *pType assignments, loop heads, returns) are not visible here;
// the comments below describe only what the visible lines establish.
121 sal_uInt32
readUcs4(sal_Unicode
const ** pBegin
, sal_Unicode
const * pEnd
,
122 bool bEncoded
, rtl_TextEncoding eCharset
,
// The first unit is consumed without any bounds check in the visible code, so
// presumably the caller guarantees *pBegin != pEnd -- confirm.
125 sal_uInt32 nChar
= *(*pBegin
)++;
// An escape sequence is recognized only if bEncoded is set and the '%' is
// followed by at least two units that are both hexadecimal digits.
128 if (nChar
== cEscapePrefix
&& bEncoded
&& pEnd
- *pBegin
>= 2
129 && (nWeight1
= getHexWeight((*pBegin
)[0])) >= 0
130 && (nWeight2
= getHexWeight((*pBegin
)[1])) >= 0)
// Combine the two hex digits into the octet value.
133 nChar
= static_cast< sal_uInt32
>(nWeight1
<< 4 | nWeight2
);
// UTF-8: try to decode a multi-octet sequence made of consecutive escapes.
136 else if (eCharset
== RTL_TEXTENCODING_UTF8
)
// Only octets in the range 0xC0--0xF4 are treated as lead octets.
138 if (nChar
>= 0xC0 && nChar
<= 0xF4)
// Lead octet of a two-octet sequence: its low 5 bits go to bit position 6.
145 nEncoded
= (nChar
& 0x1F) << 6;
149 else if (nChar
<= 0xEF)
// Lead octet of a three-octet sequence: its low 4 bits go to bit position 12.
151 nEncoded
= (nChar
& 0x0F) << 12;
// Lead octet of a four-octet sequence: its low 3 bits go to bit position 18.
157 nEncoded
= (nChar
& 0x07) << 18;
// Work on a local cursor so *pBegin is not disturbed while probing.
161 sal_Unicode
const * p
= *pBegin
;
// One iteration per continuation octet; nShift is the bit position at which
// that octet's payload is merged into nEncoded.
163 for (; nShift
>= 0; nShift
-= 6)
// A continuation octet must be present as "%XY" with X a hex digit of weight
// at least 8 and Y any hex digit (further conditions may exist on lines not
// visible here).
165 if (pEnd
- p
< 3 || p
[0] != cEscapePrefix
166 || (nWeight1
= getHexWeight(p
[1])) < 8
168 || (nWeight2
= getHexWeight(p
[2])) < 0)
// Merge the six payload bits (two from the high nibble, four from the low).
174 nEncoded
|= ((nWeight1
& 3) << 4 | nWeight2
) << nShift
;
// The decoded value is accepted only if it is at least nMin, is not a
// surrogate, and does not exceed U+10FFFF.
// NOTE(review): nMin is assigned on lines not visible here; presumably it is
// the smallest value needing that sequence length (overlong check) -- confirm.
176 if (bUTF8
&& nEncoded
>= nMin
&& !isHighSurrogate(nEncoded
)
177 && !isLowSurrogate(nEncoded
) && nEncoded
<= 0x10FFFF)
184 *pType
= EscapeOctet
;
// Charset handling via a text converter: collect octets in aBuf, starting
// with the one just read, and try to convert them with a converter for
// eCharset.
// NOTE(review): the branch head selecting this path is not visible here.
188 rtl::OStringBuffer aBuf
;
189 aBuf
.append(static_cast< char >(nChar
));
190 rtl_TextToUnicodeConverter aConverter
191 = rtl_createTextToUnicodeConverter(eCharset
);
192 sal_Unicode
const * p
= *pBegin
;
// Convert everything collected so far; undefined, multi-byte-undefined and
// invalid input are all requested to be reported as errors.
198 sal_Size nDstSize
= rtl_convertTextToUnicode(
199 aConverter
, 0, aBuf
.getStr(), aBuf
.getLength(), aDst
,
200 sizeof aDst
/ sizeof aDst
[0],
201 (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
202 | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
203 | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR
),
204 &nInfo
, &nConverted
);
209 == sal::static_int_cast
< sal_uInt32
>(
211 rtl_destroyTextToUnicodeConverter(aConverter
);
// A result is either one UTF-16 unit or a high/low surrogate pair.
216 || (nDstSize
== 2 && isHighSurrogate(aDst
[0])
217 && isLowSurrogate(aDst
[1])));
219 ? aDst
[0] : combineSurrogates(aDst
[0], aDst
[1]);
// The converter needs more input and another "%XY" escape follows: add that
// octet to aBuf (presumably the conversion is then retried -- the loop head
// is not visible here).
221 else if (nInfo
== RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
222 && pEnd
- p
>= 3 && p
[0] == cEscapePrefix
223 && (nWeight1
= getHexWeight(p
[1])) >= 0
224 && (nWeight2
= getHexWeight(p
[2])) >= 0)
227 aBuf
.append(static_cast< char >(nWeight1
<< 4 | nWeight2
));
// The converter needs more input and an unescaped character up to 0x7F
// follows: add it to aBuf as a single octet and step past it.
229 else if (nInfo
== RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
230 && p
!= pEnd
&& *p
<= 0x7F)
232 aBuf
.append(static_cast< char >(*p
++));
237 (nInfo
& RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
)
// Conversion did not produce a character: release the converter and report
// the value as a bare escaped octet.
242 rtl_destroyTextToUnicodeConverter(aConverter
);
243 *pType
= EscapeOctet
;
// A high surrogate immediately followed by a low surrogate is combined into
// one code point (consuming the low surrogate); anything else is returned
// unchanged.
250 return isHighSurrogate(nChar
) && *pBegin
< pEnd
251 && isLowSurrogate(**pBegin
) ?
252 combineSurrogates(nChar
, *(*pBegin
)++) : nChar
;
// Appends the code point nUtf32 to the buffer as UTF-16.  Values up to 0xFFFF
// are written as a single unit; the two remaining casts build a high
// surrogate (| 0xD800) and a low surrogate (| 0xDC00).
// NOTE(review): the heads of the write calls and the else branch are not
// visible in this listing.  The shift/mask on the surrogate lines presumes
// nUtf32 has been reduced by 0x10000 beforehand -- confirm.
256 void writeUcs4(rtl_uString
** pBuffer
, sal_Int32
* pCapacity
, sal_uInt32 nUtf32
)
// Debug-only sanity check: nothing beyond U+10FFFF is a valid code point.
258 OSL_ENSURE(nUtf32
<= 0x10FFFF, "bad UTF-32 char");
259 if (nUtf32
<= 0xFFFF) {
261 pBuffer
, pCapacity
, static_cast< sal_Unicode
>(nUtf32
));
// High surrogate: upper bits of the value.
266 static_cast< sal_Unicode
>(nUtf32
>> 10 | 0xD800));
// Low surrogate: lower ten bits of the value.
269 static_cast< sal_Unicode
>((nUtf32
& 0x3FF) | 0xDC00));
// Appends the three-character escape sequence for one octet to the buffer:
// '%' followed by the high and the low nibble as upper-case hex digits.
// NOTE(review): the declaration of the last parameter (nOctet) is not visible
// in this listing.
273 void writeEscapeOctet(rtl_uString
** pBuffer
, sal_Int32
* pCapacity
,
// Debug-only check; values above 0xFF would index aHex out of bounds below.
276 OSL_ENSURE(nOctet
<= 0xFF, "bad octet");
// Digit lookup table indexed by nibble value.
278 static sal_Unicode
const aHex
[16]
279 = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
280 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }; /* '0'--'9', 'A'--'F' */
282 writeUnicode(pBuffer
, pCapacity
, cEscapePrefix
);
283 writeUnicode(pBuffer
, pCapacity
, aHex
[nOctet
>> 4]);
284 writeUnicode(pBuffer
, pCapacity
, aHex
[nOctet
& 15]);
// Appends the code point nUtf32 to the buffer in escaped form, encoded in
// eCharset.  For UTF-8 the one to four octets are computed directly; for any
// other charset the code point is converted with a Unicode-to-text converter
// and each resulting octet is escaped.
// NOTE(review): the return statements, some branch heads and every use of
// bStrict are not visible in this listing.  Presumably the function returns
// false when the character cannot be converted and bStrict is set, and falls
// back to writing the character unescaped otherwise -- confirm.
287 bool writeEscapeChar(rtl_uString
** pBuffer
, sal_Int32
* pCapacity
,
288 sal_uInt32 nUtf32
, rtl_TextEncoding eCharset
, bool bStrict
)
290 OSL_ENSURE(nUtf32
<= 0x10FFFF, "bad UTF-32 char");
291 if (eCharset
== RTL_TEXTENCODING_UTF8
) {
// One-octet form: the value itself.
293 writeEscapeOctet(pBuffer
, pCapacity
, nUtf32
);
// Two-octet form: 110xxxxx 10xxxxxx.
294 else if (nUtf32
< 0x800)
296 writeEscapeOctet(pBuffer
, pCapacity
, nUtf32
>> 6 | 0xC0);
297 writeEscapeOctet(pBuffer
, pCapacity
, (nUtf32
& 0x3F) | 0x80);
// Three-octet form: 1110xxxx 10xxxxxx 10xxxxxx.
299 else if (nUtf32
< 0x10000)
301 writeEscapeOctet(pBuffer
, pCapacity
, nUtf32
>> 12 | 0xE0);
302 writeEscapeOctet(pBuffer
, pCapacity
, (nUtf32
>> 6 & 0x3F) | 0x80);
303 writeEscapeOctet(pBuffer
, pCapacity
, (nUtf32
& 0x3F) | 0x80);
// Four-octet form: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
307 writeEscapeOctet(pBuffer
, pCapacity
, nUtf32
>> 18 | 0xF0);
308 writeEscapeOctet(pBuffer
, pCapacity
, (nUtf32
>> 12 & 0x3F) | 0x80);
309 writeEscapeOctet(pBuffer
, pCapacity
, (nUtf32
>> 6 & 0x3F) | 0x80);
310 writeEscapeOctet(pBuffer
, pCapacity
, (nUtf32
& 0x3F) | 0x80);
// Any other charset: go through a Unicode-to-text converter.
313 rtl_UnicodeToTextConverter aConverter
314 = rtl_createUnicodeToTextConverter(eCharset
);
// The converter input is UTF-16: one unit for values up to 0xFFFF ...
317 if (nUtf32
<= 0xFFFF)
319 aSrc
[0] = static_cast< sal_Unicode
>(nUtf32
);
// ... otherwise a high/low surrogate pair.
324 aSrc
[0] = static_cast< sal_Unicode
>(
325 ((nUtf32
- 0x10000) >> 10) | 0xD800);
326 aSrc
[1] = static_cast< sal_Unicode
>(
327 ((nUtf32
- 0x10000) & 0x3FF) | 0xDC00);
330 sal_Char aDst
[32]; // FIXME random value
// Convert with flushing; undefined and invalid input are requested to be
// reported as errors.
333 sal_Size nDstSize
= rtl_convertUnicodeToText(
334 aConverter
, 0, aSrc
, nSrcSize
, aDst
, sizeof aDst
,
335 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
336 | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
337 | RTL_UNICODETOTEXT_FLAGS_FLUSH
,
338 &nInfo
, &nConverted
);
// aDst is assumed to be large enough for any single character (debug-only
// check).
339 OSL_ASSERT((nInfo
& RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
) == 0);
340 rtl_destroyUnicodeToTextConverter(aConverter
);
342 OSL_ENSURE(nConverted
== nSrcSize
, "bad rtl_convertUnicodeToText");
// Escape every octet the converter produced; the cast through unsigned char
// keeps octets above 0x7F from becoming negative.
343 for (sal_Size i
= 0; i
< nDstSize
; ++i
)
344 writeEscapeOctet(pBuffer
, pCapacity
,
345 static_cast< unsigned char >(aDst
[i
]));
346 // FIXME all octets are escaped, even if there is no need
// Write the character itself, unescaped (the condition leading here is not
// visible in this listing).
351 writeUcs4(pBuffer
, pCapacity
, nUtf32
);
360 sal_Unicode
const * pBegin
;
361 sal_Unicode
const * pEnd
;
363 inline Component(): pBegin(0) {}
365 inline bool isPresent() const { return pBegin
!= 0; }
367 inline sal_Int32
getLength() const;
370 inline sal_Int32
Component::getLength() const
372 OSL_ENSURE(isPresent(), "taking length of non-present component");
373 return static_cast< sal_Int32
>(pEnd
- pBegin
);
// Authority component of a parsed URI reference; parseUriRef lets it start at
// the leading "//".
// NOTE(review): the enclosing declaration and its other members (scheme,
// path, query, fragment are used elsewhere in this file) are not visible in
// this listing.
379 Component aAuthority
;
// Splits the URI reference pUriRef into scheme, authority, path, query and
// fragment and records each as a pointer range in *pComponents.  The ranges
// point into pUriRef's own buffer, so they stay valid only as long as that
// string does.  The path range is always set (possibly empty); the other
// components are set only when the corresponding text is found.
// NOTE(review): a few statements (the test that ends the scheme, loop bodies,
// the guard around the fragment) are not visible in this listing.
385 void parseUriRef(rtl_uString
const * pUriRef
, Components
* pComponents
)
387 // This algorithm is liberal and accepts various forms of illegal input.
389 sal_Unicode
const * pBegin
= pUriRef
->buffer
;
390 sal_Unicode
const * pEnd
= pBegin
+ pUriRef
->length
;
391 sal_Unicode
const * pPos
= pBegin
;
// Scheme: must start with a letter; scan the following characters.
393 if (pPos
!= pEnd
&& isAlpha(*pPos
))
394 for (sal_Unicode
const * p
= pPos
+ 1; p
!= pEnd
; ++p
)
// The scheme range starts at the beginning of the string and extends one
// unit past p, i.e. it includes the character at p (presumably the ':'
// terminator -- the test for it is not visible here).
397 pComponents
->aScheme
.pBegin
= pBegin
;
398 pComponents
->aScheme
.pEnd
= ++p
;
// Characters other than letters, digits, '+' and '-' are tested here (the
// rest of this condition is not visible in this listing).
402 else if (!isAlpha(*p
) && !isDigit(*p
) && *p
!= '+' && *p
!= '-'
// Authority: introduced by "//"; the recorded range includes both slashes.
406 if (pEnd
- pPos
>= 2 && pPos
[0] == '/' && pPos
[1] == '/')
408 pComponents
->aAuthority
.pBegin
= pPos
;
// The authority runs up to the next '/', '?' or '#', or the end of input.
410 while (pPos
!= pEnd
&& *pPos
!= '/' && *pPos
!= '?' && *pPos
!= '#')
412 pComponents
->aAuthority
.pEnd
= pPos
;
// Path: everything up to the next '?' or '#'; may be empty.
415 pComponents
->aPath
.pBegin
= pPos
;
416 while (pPos
!= pEnd
&& *pPos
!= '?' && * pPos
!= '#')
418 pComponents
->aPath
.pEnd
= pPos
;
// Query: the recorded range includes the leading '?' and runs up to '#'.
420 if (pPos
!= pEnd
&& *pPos
== '?')
422 pComponents
->aQuery
.pBegin
= pPos
++;
423 while (pPos
!= pEnd
&& * pPos
!= '#')
425 pComponents
->aQuery
.pEnd
= pPos
;
// Fragment: the recorded range includes the leading '#' and runs to the end
// of the string.
430 OSL_ASSERT(*pPos
== '#');
431 pComponents
->aFragment
.pBegin
= pPos
;
432 pComponents
->aFragment
.pEnd
= pEnd
;
// Merges the relative path rRelPath into the absolute path rBasePath and
// returns the resulting path.  Both components must be present and rBasePath
// must start with a slash (debug-only checks).  Segments "." of rRelPath are
// dropped, and segments ".." are applied to what has been collected so far.
// NOTE(review): several statements (loop heads, segment scanning, the
// conditions guarding the ".." handling) are not visible in this listing.
436 rtl::OUString
joinPaths(Component
const & rBasePath
, Component
const & rRelPath
)
438 OSL_ASSERT(rBasePath
.isPresent() && *rBasePath
.pBegin
== '/');
439 OSL_ASSERT(rRelPath
.isPresent());
441 // The invariant of aBuffer is that it always starts and ends with a slash
442 // (until probably right at the end of the algorithm, when the last segment
443 // of rRelPath is added, which does not necessarily end in a slash):
// The result is assembled in aBuffer, constructed with the sum of both path
// lengths as its initial capacity.
444 rtl::OUStringBuffer
aBuffer(rBasePath
.getLength() + rRelPath
.getLength());
445 // XXX numeric overflow
447 // Segments "." and ".." within rBasePath are not considered special (but
448 // are also not removed by ".." segments within rRelPath), RFC 2396 seems a
449 // bit unclear about this point:
// nFixed is an offset into rBasePath; it is updated below whenever a "." or
// ".." segment is seen in the base path (presumably it marks the prefix of
// the result that later ".." segments must not remove -- confirm).
450 sal_Int32 nFixed
= 1;
451 sal_Unicode
const * p
= rBasePath
.pBegin
+ 1;
452 for (sal_Unicode
const * q
= p
; q
!= rBasePath
.pEnd
; ++q
)
// The segment [p, q) is exactly "." or "..".
456 (q
- p
== 1 && p
[0] == '.') ||
457 (q
- p
== 2 && p
[0] == '.' && p
[1] == '.')
460 nFixed
= q
+ 1 - rBasePath
.pBegin
;
// Copy the base path from its start up to p.
464 aBuffer
.append(rBasePath
.pBegin
, p
- rBasePath
.pBegin
);
467 if (p
!= rRelPath
.pEnd
)
// q and r delimit the current segment of the relative path (their exact
// roles are set on lines not visible here).
470 sal_Unicode
const * q
= p
;
471 sal_Unicode
const * r
;
474 if (q
== rRelPath
.pEnd
)
// Segment "..":
486 if (q
- p
== 2 && p
[0] == '.' && p
[1] == '.')
488 // Erroneous excess segments ".." within rRelPath are left
489 // intact, as the examples in RFC 2396, section C.2, suggest:
490 sal_Int32 i
= aBuffer
.getLength() - 1;
493 aBuffer
.append(p
, r
- p
);
// Search backwards for the slash that precedes the last collected segment
// and truncate the buffer there.
498 while (aBuffer
.charAt(i
- 1) != '/')
500 aBuffer
.setLength(i
);
// Any segment other than "." is appended as is.
503 else if (q
- p
!= 1 || *p
!= '.')
504 aBuffer
.append(p
, r
- p
);
505 if (q
== rRelPath
.pEnd
)
510 return aBuffer
.makeStringAndClear();
// Returns the predefined character-class table selected by eCharClass.  Each
// table has nCharClassSize entries indexed by character code; an entry of 1
// marks the character as belonging to the class.  The trailing comment on
// each row shows the characters that row covers.  The returned pointer refers
// to static storage.
// NOTE(review): the head of the range check on eCharClass and its message are
// not visible in this listing.
515 sal_Bool
const * SAL_CALL
rtl_getUriCharClass(rtl_UriCharClass eCharClass
)
518 static sal_Bool
const aCharClass
[][nCharClassSize
]
519 = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* None */
520 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
521 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* !"#$%&'()*+,-./*/
522 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*0123456789:;<=>?*/
523 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*@ABCDEFGHIJKLMNO*/
524 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*PQRSTUVWXYZ[\]^_*/
525 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*`abcdefghijklmno*/
526 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /*pqrstuvwxyz{|}~ */
528 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Uric */
529 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
530 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* !"#$%&'()*+,-./*/
531 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
532 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
533 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*PQRSTUVWXYZ[\]^_*/
534 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
535 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
537 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UricNoSlash */
538 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
539 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
540 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
541 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
542 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
543 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
544 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
546 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RelSegment */
547 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
548 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
549 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
550 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
551 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
552 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
553 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
555 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RegName */
556 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
557 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
558 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
559 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
560 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
561 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
562 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
564 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Userinfo */
565 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
566 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
567 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
568 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
569 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
570 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
571 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
573 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Pchar */
574 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
575 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
576 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, /*0123456789:;<=>?*/
577 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
578 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
579 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
580 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
582 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UnoParamValue */
583 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
584 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, /* !"#$%&'()*+,-./*/
585 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*0123456789:;<=>?*/
586 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
587 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
588 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
589 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
// Part of a debug-only check that eCharClass is smaller than the number of
// tables.
593 && (sal::static_int_cast
< std::size_t >(eCharClass
)
594 < sizeof aCharClass
/ sizeof aCharClass
[0])),
// No bounds check is applied to the index on this line itself.
596 return aCharClass
[eCharClass
];
// Encodes pText for use in a URI and stores the outcome in *pResult.
// Characters flagged in pCharClass are copied as they are; other characters
// are written as escape sequences in eCharset.  Existing escape sequences in
// pText are recognized only for the mechanisms rtl_UriEncodeKeepEscapes,
// rtl_UriEncodeCheckEscapes and rtl_UriEncodeStrictKeepEscapes.  When
// writeEscapeChar fails, *pResult is reset to a new empty string.
// NOTE(review): the loop head, the dispatch on the escape type reported by
// readUcs4 and the early returns are not visible in this listing; presumably
// the three write sections below correspond to "no escape", "escaped
// character" and "escaped octet" respectively -- confirm.
599 void SAL_CALL
rtl_uriEncode(rtl_uString
* pText
, sal_Bool
const * pCharClass
,
600 rtl_UriEncodeMechanism eMechanism
,
601 rtl_TextEncoding eCharset
, rtl_uString
** pResult
)
// The character class must not contain '%' (0x25), otherwise a literal
// percent sign would be copied unescaped.
604 OSL_ENSURE(!pCharClass
[0x25], "bad pCharClass");
605 // make sure the percent sign is encoded...
607 sal_Unicode
const * p
= pText
->buffer
;
608 sal_Unicode
const * pEnd
= p
+ pText
->length
;
// The result starts out as a new empty string and is built up by the write
// helpers below.
609 sal_Int32 nCapacity
= 0;
610 rtl_uString_new(pResult
);
// Read the next character; escapes are interpreted only for the three
// mechanisms listed in the condition.
614 sal_uInt32 nUtf32
= readUcs4(
616 (eMechanism
== rtl_UriEncodeKeepEscapes
617 || eMechanism
== rtl_UriEncodeCheckEscapes
618 || eMechanism
== rtl_UriEncodeStrictKeepEscapes
),
// A character of the class is copied verbatim; anything else is escaped,
// strictly for the two "Strict" mechanisms.
623 if (isValid(pCharClass
, nUtf32
)) // implies nUtf32 <= 0x7F
624 writeUnicode(pResult
, &nCapacity
,
625 static_cast< sal_Unicode
>(nUtf32
));
626 else if (!writeEscapeChar(
627 pResult
, &nCapacity
, nUtf32
, eCharset
,
628 (eMechanism
== rtl_UriEncodeStrict
629 || eMechanism
== rtl_UriEncodeStrictKeepEscapes
)))
// Escaping failed: reset the result to a new empty string.
631 rtl_uString_new(pResult
);
// Only rtl_UriEncodeCheckEscapes writes a character of the class unescaped
// here; every other case goes through writeEscapeChar.
637 if (eMechanism
== rtl_UriEncodeCheckEscapes
638 && isValid(pCharClass
, nUtf32
)) // implies nUtf32 <= 0x7F
639 writeUnicode(pResult
, &nCapacity
,
640 static_cast< sal_Unicode
>(nUtf32
));
641 else if (!writeEscapeChar(
642 pResult
, &nCapacity
, nUtf32
, eCharset
,
643 (eMechanism
== rtl_UriEncodeStrict
644 || eMechanism
== rtl_UriEncodeStrictKeepEscapes
)))
646 rtl_uString_new(pResult
);
// Write the value as a single escaped octet.
652 writeEscapeOctet(pResult
, &nCapacity
, nUtf32
);
// Decodes the escape sequences in pText according to eMechanism and stores
// the outcome in *pResult.
//  - rtl_UriDecodeNone: pText is assigned to *pResult.
//  - rtl_UriDecodeToIuri: eCharset is forced to UTF-8 and escaped characters
//    up to 0x7F are kept in escaped form.
//  - rtl_UriDecodeStrict: *pResult is reset to a new empty string on the
//    strict-failure path below.
// NOTE(review): the switch head, the loop head, the dispatch on the escape
// type and the break/return statements are not visible in this listing.
658 void SAL_CALL
rtl_uriDecode(rtl_uString
* pText
,
659 rtl_UriDecodeMechanism eMechanism
,
660 rtl_TextEncoding eCharset
, rtl_uString
** pResult
)
665 case rtl_UriDecodeNone
:
666 rtl_uString_assign(pResult
, pText
);
// For IURIs the charset is always UTF-8; control then continues directly into
// the default case.
669 case rtl_UriDecodeToIuri
:
670 eCharset
= RTL_TEXTENCODING_UTF8
;
671 default: // rtl_UriDecodeWithCharset, rtl_UriDecodeStrict
673 sal_Unicode
const * p
= pText
->buffer
;
674 sal_Unicode
const * pEnd
= p
+ pText
->length
;
// The result starts out as a new empty string and is built up by the write
// helpers below.
675 sal_Int32 nCapacity
= 0;
676 rtl_uString_new(pResult
);
// Escape sequences are always interpreted when decoding (bEncoded is true).
680 sal_uInt32 nUtf32
= readUcs4(&p
, pEnd
, true, eCharset
, &eType
);
// When converting to an IURI, characters up to 0x7F stay escaped.
684 if (nUtf32
<= 0x7F && eMechanism
== rtl_UriDecodeToIuri
)
686 writeEscapeOctet(pResult
, &nCapacity
, nUtf32
);
// Write the decoded character itself.
690 writeUcs4(pResult
, &nCapacity
, nUtf32
);
// Strict decoding resets the result to a new empty string here; the other
// mechanisms keep the value as an escaped octet.
694 if (eMechanism
== rtl_UriDecodeStrict
) {
695 rtl_uString_new(pResult
);
698 writeEscapeOctet(pResult
, &nCapacity
, nUtf32
);
// Resolves the relative URI reference pRelUriRef against the base URI
// reference pBaseUriRef and stores the absolute URI reference in *pResult.
// If pRelUriRef already has a scheme it is taken over unchanged.  If the base
// has no scheme, or has a non-empty path that does not start with a slash, a
// message is stored in *pException instead.
// NOTE(review): the return statements and some else branches are not visible
// in this listing; presumably the two error paths return false and all other
// paths return true -- confirm.
707 sal_Bool SAL_CALL
rtl_uriConvertRelToAbs(rtl_uString
* pBaseUriRef
,
708 rtl_uString
* pRelUriRef
,
709 rtl_uString
** pResult
,
710 rtl_uString
** pException
)
713 // If pRelUriRef starts with a scheme component it is an absolute URI
714 // reference, and we are done (i.e., this algorithm does not support
715 // backwards-compatible relative URIs starting with a scheme component, see
716 // RFC 2396, section 5.2, step 3):
717 Components aRelComponents
;
718 parseUriRef(pRelUriRef
, &aRelComponents
);
719 if (aRelComponents
.aScheme
.isPresent())
721 rtl_uString_assign(pResult
, pRelUriRef
);
725 // Parse pBaseUriRef; if the scheme component is not present or not valid,
726 // or the path component is not empty and starts with anything but a slash,
727 // an exception is raised:
728 Components aBaseComponents
;
729 parseUriRef(pBaseUriRef
, &aBaseComponents
);
730 if (!aBaseComponents
.aScheme
.isPresent())
732 rtl::OUString
aMessage(pBaseUriRef
);
733 aMessage
+= rtl::OUString(
734 RTL_CONSTASCII_USTRINGPARAM(
735 " does not start with a scheme component"));
// NOTE(review): aMessage is not const, so this const_cast looks redundant.
736 rtl_uString_assign(pException
,
737 const_cast< rtl::OUString
& >(aMessage
).pData
);
740 if (aBaseComponents
.aPath
.pBegin
!= aBaseComponents
.aPath
.pEnd
741 && *aBaseComponents
.aPath
.pBegin
!= '/')
// NOTE(review): unlike the message above, this text has no leading space, so
// it is glued directly onto the base URI -- looks unintended; confirm.
743 rtl::OUString
aMessage(pBaseUriRef
);
744 aMessage
+= rtl::OUString(
745 RTL_CONSTASCII_USTRINGPARAM(
746 "path component does not start with slash"));
747 rtl_uString_assign(pException
, aMessage
.pData
);
751 // Use the algorithm from RFC 2396, section 5.2, to turn the relative URI
752 // into an absolute one (if the relative URI is a reference to the "current
753 // document," the "current document" is here taken to be the base URI):
// The result always starts with the scheme of the base (the scheme range set
// by parseUriRef includes its terminating character).
754 rtl::OUStringBuffer aBuffer
;
755 aBuffer
.append(aBaseComponents
.aScheme
.pBegin
,
756 aBaseComponents
.aScheme
.getLength());
// The relative reference has its own authority: take authority, path and
// query from the relative reference.
757 if (aRelComponents
.aAuthority
.isPresent())
759 aBuffer
.append(aRelComponents
.aAuthority
.pBegin
,
760 aRelComponents
.aAuthority
.getLength());
761 aBuffer
.append(aRelComponents
.aPath
.pBegin
,
762 aRelComponents
.aPath
.getLength());
763 if (aRelComponents
.aQuery
.isPresent())
764 aBuffer
.append(aRelComponents
.aQuery
.pBegin
,
765 aRelComponents
.aQuery
.getLength());
// Use the authority of the base, if any (presumably this is the else branch
// of the check above; the branch keyword is not visible here).
769 if (aBaseComponents
.aAuthority
.isPresent())
770 aBuffer
.append(aBaseComponents
.aAuthority
.pBegin
,
771 aBaseComponents
.aAuthority
.getLength());
// Empty relative path and no relative query: the reference points to the
// "current document", so path and query are taken from the base.
772 if (aRelComponents
.aPath
.pBegin
== aRelComponents
.aPath
.pEnd
773 && !aRelComponents
.aQuery
.isPresent())
775 aBuffer
.append(aBaseComponents
.aPath
.pBegin
,
776 aBaseComponents
.aPath
.getLength());
777 if (aBaseComponents
.aQuery
.isPresent())
778 aBuffer
.append(aBaseComponents
.aQuery
.pBegin
,
779 aBaseComponents
.aQuery
.getLength());
// A relative path that starts with a slash replaces the base path.
783 if (*aRelComponents
.aPath
.pBegin
== '/')
784 aBuffer
.append(aRelComponents
.aPath
.pBegin
,
785 aRelComponents
.aPath
.getLength());
// Any other relative path is merged with the base path.
787 aBuffer
.append(joinPaths(aBaseComponents
.aPath
,
788 aRelComponents
.aPath
));
789 if (aRelComponents
.aQuery
.isPresent())
790 aBuffer
.append(aRelComponents
.aQuery
.pBegin
,
791 aRelComponents
.aQuery
.getLength());
// Only the fragment of the relative reference is appended; the fragment of
// the base is not used anywhere in the visible code.
794 if (aRelComponents
.aFragment
.isPresent())
795 aBuffer
.append(aRelComponents
.aFragment
.pBegin
,
796 aRelComponents
.aFragment
.getLength());
797 rtl_uString_assign(pResult
, aBuffer
.makeStringAndClear().pData
);