1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include "surrogates.hxx"
22 #include "osl/diagnose.h"
23 #include "rtl/character.hxx"
24 #include "rtl/strbuf.hxx"
25 #include "rtl/textenc.h"
26 #include "rtl/textcvt.h"
28 #include "rtl/ustrbuf.h"
29 #include "rtl/ustrbuf.hxx"
30 #include "rtl/ustring.h"
31 #include "rtl/ustring.hxx"
32 #include "sal/types.h"
33 #include "sal/macros.h"
39 std::size_t const nCharClassSize
= 128;
41 sal_Unicode
const cEscapePrefix
= 0x25; // '%'
43 inline bool isHighSurrogate(sal_uInt32 nUtf16
)
45 return SAL_RTL_IS_HIGH_SURROGATE(nUtf16
);
48 inline bool isLowSurrogate(sal_uInt32 nUtf16
)
50 return SAL_RTL_IS_LOW_SURROGATE(nUtf16
);
53 inline sal_uInt32
combineSurrogates(sal_uInt32 high
, sal_uInt32 low
)
55 return SAL_RTL_COMBINE_SURROGATES(high
, low
);
58 inline int getHexWeight(sal_uInt32 nUtf32
)
60 return nUtf32
>= 0x30 && nUtf32
<= 0x39 ? // '0'--'9'
61 static_cast< int >(nUtf32
- 0x30) :
62 nUtf32
>= 0x41 && nUtf32
<= 0x46 ? // 'A'--'F'
63 static_cast< int >(nUtf32
- 0x41 + 10) :
64 nUtf32
>= 0x61 && nUtf32
<= 0x66 ? // 'a'--'f'
65 static_cast< int >(nUtf32
- 0x61 + 10) :
66 -1; // not a hex digit
69 inline bool isValid(sal_Bool
const * pCharClass
, sal_uInt32 nUtf32
)
71 return nUtf32
< nCharClassSize
&& pCharClass
[nUtf32
];
74 inline void writeUnicode(rtl_uString
** pBuffer
, sal_Int32
* pCapacity
,
77 rtl_uStringbuffer_insert(pBuffer
, pCapacity
, (*pBuffer
)->length
, &cChar
, 1);
/* Read any of the following:

   - sequence of escape sequences representing character from eCharset,
     translated to single UCS4 character; or

   - pair of UTF-16 surrogates, translated to single UCS4 character; or

   - single UTF-16 character, extended to UCS4 character.
 */
// Decodes one character starting at *pBegin and advances *pBegin past the
// code units that were consumed.
//
// pBegin    in/out cursor into the text; the first code unit is read
//           unconditionally, so callers presumably guarantee *pBegin != pEnd
// pEnd      end of the text
// bEncoded  whether "%XX" escape sequences are to be interpreted
// eCharset  charset that escaped octets are taken to be encoded in
// pType     (set via *pType below) how the character was obtained; only the
//           assignments of EscapeOctet are visible in this view
//
// NOTE(review): several statements of this function (local declarations,
// some branch heads and returns) are not visible here; the comments below
// describe only what the visible statements do.
96 sal_uInt32
readUcs4(sal_Unicode
const ** pBegin
, sal_Unicode
const * pEnd
,
97 bool bEncoded
, rtl_TextEncoding eCharset
,
100 sal_uInt32 nChar
= *(*pBegin
)++;
// A '%' followed by two hex digits is treated as an escaped octet, but only
// when bEncoded is set.
103 if (nChar
== cEscapePrefix
&& bEncoded
&& pEnd
- *pBegin
>= 2
104 && (nWeight1
= getHexWeight((*pBegin
)[0])) >= 0
105 && (nWeight2
= getHexWeight((*pBegin
)[1])) >= 0)
108 nChar
= static_cast< sal_uInt32
>(nWeight1
<< 4 | nWeight2
);
111 else if (eCharset
== RTL_TEXTENCODING_UTF8
)
// 0xC0--0xF4 is the range accepted for the lead octet of a multi-octet
// UTF-8 sequence; its payload bits form the top of nEncoded.
113 if (nChar
>= 0xC0 && nChar
<= 0xF4)
120 nEncoded
= (nChar
& 0x1F) << 6;
124 else if (nChar
<= 0xEF)
126 nEncoded
= (nChar
& 0x0F) << 12;
132 nEncoded
= (nChar
& 0x07) << 18;
136 sal_Unicode
const * p
= *pBegin
;
// Each further octet must itself be written as "%XX" with a first hex digit
// of at least 8; its low six bits are merged into nEncoded at nShift.
138 for (; nShift
>= 0; nShift
-= 6)
140 if (pEnd
- p
< 3 || p
[0] != cEscapePrefix
141 || (nWeight1
= getHexWeight(p
[1])) < 8
143 || (nWeight2
= getHexWeight(p
[2])) < 0)
149 nEncoded
|= ((nWeight1
& 3) << 4 | nWeight2
) << nShift
;
// The decoded value is only accepted if it is at least nMin (presumably the
// smallest value needing this sequence length), is not a surrogate, and does
// not exceed 0x10FFFF.
151 if (bUTF8
&& nEncoded
>= nMin
&& !isHighSurrogate(nEncoded
)
152 && !isLowSurrogate(nEncoded
) && nEncoded
<= 0x10FFFF)
159 *pType
= EscapeOctet
;
// Other charsets: collect octets in aBuf and let the text converter for
// eCharset try to turn them into UTF-16.
163 rtl::OStringBuffer aBuf
;
164 aBuf
.append(static_cast< char >(nChar
));
165 rtl_TextToUnicodeConverter aConverter
166 = rtl_createTextToUnicodeConverter(eCharset
);
167 sal_Unicode
const * p
= *pBegin
;
173 sal_Size nDstSize
= rtl_convertTextToUnicode(
174 aConverter
, 0, aBuf
.getStr(), aBuf
.getLength(), aDst
,
175 SAL_N_ELEMENTS( aDst
),
176 (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
177 | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
178 | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR
),
179 &nInfo
, &nConverted
);
183 == sal::static_int_cast
< sal_uInt32
>(
185 rtl_destroyTextToUnicodeConverter(aConverter
);
// A converted character is either one UTF-16 unit or a surrogate pair.
188 assert( nDstSize
== 1
189 || (nDstSize
== 2 && isHighSurrogate(aDst
[0])
190 && isLowSurrogate(aDst
[1])));
192 ? aDst
[0] : combineSurrogates(aDst
[0], aDst
[1]);
// The converter needs more input: take the next escaped octet ...
194 else if (nInfo
== RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
195 && pEnd
- p
>= 3 && p
[0] == cEscapePrefix
196 && (nWeight1
= getHexWeight(p
[1])) >= 0
197 && (nWeight2
= getHexWeight(p
[2])) >= 0)
200 aBuf
.append(static_cast< char >(nWeight1
<< 4 | nWeight2
));
// ... or the next unescaped character, provided it is in the ASCII range.
202 else if (nInfo
== RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
203 && p
!= pEnd
&& *p
<= 0x7F)
205 aBuf
.append(static_cast< char >(*p
++));
210 (nInfo
& RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
)
215 rtl_destroyTextToUnicodeConverter(aConverter
);
216 *pType
= EscapeOctet
;
// Not an escape: a high surrogate immediately followed by a low surrogate is
// consumed as a pair and combined; anything else is returned as is.
223 return isHighSurrogate(nChar
) && *pBegin
< pEnd
224 && isLowSurrogate(**pBegin
) ?
225 combineSurrogates(nChar
, *(*pBegin
)++) : nChar
;
229 void writeUcs4(rtl_uString
** pBuffer
, sal_Int32
* pCapacity
, sal_uInt32 nUtf32
)
231 assert(nUtf32
<= 0x10FFFF); // bad UTF-32 char
232 if (nUtf32
<= 0xFFFF) {
234 pBuffer
, pCapacity
, static_cast< sal_Unicode
>(nUtf32
));
239 static_cast< sal_Unicode
>(nUtf32
>> 10 | 0xD800));
242 static_cast< sal_Unicode
>((nUtf32
& 0x3FF) | 0xDC00));
246 void writeEscapeOctet(rtl_uString
** pBuffer
, sal_Int32
* pCapacity
,
249 assert(nOctet
<= 0xFF); // bad octet
251 static sal_Unicode
const aHex
[16]
252 = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
253 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }; /* '0'--'9', 'A'--'F' */
255 writeUnicode(pBuffer
, pCapacity
, cEscapePrefix
);
256 writeUnicode(pBuffer
, pCapacity
, aHex
[nOctet
>> 4]);
257 writeUnicode(pBuffer
, pCapacity
, aHex
[nOctet
& 15]);
// Appends the code point nUtf32 to *pBuffer as a run of "%XX" escapes holding
// its encoding in eCharset.
//
// pBuffer    string buffer to append to
// pCapacity  capacity of *pBuffer
// nUtf32     code point to write; must not exceed 0x10FFFF
// eCharset   target charset; UTF-8 is encoded by hand below, every other
//            charset goes through a Unicode-to-text converter
// bStrict    NOTE(review): the statements that use bStrict and produce the
//            bool result are not visible in this view; callers treat a false
//            result as failure
260 bool writeEscapeChar(rtl_uString
** pBuffer
, sal_Int32
* pCapacity
,
261 sal_uInt32 nUtf32
, rtl_TextEncoding eCharset
, bool bStrict
)
263 assert(nUtf32
<= 0x10FFFF); // bad UTF-32 char
264 if (eCharset
== RTL_TEXTENCODING_UTF8
) {
// Hand-rolled UTF-8 encoding: one to four octets depending on the size of
// nUtf32, each written as an escape.
266 writeEscapeOctet(pBuffer
, pCapacity
, nUtf32
);
267 else if (nUtf32
< 0x800)
269 writeEscapeOctet(pBuffer
, pCapacity
, nUtf32
>> 6 | 0xC0);
270 writeEscapeOctet(pBuffer
, pCapacity
, (nUtf32
& 0x3F) | 0x80);
272 else if (nUtf32
< 0x10000)
274 writeEscapeOctet(pBuffer
, pCapacity
, nUtf32
>> 12 | 0xE0);
275 writeEscapeOctet(pBuffer
, pCapacity
, (nUtf32
>> 6 & 0x3F) | 0x80);
276 writeEscapeOctet(pBuffer
, pCapacity
, (nUtf32
& 0x3F) | 0x80);
280 writeEscapeOctet(pBuffer
, pCapacity
, nUtf32
>> 18 | 0xF0);
281 writeEscapeOctet(pBuffer
, pCapacity
, (nUtf32
>> 12 & 0x3F) | 0x80);
282 writeEscapeOctet(pBuffer
, pCapacity
, (nUtf32
>> 6 & 0x3F) | 0x80);
283 writeEscapeOctet(pBuffer
, pCapacity
, (nUtf32
& 0x3F) | 0x80);
// Any other charset: express nUtf32 as UTF-16 in aSrc (one unit, or a
// surrogate pair for values above 0xFFFF) and convert that to eCharset.
286 rtl_UnicodeToTextConverter aConverter
287 = rtl_createUnicodeToTextConverter(eCharset
);
290 if (nUtf32
<= 0xFFFF)
292 aSrc
[0] = static_cast< sal_Unicode
>(nUtf32
);
297 aSrc
[0] = static_cast< sal_Unicode
>(
298 ((nUtf32
- 0x10000) >> 10) | 0xD800);
299 aSrc
[1] = static_cast< sal_Unicode
>(
300 ((nUtf32
- 0x10000) & 0x3FF) | 0xDC00);
303 sal_Char aDst
[32]; // FIXME random value
306 sal_Size nDstSize
= rtl_convertUnicodeToText(
307 aConverter
, 0, aSrc
, nSrcSize
, aDst
, sizeof aDst
,
308 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
309 | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
310 | RTL_UNICODETOTEXT_FLAGS_FLUSH
,
311 &nInfo
, &nConverted
);
// aDst is assumed to be large enough for any single character.
312 assert((nInfo
& RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
) == 0);
313 rtl_destroyUnicodeToTextConverter(aConverter
);
315 assert(nConverted
== nSrcSize
); // bad rtl_convertUnicodeToText
// Every converted octet is written as an escape.
316 for (sal_Size i
= 0; i
< nDstSize
; ++i
)
317 writeEscapeOctet(pBuffer
, pCapacity
,
318 static_cast< unsigned char >(aDst
[i
]));
319 // FIXME all octets are escaped, even if there is no need
// Alternative path that writes the character unescaped; NOTE(review): its
// guarding condition is not visible in this view.
324 writeUcs4(pBuffer
, pCapacity
, nUtf32
);
// Position of the first code unit of this URI component within the parsed
// text; null while the component is absent (see isPresent()).
333 sal_Unicode
const * pBegin
;
// Position one past the last code unit of the component.
334 sal_Unicode
const * pEnd
;
// A default-constructed Component is "not present".
336 inline Component(): pBegin(0), pEnd(0) {}
// True once pBegin has been set to a non-null position.
338 inline bool isPresent() const { return pBegin
!= 0; }
// Number of code units in the component; defined out of line.
340 inline sal_Int32
getLength() const;
343 inline sal_Int32
Component::getLength() const
345 assert(isPresent()); // taking length of non-present component
346 return static_cast< sal_Int32
>(pEnd
- pBegin
);
// Authority component of a parsed URI reference; parseUriRef marks it present
// when it finds a leading "//".
352 Component aAuthority
;
// Splits the URI reference pUriRef into its components and records their
// positions in *pComponents.  The Component members point into pUriRef's
// buffer, so they are only usable while that string is alive.
//
// The visible statements handle, in this order: scheme, authority, path,
// query, fragment.  NOTE(review): some statements (for example the test that
// ends the scheme and the updates of pPos after it) are not visible in this
// view.
358 void parseUriRef(rtl_uString
const * pUriRef
, Components
* pComponents
)
360 // This algorithm is liberal and accepts various forms of illegal input.
362 sal_Unicode
const * pBegin
= pUriRef
->buffer
;
363 sal_Unicode
const * pEnd
= pBegin
+ pUriRef
->length
;
364 sal_Unicode
const * pPos
= pBegin
;
// Scheme: must start with an ASCII letter; the scan continues over letters,
// digits, '+' and '-' (further accepted characters may be listed on a line
// not visible here).
366 if (pPos
!= pEnd
&& rtl::isAsciiAlpha(*pPos
))
368 for (sal_Unicode
const * p
= pPos
+ 1; p
!= pEnd
; ++p
)
// The recorded scheme extends one unit past p (++p), i.e. it includes the
// character that terminated the scan.
372 pComponents
->aScheme
.pBegin
= pBegin
;
373 pComponents
->aScheme
.pEnd
= ++p
;
377 else if (!rtl::isAsciiAlphanumeric(*p
) && *p
!= '+' && *p
!= '-'
// Authority: introduced by "//" and running up to the next '/', '?' or '#'.
// The recorded range starts at the "//" itself.
385 if (pEnd
- pPos
>= 2 && pPos
[0] == '/' && pPos
[1] == '/')
387 pComponents
->aAuthority
.pBegin
= pPos
;
389 while (pPos
!= pEnd
&& *pPos
!= '/' && *pPos
!= '?' && *pPos
!= '#')
391 pComponents
->aAuthority
.pEnd
= pPos
;
// Path: everything up to the next '?' or '#'; always recorded, possibly
// empty.
394 pComponents
->aPath
.pBegin
= pPos
;
395 while (pPos
!= pEnd
&& *pPos
!= '?' && * pPos
!= '#')
397 pComponents
->aPath
.pEnd
= pPos
;
// Query: starts at the '?' (which is part of the recorded range) and runs up
// to the next '#'.
399 if (pPos
!= pEnd
&& *pPos
== '?')
401 pComponents
->aQuery
.pBegin
= pPos
++;
402 while (pPos
!= pEnd
&& * pPos
!= '#')
404 pComponents
->aQuery
.pEnd
= pPos
;
// Fragment: whatever remains, starting at the '#', up to the end of the text.
409 assert(*pPos
== '#');
410 pComponents
->aFragment
.pBegin
= pPos
;
411 pComponents
->aFragment
.pEnd
= pEnd
;
// Merges the relative path rRelPath into the base path rBasePath and returns
// the resulting path.
//
// rBasePath  base path; must be present and start with a slash
// rRelPath   relative path; must be present
//
// NOTE(review): a number of statements (loop heads, segment scanning, some
// branch bodies) are not visible in this view; the comments below describe
// only the visible ones.
415 rtl::OUString
joinPaths(Component
const & rBasePath
, Component
const & rRelPath
)
417 assert(rBasePath
.isPresent() && *rBasePath
.pBegin
== '/');
418 assert(rRelPath
.isPresent());
420 // The invariant of aBuffer is that it always starts and ends with a slash
421 // (until probably right at the end of the algorithm, when the last segment
422 // of rRelPath is added, which does not necessarily end in a slash):
423 rtl::OUStringBuffer
aBuffer(rBasePath
.getLength() + rRelPath
.getLength());
424 // XXX numeric overflow
426 // Segments "." and ".." within rBasePath are not considered special (but
427 // are also not removed by ".." segments within rRelPath), RFC 2396 seems a
428 // bit unclear about this point:
// nFixed is updated when a base-path segment is exactly "." or ".."; it
// presumably marks the prefix that ".." in rRelPath must not remove.
429 sal_Int32 nFixed
= 1;
430 sal_Unicode
const * p
= rBasePath
.pBegin
+ 1;
431 for (sal_Unicode
const * q
= p
; q
!= rBasePath
.pEnd
; ++q
)
435 (q
- p
== 1 && p
[0] == '.') ||
436 (q
- p
== 2 && p
[0] == '.' && p
[1] == '.')
439 nFixed
= q
+ 1 - rBasePath
.pBegin
;
// Seed the result with the base path from its start up to p.
443 aBuffer
.append(rBasePath
.pBegin
, p
- rBasePath
.pBegin
);
446 if (p
!= rRelPath
.pEnd
)
449 sal_Unicode
const * q
= p
;
450 sal_Unicode
const * r
;
453 if (q
== rRelPath
.pEnd
)
// A ".." segment drops the last segment already in aBuffer by cutting the
// buffer back to just after the preceding slash.
465 if (q
- p
== 2 && p
[0] == '.' && p
[1] == '.')
467 // Erroneous excess segments ".." within rRelPath are left
468 // intact, as the examples in RFC 2396, section C.2, suggest:
469 sal_Int32 i
= aBuffer
.getLength() - 1;
472 aBuffer
.append(p
, r
- p
);
477 while (i
> 0 && aBuffer
[i
- 1] != '/')
479 aBuffer
.setLength(i
);
// Any segment other than "." is copied as is; "." itself is skipped.
482 else if (q
- p
!= 1 || *p
!= '.')
483 aBuffer
.append(p
, r
- p
);
484 if (q
== rRelPath
.pEnd
)
489 return aBuffer
.makeStringAndClear();
// Returns the character-class table selected by eCharClass.
//
// Each table has nCharClassSize entries, indexed by ASCII code point; the
// trailing comments on each row show which characters the row covers.  The
// encoder treats a non-zero entry as "may be written without escaping".
// The tables appear in this order: None, Uric, UricNoSlash, RelSegment,
// RegName, Userinfo, Pchar, UnoParamValue.  eCharClass is used directly as
// the index, so it must be smaller than the number of tables.
494 sal_Bool
const * SAL_CALL
rtl_getUriCharClass(rtl_UriCharClass eCharClass
)
497 static sal_Bool
const aCharClass
[][nCharClassSize
]
498 = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* None */
499 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
500 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* !"#$%&'()*+,-./*/
501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*0123456789:;<=>?*/
502 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*@ABCDEFGHIJKLMNO*/
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*PQRSTUVWXYZ[\]^_*/
504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*`abcdefghijklmno*/
505 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /*pqrstuvwxyz{|}~ */
507 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Uric */
508 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
509 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* !"#$%&'()*+,-./*/
510 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
511 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
512 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*PQRSTUVWXYZ[\]^_*/
513 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
514 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
516 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UricNoSlash */
517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
518 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
519 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
520 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
521 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
522 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
523 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
525 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RelSegment */
526 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
527 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
528 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
529 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
530 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
531 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
532 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
534 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RegName */
535 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
536 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
537 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
538 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
539 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
540 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
541 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
543 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Userinfo */
544 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
545 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
546 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
547 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
548 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
549 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
550 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
552 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Pchar */
553 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
554 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
555 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, /*0123456789:;<=>?*/
556 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
557 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
558 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
559 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
561 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UnoParamValue */
562 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
563 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, /* !"#$%&'()*+,-./*/
564 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*0123456789:;<=>?*/
565 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
566 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
567 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
568 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
// Range check on eCharClass (the start of this expression is not visible in
// this view).
572 && (sal::static_int_cast
< std::size_t >(eCharClass
)
573 < SAL_N_ELEMENTS(aCharClass
)))); // bad eCharClass
574 return aCharClass
[eCharClass
];
// Encodes pText for use in a URI and stores the outcome in *pResult.
//
// pText       text to encode
// pCharClass  table of characters that may be written unescaped; its entry
//             for '%' must be zero
// eMechanism  controls whether existing "%XX" sequences in pText are
//             interpreted (KeepEscapes, CheckEscapes, StrictKeepEscapes) and
//             whether writeEscapeChar is called in strict mode (Strict,
//             StrictKeepEscapes)
// eCharset    charset used for the escaped octets
// pResult     receives the encoded text; set to an empty string when
//             writeEscapeChar reports failure
//
// NOTE(review): the loop head and the dispatch on the escape type returned by
// readUcs4 are not visible in this view.
577 void SAL_CALL
rtl_uriEncode(rtl_uString
* pText
, sal_Bool
const * pCharClass
,
578 rtl_UriEncodeMechanism eMechanism
,
579 rtl_TextEncoding eCharset
, rtl_uString
** pResult
)
582 assert(!pCharClass
[0x25]); // make sure the percent sign is encoded...
584 sal_Unicode
const * p
= pText
->buffer
;
585 sal_Unicode
const * pEnd
= p
+ pText
->length
;
// The result buffer starts out with room for the unencoded length and is
// grown by the write helpers as needed.
586 sal_Int32 nCapacity
= pText
->length
;
587 rtl_uString_new_WithLength(pResult
, nCapacity
);
591 sal_uInt32 nUtf32
= readUcs4(
593 (eMechanism
== rtl_UriEncodeKeepEscapes
594 || eMechanism
== rtl_UriEncodeCheckEscapes
595 || eMechanism
== rtl_UriEncodeStrictKeepEscapes
),
// First variant: a character of the class is copied through, anything else
// is escaped.
600 if (isValid(pCharClass
, nUtf32
)) // implies nUtf32 <= 0x7F
601 writeUnicode(pResult
, &nCapacity
,
602 static_cast< sal_Unicode
>(nUtf32
));
603 else if (!writeEscapeChar(
604 pResult
, &nCapacity
, nUtf32
, eCharset
,
605 (eMechanism
== rtl_UriEncodeStrict
606 || eMechanism
== rtl_UriEncodeStrictKeepEscapes
)))
608 rtl_uString_new(pResult
);
// Second variant: only under CheckEscapes is a character of the class
// written unescaped; otherwise it is escaped (again).
614 if (eMechanism
== rtl_UriEncodeCheckEscapes
615 && isValid(pCharClass
, nUtf32
)) // implies nUtf32 <= 0x7F
616 writeUnicode(pResult
, &nCapacity
,
617 static_cast< sal_Unicode
>(nUtf32
));
618 else if (!writeEscapeChar(
619 pResult
, &nCapacity
, nUtf32
, eCharset
,
620 (eMechanism
== rtl_UriEncodeStrict
621 || eMechanism
== rtl_UriEncodeStrictKeepEscapes
)))
623 rtl_uString_new(pResult
);
// Third variant: the value is written back as a single escaped octet.
629 writeEscapeOctet(pResult
, &nCapacity
, nUtf32
);
633 *pResult
= rtl_uStringBuffer_makeStringAndClear( pResult
, &nCapacity
);
// Decodes the "%XX" escape sequences in pText and stores the outcome in
// *pResult.
//
// pText       text to decode
// eMechanism  rtl_UriDecodeNone copies pText unchanged; rtl_UriDecodeToIuri
//             forces UTF-8 and keeps ASCII characters escaped;
//             rtl_UriDecodeStrict yields an empty result at the
//             rtl_uString_new call below; the remaining mechanism decodes
//             using eCharset
// eCharset    charset the escaped octets are assumed to be encoded in
// pResult     receives the decoded text
//
// NOTE(review): the switch head, the loop head and the dispatch on the escape
// type returned by readUcs4 are not visible in this view.
636 void SAL_CALL
rtl_uriDecode(rtl_uString
* pText
,
637 rtl_UriDecodeMechanism eMechanism
,
638 rtl_TextEncoding eCharset
, rtl_uString
** pResult
)
643 case rtl_UriDecodeNone
:
644 rtl_uString_assign(pResult
, pText
);
// ToIuri always decodes as UTF-8; no further case-specific statement is
// visible before the default label.
647 case rtl_UriDecodeToIuri
:
648 eCharset
= RTL_TEXTENCODING_UTF8
;
649 default: // rtl_UriDecodeWithCharset, rtl_UriDecodeStrict
651 sal_Unicode
const * p
= pText
->buffer
;
652 sal_Unicode
const * pEnd
= p
+ pText
->length
;
653 sal_Int32 nCapacity
= pText
->length
;
654 rtl_uString_new_WithLength(pResult
, nCapacity
);
658 sal_uInt32 nUtf32
= readUcs4(&p
, pEnd
, true, eCharset
, &eType
);
// Under ToIuri an ASCII character is written back in escaped form.
662 if (nUtf32
<= 0x7F && eMechanism
== rtl_UriDecodeToIuri
)
664 writeEscapeOctet(pResult
, &nCapacity
, nUtf32
);
668 writeUcs4(pResult
, &nCapacity
, nUtf32
);
// Strict mode replaces the result with an empty string; the non-strict path
// below writes the value back as an escaped octet.
672 if (eMechanism
== rtl_UriDecodeStrict
) {
673 rtl_uString_new(pResult
);
676 writeEscapeOctet(pResult
, &nCapacity
, nUtf32
);
680 *pResult
= rtl_uStringBuffer_makeStringAndClear( pResult
, &nCapacity
);
686 sal_Bool SAL_CALL
rtl_uriConvertRelToAbs(rtl_uString
* pBaseUriRef
,
687 rtl_uString
* pRelUriRef
,
688 rtl_uString
** pResult
,
689 rtl_uString
** pException
)
692 // If pRelUriRef starts with a scheme component it is an absolute URI
693 // reference, and we are done (i.e., this algorithm does not support
694 // backwards-compatible relative URIs starting with a scheme component, see
695 // RFC 2396, section 5.2, step 3):
696 Components aRelComponents
;
697 parseUriRef(pRelUriRef
, &aRelComponents
);
698 if (aRelComponents
.aScheme
.isPresent())
700 rtl_uString_assign(pResult
, pRelUriRef
);
704 // Parse pBaseUriRef; if the scheme component is not present or not valid,
705 // or the path component is not empty and starts with anything but a slash,
706 // an exception is raised:
707 Components aBaseComponents
;
708 parseUriRef(pBaseUriRef
, &aBaseComponents
);
709 if (!aBaseComponents
.aScheme
.isPresent())
711 rtl::OUString
aMessage(pBaseUriRef
);
712 aMessage
+= rtl::OUString(
713 " does not start with a scheme component");
714 rtl_uString_assign(pException
,
715 const_cast< rtl::OUString
& >(aMessage
).pData
);
718 if (aBaseComponents
.aPath
.pBegin
!= aBaseComponents
.aPath
.pEnd
719 && *aBaseComponents
.aPath
.pBegin
!= '/')
721 rtl::OUString
aMessage(pBaseUriRef
);
722 aMessage
+= rtl::OUString(
723 "path component does not start with slash");
724 rtl_uString_assign(pException
, aMessage
.pData
);
728 // Use the algorithm from RFC 2396, section 5.2, to turn the relative URI
729 // into an absolute one (if the relative URI is a reference to the "current
730 // document," the "current document" is here taken to be the base URI):
731 rtl::OUStringBuffer aBuffer
;
732 aBuffer
.append(aBaseComponents
.aScheme
.pBegin
,
733 aBaseComponents
.aScheme
.getLength());
734 if (aRelComponents
.aAuthority
.isPresent())
736 aBuffer
.append(aRelComponents
.aAuthority
.pBegin
,
737 aRelComponents
.aAuthority
.getLength());
738 aBuffer
.append(aRelComponents
.aPath
.pBegin
,
739 aRelComponents
.aPath
.getLength());
740 if (aRelComponents
.aQuery
.isPresent())
741 aBuffer
.append(aRelComponents
.aQuery
.pBegin
,
742 aRelComponents
.aQuery
.getLength());
746 if (aBaseComponents
.aAuthority
.isPresent())
747 aBuffer
.append(aBaseComponents
.aAuthority
.pBegin
,
748 aBaseComponents
.aAuthority
.getLength());
749 if (aRelComponents
.aPath
.pBegin
== aRelComponents
.aPath
.pEnd
750 && !aRelComponents
.aQuery
.isPresent())
752 aBuffer
.append(aBaseComponents
.aPath
.pBegin
,
753 aBaseComponents
.aPath
.getLength());
754 if (aBaseComponents
.aQuery
.isPresent())
755 aBuffer
.append(aBaseComponents
.aQuery
.pBegin
,
756 aBaseComponents
.aQuery
.getLength());
760 if (*aRelComponents
.aPath
.pBegin
== '/')
761 aBuffer
.append(aRelComponents
.aPath
.pBegin
,
762 aRelComponents
.aPath
.getLength());
764 aBuffer
.append(joinPaths(aBaseComponents
.aPath
,
765 aRelComponents
.aPath
));
766 if (aRelComponents
.aQuery
.isPresent())
767 aBuffer
.append(aRelComponents
.aQuery
.pBegin
,
768 aRelComponents
.aQuery
.getLength());
771 if (aRelComponents
.aFragment
.isPresent())
772 aBuffer
.append(aRelComponents
.aFragment
.pBegin
,
773 aRelComponents
.aFragment
.getLength());
774 rtl_uString_assign(pResult
, aBuffer
.makeStringAndClear().pData
);
778 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */