1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: uri.cxx,v $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_sal.hxx"
36 #include "surrogates.h"
38 #include "osl/diagnose.h"
39 #include "rtl/strbuf.hxx"
40 #include "rtl/textenc.h"
41 #include "rtl/textcvt.h"
43 #include "rtl/ustrbuf.h"
44 #include "rtl/ustrbuf.hxx"
45 #include "rtl/ustring.h"
46 #include "rtl/ustring.hxx"
47 #include "sal/types.h"
53 std::size_t const nCharClassSize
= 128;
55 sal_Unicode
const cEscapePrefix
= 0x25; // '%'
57 inline bool isDigit(sal_uInt32 nUtf32
)
59 return nUtf32
>= 0x30 && nUtf32
<= 0x39; // '0'--'9'
62 inline bool isAlpha(sal_uInt32 nUtf32
)
66 (nUtf32
>= 0x41 && nUtf32
<= 0x5A) ||
67 (nUtf32
>= 0x61 && nUtf32
<= 0x7A)
71 inline bool isHighSurrogate(sal_uInt32 nUtf16
)
73 return SAL_RTL_IS_HIGH_SURROGATE(nUtf16
);
76 inline bool isLowSurrogate(sal_uInt32 nUtf16
)
78 return SAL_RTL_IS_LOW_SURROGATE(nUtf16
);
81 inline sal_uInt32
combineSurrogates(sal_uInt32 high
, sal_uInt32 low
)
83 return SAL_RTL_COMBINE_SURROGATES(high
, low
);
86 inline int getHexWeight(sal_uInt32 nUtf32
)
88 return nUtf32
>= 0x30 && nUtf32
<= 0x39 ? // '0'--'9'
89 static_cast< int >(nUtf32
- 0x30) :
90 nUtf32
>= 0x41 && nUtf32
<= 0x46 ? // 'A'--'F'
91 static_cast< int >(nUtf32
- 0x41 + 10) :
92 nUtf32
>= 0x61 && nUtf32
<= 0x66 ? // 'a'--'f'
93 static_cast< int >(nUtf32
- 0x61 + 10) :
94 -1; // not a hex digit
97 inline bool isValid(sal_Bool
const * pCharClass
, sal_uInt32 nUtf32
)
99 return nUtf32
< nCharClassSize
&& pCharClass
[nUtf32
];
102 inline void writeUnicode(rtl_uString
** pBuffer
, sal_Int32
* pCapacity
,
105 rtl_uStringbuffer_insert(pBuffer
, pCapacity
, (*pBuffer
)->length
, &cChar
, 1);
/* Read any of the following:

   - sequence of escape sequences representing character from eCharset,
     translated to single UCS4 character; or

   - pair of UTF-16 surrogates, translated to single UCS4 character; or

   - single UTF-16 character, extended to UCS4 character.
 */
// readUcs4: reads the next character from [*pBegin, pEnd), advancing *pBegin
// past whatever was consumed, and returns it as a sal_uInt32.
//
// pBegin    in/out cursor into the UTF-16 text; at least one code unit is
//           always consumed.
// pEnd      end of the text.
// bEncoded  when true, "%XX" escape sequences are interpreted.
// eCharset  encoding that escaped octets are taken to be in.
// pType     receives the escape classification (its declaration is not
//           visible in this listing); EscapeOctet is stored on the two
//           visible "could not decode" paths.
//
// NOTE(review): this listing is missing lines (the embedded numbering skips,
// e.g. 128 -> 131, 136 -> 139, 171 -> 177), so declarations, some conditions
// and several return statements are not shown here.  Restore them from the
// upstream file before relying on this text.
124 sal_uInt32
readUcs4(sal_Unicode
const ** pBegin
, sal_Unicode
const * pEnd
,
125 bool bEncoded
, rtl_TextEncoding eCharset
,
128 sal_uInt32 nChar
= *(*pBegin
)++;
// An escape is only recognised when bEncoded is set and '%' is followed by
// two hexadecimal digits; the two digits are folded into one octet.
131 if (nChar
== cEscapePrefix
&& bEncoded
&& pEnd
- *pBegin
>= 2
132 && (nWeight1
= getHexWeight((*pBegin
)[0])) >= 0
133 && (nWeight2
= getHexWeight((*pBegin
)[1])) >= 0)
136 nChar
= static_cast< sal_uInt32
>(nWeight1
<< 4 | nWeight2
);
// UTF-8: a lead octet in 0xC0--0xF4 starts a multi-octet sequence whose
// payload bits are collected in nEncoded.
139 else if (eCharset
== RTL_TEXTENCODING_UTF8
)
141 if (nChar
>= 0xC0 && nChar
<= 0xF4)
148 nEncoded
= (nChar
& 0x1F) << 6;
152 else if (nChar
<= 0xEF)
154 nEncoded
= (nChar
& 0x0F) << 12;
160 nEncoded
= (nChar
& 0x07) << 18;
164 sal_Unicode
const * p
= *pBegin
;
// Each further "%XX" contributes six bits, most significant group first.
166 for (; nShift
>= 0; nShift
-= 6)
168 if (pEnd
- p
< 3 || p
[0] != cEscapePrefix
169 || (nWeight1
= getHexWeight(p
[1])) < 8
171 || (nWeight2
= getHexWeight(p
[2])) < 0)
177 nEncoded
|= ((nWeight1
& 3) << 4 | nWeight2
) << nShift
;
// The decoded value is only accepted if it is at least nMin, is not a
// surrogate, and does not exceed 0x10FFFF.
179 if (bUTF8
&& nEncoded
>= nMin
&& !isHighSurrogate(nEncoded
)
180 && !isLowSurrogate(nEncoded
) && nEncoded
<= 0x10FFFF)
187 *pType
= EscapeOctet
;
// Any other charset: the octets gathered in aBuf are handed to a
// text-to-Unicode converter for eCharset.
191 rtl::OStringBuffer aBuf
;
192 aBuf
.append(static_cast< char >(nChar
));
193 rtl_TextToUnicodeConverter aConverter
194 = rtl_createTextToUnicodeConverter(eCharset
);
195 sal_Unicode
const * p
= *pBegin
;
201 sal_Size nDstSize
= rtl_convertTextToUnicode(
202 aConverter
, 0, aBuf
.getStr(), aBuf
.getLength(), aDst
,
203 sizeof aDst
/ sizeof aDst
[0],
204 (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
205 | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
206 | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR
),
207 &nInfo
, &nConverted
);
212 == sal::static_int_cast
< sal_uInt32
>(
214 rtl_destroyTextToUnicodeConverter(aConverter
);
219 || (nDstSize
== 2 && isHighSurrogate(aDst
[0])
220 && isLowSurrogate(aDst
[1])));
222 ? aDst
[0] : combineSurrogates(aDst
[0], aDst
[1]);
// The converter wants more input: append the next escaped octet ...
224 else if (nInfo
== RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
225 && pEnd
- p
>= 3 && p
[0] == cEscapePrefix
226 && (nWeight1
= getHexWeight(p
[1])) >= 0
227 && (nWeight2
= getHexWeight(p
[2])) >= 0)
230 aBuf
.append(static_cast< char >(nWeight1
<< 4 | nWeight2
));
// ... or the next unescaped code unit, provided it is at most 0x7F.
232 else if (nInfo
== RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
233 && p
!= pEnd
&& *p
<= 0x7F)
235 aBuf
.append(static_cast< char >(*p
++));
240 (nInfo
& RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL
)
245 rtl_destroyTextToUnicodeConverter(aConverter
);
246 *pType
= EscapeOctet
;
// Result expression: a high surrogate immediately followed by a low
// surrogate is consumed as a pair and combined; otherwise nChar is returned
// unchanged.
253 return isHighSurrogate(nChar
) && *pBegin
< pEnd
254 && isLowSurrogate(**pBegin
) ?
255 combineSurrogates(nChar
, *(*pBegin
)++) : nChar
;
259 void writeUcs4(rtl_uString
** pBuffer
, sal_Int32
* pCapacity
, sal_uInt32 nUtf32
)
261 OSL_ENSURE(nUtf32
<= 0x10FFFF, "bad UTF-32 char");
262 if (nUtf32
<= 0xFFFF) {
264 pBuffer
, pCapacity
, static_cast< sal_Unicode
>(nUtf32
));
269 static_cast< sal_Unicode
>(nUtf32
>> 10 | 0xD800));
272 static_cast< sal_Unicode
>((nUtf32
& 0x3FF) | 0xDC00));
276 void writeEscapeOctet(rtl_uString
** pBuffer
, sal_Int32
* pCapacity
,
279 OSL_ENSURE(nOctet
<= 0xFF, "bad octet");
281 static sal_Unicode
const aHex
[16]
282 = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
283 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }; /* '0'--'9', 'A'--'F' */
285 writeUnicode(pBuffer
, pCapacity
, cEscapePrefix
);
286 writeUnicode(pBuffer
, pCapacity
, aHex
[nOctet
>> 4]);
287 writeUnicode(pBuffer
, pCapacity
, aHex
[nOctet
& 15]);
// writeEscapeChar: appends the code point nUtf32 to *pBuffer as a run of
// "%XX" escape sequences holding its encoding in eCharset.
//
// For RTL_TEXTENCODING_UTF8 the one to four octets are computed directly.
// For any other charset the code point is turned into one or two UTF-16 code
// units, converted with rtl_convertUnicodeToText, and every resulting octet
// is escaped.
//
// NOTE(review): this listing is missing lines (the embedded numbering skips,
// e.g. 294 -> 296, 313 -> 316, 349 -> 354).  The condition guarding the
// writeUcs4 fallback, the use of bStrict and the return statements are not
// visible; presumably false is returned when bStrict is set and the character
// cannot be converted (rtl_uriEncode empties its result when this function
// returns false) -- confirm against the upstream file.
290 bool writeEscapeChar(rtl_uString
** pBuffer
, sal_Int32
* pCapacity
,
291 sal_uInt32 nUtf32
, rtl_TextEncoding eCharset
, bool bStrict
)
293 OSL_ENSURE(nUtf32
<= 0x10FFFF, "bad UTF-32 char");
294 if (eCharset
== RTL_TEXTENCODING_UTF8
) {
// One octet.
296 writeEscapeOctet(pBuffer
, pCapacity
, nUtf32
);
// Two octets: 110xxxxx 10xxxxxx.
297 else if (nUtf32
< 0x800)
299 writeEscapeOctet(pBuffer
, pCapacity
, nUtf32
>> 6 | 0xC0);
300 writeEscapeOctet(pBuffer
, pCapacity
, (nUtf32
& 0x3F) | 0x80);
// Three octets: 1110xxxx 10xxxxxx 10xxxxxx.
302 else if (nUtf32
< 0x10000)
304 writeEscapeOctet(pBuffer
, pCapacity
, nUtf32
>> 12 | 0xE0);
305 writeEscapeOctet(pBuffer
, pCapacity
, (nUtf32
>> 6 & 0x3F) | 0x80);
306 writeEscapeOctet(pBuffer
, pCapacity
, (nUtf32
& 0x3F) | 0x80);
// Four octets: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
310 writeEscapeOctet(pBuffer
, pCapacity
, nUtf32
>> 18 | 0xF0);
311 writeEscapeOctet(pBuffer
, pCapacity
, (nUtf32
>> 12 & 0x3F) | 0x80);
312 writeEscapeOctet(pBuffer
, pCapacity
, (nUtf32
>> 6 & 0x3F) | 0x80);
313 writeEscapeOctet(pBuffer
, pCapacity
, (nUtf32
& 0x3F) | 0x80);
// Non-UTF-8 charsets go through a Unicode-to-text converter.
316 rtl_UnicodeToTextConverter aConverter
317 = rtl_createUnicodeToTextConverter(eCharset
);
// aSrc receives the code point as UTF-16: one unit, or a surrogate pair
// built from the offset above 0x10000.
320 if (nUtf32
<= 0xFFFF)
322 aSrc
[0] = static_cast< sal_Unicode
>(nUtf32
);
327 aSrc
[0] = static_cast< sal_Unicode
>(
328 ((nUtf32
- 0x10000) >> 10) | 0xD800);
329 aSrc
[1] = static_cast< sal_Unicode
>(
330 ((nUtf32
- 0x10000) & 0x3FF) | 0xDC00);
333 sal_Char aDst
[32]; // FIXME random value
336 sal_Size nDstSize
= rtl_convertUnicodeToText(
337 aConverter
, 0, aSrc
, nSrcSize
, aDst
, sizeof aDst
,
338 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
339 | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
340 | RTL_UNICODETOTEXT_FLAGS_FLUSH
,
341 &nInfo
, &nConverted
);
342 OSL_ASSERT((nInfo
& RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
) == 0);
343 rtl_destroyUnicodeToTextConverter(aConverter
);
345 OSL_ENSURE(nConverted
== nSrcSize
, "bad rtl_convertUnicodeToText");
// Every converted octet is written as "%XX".
346 for (sal_Size i
= 0; i
< nDstSize
; ++i
)
347 writeEscapeOctet(pBuffer
, pCapacity
,
348 static_cast< unsigned char >(aDst
[i
]));
349 // FIXME all octets are escaped, even if there is no need
// Fallback: the code point is written unescaped as UTF-16.
354 writeUcs4(pBuffer
, pCapacity
, nUtf32
);
363 sal_Unicode
const * pBegin
;
364 sal_Unicode
const * pEnd
;
366 inline Component(): pBegin(0) {}
368 inline bool isPresent() const { return pBegin
!= 0; }
370 inline sal_Int32
getLength() const;
373 inline sal_Int32
Component::getLength() const
375 OSL_ENSURE(isPresent(), "taking length of non-present component");
376 return static_cast< sal_Int32
>(pEnd
- pBegin
);
382 Component aAuthority
;
// parseUriRef: splits the URI reference pUriRef into its parts by recording
// begin/end pointers in *pComponents.  The pointers refer into
// pUriRef->buffer, so they are only usable while that string is alive.
//
// As far as visible here, the recorded ranges include their delimiters: the
// authority starts at the leading "//", the query at '?', and the fragment at
// '#'; the scheme ends one past the position at which the scan stopped.
//
// NOTE(review): this listing is missing lines (the embedded numbering skips,
// e.g. 397 -> 400, 401 -> 405, 411 -> 413), so several conditions, loop bodies
// and the guard around the fragment part are not shown.
388 void parseUriRef(rtl_uString
const * pUriRef
, Components
* pComponents
)
390 // This algorithm is liberal and accepts various forms of illegal input.
392 sal_Unicode
const * pBegin
= pUriRef
->buffer
;
393 sal_Unicode
const * pEnd
= pBegin
+ pUriRef
->length
;
394 sal_Unicode
const * pPos
= pBegin
;
// Scheme: only looked for when the text starts with a letter.
396 if (pPos
!= pEnd
&& isAlpha(*pPos
))
397 for (sal_Unicode
const * p
= pPos
+ 1; p
!= pEnd
; ++p
)
400 pComponents
->aScheme
.pBegin
= pBegin
;
401 pComponents
->aScheme
.pEnd
= ++p
;
405 else if (!isAlpha(*p
) && !isDigit(*p
) && *p
!= '+' && *p
!= '-'
// Authority: introduced by "//" and running up to the next '/', '?' or '#'.
409 if (pEnd
- pPos
>= 2 && pPos
[0] == '/' && pPos
[1] == '/')
411 pComponents
->aAuthority
.pBegin
= pPos
;
413 while (pPos
!= pEnd
&& *pPos
!= '/' && *pPos
!= '?' && *pPos
!= '#')
415 pComponents
->aAuthority
.pEnd
= pPos
;
// Path: always recorded (possibly empty), running up to '?' or '#'.
418 pComponents
->aPath
.pBegin
= pPos
;
419 while (pPos
!= pEnd
&& *pPos
!= '?' && * pPos
!= '#')
421 pComponents
->aPath
.pEnd
= pPos
;
// Query: starts at '?' and runs up to '#'.
423 if (pPos
!= pEnd
&& *pPos
== '?')
425 pComponents
->aQuery
.pBegin
= pPos
++;
426 while (pPos
!= pEnd
&& * pPos
!= '#')
428 pComponents
->aQuery
.pEnd
= pPos
;
// Fragment: whatever remains, which is asserted to start with '#'.
433 OSL_ASSERT(*pPos
== '#');
434 pComponents
->aFragment
.pBegin
= pPos
;
435 pComponents
->aFragment
.pEnd
= pEnd
;
// joinPaths: merges the relative path rRelPath onto the base path rBasePath
// and returns the result as a new string.
//
// rBasePath must be present and start with '/'; rRelPath must be present
// (both are checked with OSL_ASSERT only).  The result starts with the part
// of the base path copied up to the position left in p after the first scan.
// Segments of rRelPath are then appended; "." segments are skipped and ".."
// segments either truncate the buffer back to a preceding '/' or are
// appended unchanged.
//
// NOTE(review): this listing is missing lines (the embedded numbering skips,
// e.g. 455 -> 459, 463 -> 467, 477 -> 489, 493 -> 496), so the inner loops and
// the test that decides between truncating and keeping a ".." segment are not
// shown; the exact role of nFixed cannot be confirmed from here.
439 rtl::OUString
joinPaths(Component
const & rBasePath
, Component
const & rRelPath
)
441 OSL_ASSERT(rBasePath
.isPresent() && *rBasePath
.pBegin
== '/');
442 OSL_ASSERT(rRelPath
.isPresent());
444 // The invariant of aBuffer is that it always starts and ends with a slash
445 // (until probably right at the end of the algorithm, when the last segment
446 // of rRelPath is added, which does not necessarily end in a slash):
447 rtl::OUStringBuffer
aBuffer(rBasePath
.getLength() + rRelPath
.getLength());
448 // XXX numeric overflow
450 // Segments "." and ".." within rBasePath are not considered special (but
451 // are also not removed by ".." segments within rRelPath), RFC 2396 seems a
452 // bit unclear about this point:
453 sal_Int32 nFixed
= 1;
454 sal_Unicode
const * p
= rBasePath
.pBegin
+ 1;
455 for (sal_Unicode
const * q
= p
; q
!= rBasePath
.pEnd
; ++q
)
459 (q
- p
== 1 && p
[0] == '.') ||
460 (q
- p
== 2 && p
[0] == '.' && p
[1] == '.')
463 nFixed
= q
+ 1 - rBasePath
.pBegin
;
467 aBuffer
.append(rBasePath
.pBegin
, p
- rBasePath
.pBegin
);
470 if (p
!= rRelPath
.pEnd
)
473 sal_Unicode
const * q
= p
;
474 sal_Unicode
const * r
;
477 if (q
== rRelPath
.pEnd
)
489 if (q
- p
== 2 && p
[0] == '.' && p
[1] == '.')
491 // Erroneous excess segments ".." within rRelPath are left
492 // intact, as the examples in RFC 2396, section C.2, suggest:
493 sal_Int32 i
= aBuffer
.getLength() - 1;
496 aBuffer
.append(p
, r
- p
);
501 while (aBuffer
.charAt(i
- 1) != '/')
503 aBuffer
.setLength(i
);
506 else if (q
- p
!= 1 || *p
!= '.')
507 aBuffer
.append(p
, r
- p
);
508 if (q
== rRelPath
.pEnd
)
513 return aBuffer
.makeStringAndClear();
// rtl_getUriCharClass: returns the table of nCharClassSize flags for the
// character class eCharClass, i.e. the row aCharClass[eCharClass].  A flag of
// 1 marks a character as belonging to the class; each row covers code points
// 0x00--0x7F, sixteen per line, with the printable characters of a line shown
// in its trailing comment.
//
// The rows are labelled, in order: None, Uric, UricNoSlash, RelSegment,
// RegName, Userinfo, Pchar, UnoParamValue.  Presumably this matches the order
// of the rtl_UriCharClass enumerators -- confirm against rtl/uri.h.
//
// The percent sign (0x25) is 0 in every row.
//
// NOTE(review): this listing is missing lines (the embedded numbering skips,
// e.g. 529 -> 531, 592 -> 596), so the row terminators and the start of the
// range check on eCharClass are not shown.
518 sal_Bool
const * SAL_CALL
rtl_getUriCharClass(rtl_UriCharClass eCharClass
)
521 static sal_Bool
const aCharClass
[][nCharClassSize
]
522 = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* None */
523 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
524 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* !"#$%&'()*+,-./*/
525 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*0123456789:;<=>?*/
526 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*@ABCDEFGHIJKLMNO*/
527 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*PQRSTUVWXYZ[\]^_*/
528 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*`abcdefghijklmno*/
529 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /*pqrstuvwxyz{|}~ */
531 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Uric */
532 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
533 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* !"#$%&'()*+,-./*/
534 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
535 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
536 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*PQRSTUVWXYZ[\]^_*/
537 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
538 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
540 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UricNoSlash */
541 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
542 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
543 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
544 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
545 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
546 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
547 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
549 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RelSegment */
550 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
551 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
552 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
553 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
554 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
555 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
556 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
558 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RegName */
559 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
560 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
561 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
562 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
563 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
564 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
565 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
567 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Userinfo */
568 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
569 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
570 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
571 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
572 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
573 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
574 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
576 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Pchar */
577 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
578 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
579 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, /*0123456789:;<=>?*/
580 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
581 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
582 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
583 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
585 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UnoParamValue */
586 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
587 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, /* !"#$%&'()*+,-./*/
588 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*0123456789:;<=>?*/
589 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
590 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
591 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
592 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
596 && (sal::static_int_cast
< std::size_t >(eCharClass
)
597 < sizeof aCharClass
/ sizeof aCharClass
[0])),
599 return aCharClass
[eCharClass
];
// rtl_uriEncode: writes an encoded copy of pText to *pResult.
//
// pText       the text to encode.
// pCharClass  table of characters that may be written unescaped; it must not
//             allow the percent sign (checked with OSL_ENSURE only).
// eMechanism  selects how existing escapes are treated and how strict the
//             encoding is:
//             - KeepEscapes, CheckEscapes and StrictKeepEscapes make readUcs4
//               interpret "%XX" sequences already present in pText;
//             - Strict and StrictKeepEscapes are passed to writeEscapeChar as
//               its bStrict argument.
// eCharset    encoding used for the octets of the escape sequences.
// pResult     receives the result; it is reset to an empty string when
//             writeEscapeChar reports failure.
//
// NOTE(review): this listing is missing lines (the embedded numbering skips,
// e.g. 613 -> 617, 621 -> 626, 634 -> 640, 649 -> 655), so the loop, the
// dispatch on the escape type returned by readUcs4 and the early exits are
// not shown.
602 void SAL_CALL
rtl_uriEncode(rtl_uString
* pText
, sal_Bool
const * pCharClass
,
603 rtl_UriEncodeMechanism eMechanism
,
604 rtl_TextEncoding eCharset
, rtl_uString
** pResult
)
607 OSL_ENSURE(!pCharClass
[0x25], "bad pCharClass");
608 // make sure the percent sign is encoded...
610 sal_Unicode
const * p
= pText
->buffer
;
611 sal_Unicode
const * pEnd
= p
+ pText
->length
;
612 sal_Int32 nCapacity
= 0;
613 rtl_uString_new(pResult
);
617 sal_uInt32 nUtf32
= readUcs4(
619 (eMechanism
== rtl_UriEncodeKeepEscapes
620 || eMechanism
== rtl_UriEncodeCheckEscapes
621 || eMechanism
== rtl_UriEncodeStrictKeepEscapes
),
// First visible branch: characters allowed by pCharClass are copied as they
// are, everything else is escaped.
626 if (isValid(pCharClass
, nUtf32
)) // implies nUtf32 <= 0x7F
627 writeUnicode(pResult
, &nCapacity
,
628 static_cast< sal_Unicode
>(nUtf32
));
629 else if (!writeEscapeChar(
630 pResult
, &nCapacity
, nUtf32
, eCharset
,
631 (eMechanism
== rtl_UriEncodeStrict
632 || eMechanism
== rtl_UriEncodeStrictKeepEscapes
)))
634 rtl_uString_new(pResult
);
// Second visible branch: only with CheckEscapes is an allowed character
// written unescaped; otherwise it is escaped.
640 if (eMechanism
== rtl_UriEncodeCheckEscapes
641 && isValid(pCharClass
, nUtf32
)) // implies nUtf32 <= 0x7F
642 writeUnicode(pResult
, &nCapacity
,
643 static_cast< sal_Unicode
>(nUtf32
));
644 else if (!writeEscapeChar(
645 pResult
, &nCapacity
, nUtf32
, eCharset
,
646 (eMechanism
== rtl_UriEncodeStrict
647 || eMechanism
== rtl_UriEncodeStrictKeepEscapes
)))
649 rtl_uString_new(pResult
);
// Third visible branch: the value is written as a single "%XX" octet.
655 writeEscapeOctet(pResult
, &nCapacity
, nUtf32
);
// rtl_uriDecode: writes a decoded copy of pText to *pResult.
//
// eMechanism selects the behaviour:
// - rtl_UriDecodeNone: pText is assigned to *pResult unchanged.
// - rtl_UriDecodeToIuri: eCharset is replaced by RTL_TEXTENCODING_UTF8 and
//   processing continues with the default case (the embedded numbering shows
//   no line between the assignment and "default:").  Values up to 0x7F are
//   written back as "%XX" rather than decoded.
// - otherwise (WithCharset, Strict): the text is read with readUcs4, with
//   escapes always interpreted, and the characters are written with writeUcs4.
//   With rtl_UriDecodeStrict the result is reset to an empty string in one
//   branch; in the non-strict case the value is written back as "%XX".
//
// NOTE(review): this listing is missing lines (the embedded numbering skips,
// e.g. 663 -> 668, 679 -> 683, 693 -> 697), so the switch header, the loop,
// the dispatch on eType and the exits are not shown.
661 void SAL_CALL
rtl_uriDecode(rtl_uString
* pText
,
662 rtl_UriDecodeMechanism eMechanism
,
663 rtl_TextEncoding eCharset
, rtl_uString
** pResult
)
668 case rtl_UriDecodeNone
:
669 rtl_uString_assign(pResult
, pText
);
672 case rtl_UriDecodeToIuri
:
673 eCharset
= RTL_TEXTENCODING_UTF8
;
674 default: // rtl_UriDecodeWithCharset, rtl_UriDecodeStrict
676 sal_Unicode
const * p
= pText
->buffer
;
677 sal_Unicode
const * pEnd
= p
+ pText
->length
;
678 sal_Int32 nCapacity
= 0;
679 rtl_uString_new(pResult
);
683 sal_uInt32 nUtf32
= readUcs4(&p
, pEnd
, true, eCharset
, &eType
);
// When producing an IURI, values up to 0x7F stay escaped.
687 if (nUtf32
<= 0x7F && eMechanism
== rtl_UriDecodeToIuri
)
689 writeEscapeOctet(pResult
, &nCapacity
, nUtf32
);
693 writeUcs4(pResult
, &nCapacity
, nUtf32
);
// Strict decoding empties the result here; otherwise the octet is kept as
// "%XX".
697 if (eMechanism
== rtl_UriDecodeStrict
) {
698 rtl_uString_new(pResult
);
701 writeEscapeOctet(pResult
, &nCapacity
, nUtf32
);
// rtl_uriConvertRelToAbs: resolves the URI reference pRelUriRef against the
// base URI reference pBaseUriRef and stores the absolute result in *pResult.
//
// Visible steps:
// 1. If pRelUriRef has a scheme, it is assigned to *pResult unchanged.
// 2. pBaseUriRef must have a scheme, and its path must be empty or start
//    with '/'; otherwise a message is assigned to *pException.
// 3. The result is assembled from the base scheme followed by either
//    - the relative authority, path and query (relative authority present),
//    - or the base authority followed by
//      * the base path and base query (relative path empty and no relative
//        query),
//      * the relative path (when it starts with '/'),
//      * or joinPaths(base path, relative path),
//      the latter two followed by the relative query,
//    and finally the relative fragment, if any.
//
// NOTE(review): this listing is missing lines (the embedded numbering skips,
// e.g. 713 -> 716, 724 -> 728, 740 -> 743, 768 -> 772), so the braces, "else"
// keywords and every return statement are not shown; presumably the function
// returns false on the two error paths and true otherwise -- confirm against
// the upstream file.
710 sal_Bool SAL_CALL
rtl_uriConvertRelToAbs(rtl_uString
* pBaseUriRef
,
711 rtl_uString
* pRelUriRef
,
712 rtl_uString
** pResult
,
713 rtl_uString
** pException
)
716 // If pRelUriRef starts with a scheme component it is an absolute URI
717 // reference, and we are done (i.e., this algorithm does not support
718 // backwards-compatible relative URIs starting with a scheme component, see
719 // RFC 2396, section 5.2, step 3):
720 Components aRelComponents
;
721 parseUriRef(pRelUriRef
, &aRelComponents
);
722 if (aRelComponents
.aScheme
.isPresent())
724 rtl_uString_assign(pResult
, pRelUriRef
);
728 // Parse pBaseUriRef; if the scheme component is not present or not valid,
729 // or the path component is not empty and starts with anything but a slash,
730 // an exception is raised:
731 Components aBaseComponents
;
732 parseUriRef(pBaseUriRef
, &aBaseComponents
);
733 if (!aBaseComponents
.aScheme
.isPresent())
735 rtl::OUString
aMessage(pBaseUriRef
);
736 aMessage
+= rtl::OUString(
737 RTL_CONSTASCII_USTRINGPARAM(
738 " does not start with a scheme component"));
739 rtl_uString_assign(pException
,
740 const_cast< rtl::OUString
& >(aMessage
).pData
);
743 if (aBaseComponents
.aPath
.pBegin
!= aBaseComponents
.aPath
.pEnd
744 && *aBaseComponents
.aPath
.pBegin
!= '/')
746 rtl::OUString
aMessage(pBaseUriRef
);
// NOTE(review): unlike the message in the scheme check above, this literal
// has no leading space, so it is appended directly after the URI text.
747 aMessage
+= rtl::OUString(
748 RTL_CONSTASCII_USTRINGPARAM(
749 "path component does not start with slash"));
750 rtl_uString_assign(pException
, aMessage
.pData
);
754 // Use the algorithm from RFC 2396, section 5.2, to turn the relative URI
755 // into an absolute one (if the relative URI is a reference to the "current
756 // document," the "current document" is here taken to be the base URI):
757 rtl::OUStringBuffer aBuffer
;
758 aBuffer
.append(aBaseComponents
.aScheme
.pBegin
,
759 aBaseComponents
.aScheme
.getLength());
// A relative reference with its own authority replaces everything after the
// base scheme.
760 if (aRelComponents
.aAuthority
.isPresent())
762 aBuffer
.append(aRelComponents
.aAuthority
.pBegin
,
763 aRelComponents
.aAuthority
.getLength());
764 aBuffer
.append(aRelComponents
.aPath
.pBegin
,
765 aRelComponents
.aPath
.getLength());
766 if (aRelComponents
.aQuery
.isPresent())
767 aBuffer
.append(aRelComponents
.aQuery
.pBegin
,
768 aRelComponents
.aQuery
.getLength());
// Otherwise the base authority (if any) is kept.
772 if (aBaseComponents
.aAuthority
.isPresent())
773 aBuffer
.append(aBaseComponents
.aAuthority
.pBegin
,
774 aBaseComponents
.aAuthority
.getLength());
// An empty relative path without a query refers to the base document itself.
775 if (aRelComponents
.aPath
.pBegin
== aRelComponents
.aPath
.pEnd
776 && !aRelComponents
.aQuery
.isPresent())
778 aBuffer
.append(aBaseComponents
.aPath
.pBegin
,
779 aBaseComponents
.aPath
.getLength());
780 if (aBaseComponents
.aQuery
.isPresent())
781 aBuffer
.append(aBaseComponents
.aQuery
.pBegin
,
782 aBaseComponents
.aQuery
.getLength());
// A relative path starting with '/' is taken as it is; any other one is
// merged with the base path.
786 if (*aRelComponents
.aPath
.pBegin
== '/')
787 aBuffer
.append(aRelComponents
.aPath
.pBegin
,
788 aRelComponents
.aPath
.getLength());
790 aBuffer
.append(joinPaths(aBaseComponents
.aPath
,
791 aRelComponents
.aPath
));
792 if (aRelComponents
.aQuery
.isPresent())
793 aBuffer
.append(aRelComponents
.aQuery
.pBegin
,
794 aRelComponents
.aQuery
.getLength());
797 if (aRelComponents
.aFragment
.isPresent())
798 aBuffer
.append(aRelComponents
.aFragment
.pBegin
,
799 aRelComponents
.aFragment
.getLength());
800 rtl_uString_assign(pResult
, aBuffer
.makeStringAndClear().pData
);