Branch libreoffice-5-0-4
[LibreOffice.git] / include / rtl / uri.h
blob248a0d42b55fe61513a90870d890c3deec469014
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #ifndef INCLUDED_RTL_URI_H
21 #define INCLUDED_RTL_URI_H
23 #include <sal/config.h>
25 #include <rtl/textenc.h>
26 #include <rtl/ustring.h>
27 #include <sal/saldllapi.h>
28 #include <sal/types.h>
30 #if defined __cplusplus
31 extern "C" {
32 #endif /* __cplusplus */
34 /** Various predefined URI 'char classes.'
36 A 'char class' defines which (ASCII) characters can be written 'as they
37 are' in a part of a URI, and which characters have to be written using
38 escape sequences ('%' followed by two hex digits). Characters outside
39 the ASCII range are always written using escape sequences.
41 If there are other frequently used char classes, they can be added to
42 this enumeration; the function rtl_getUriCharClass() has to be adapted
43 then, too.
45 typedef enum
47 /** The empty char class.
49 All characters are written using escape sequences.
51 rtl_UriCharClassNone,
53 /** The RFC 2732 @<uric> char class.
55 @verbatim
56 The 'valid' characters are !$&'()*+,-./:;=?@[]_~ plus digits and
57 letters.
58 @endverbatim
60 rtl_UriCharClassUric,
62 /** The RFC 2396 @<uric_no_slash> char class.
64 @verbatim
65 The 'valid' characters are !$&'()*+,-.:;=?@_~ plus digits and letters.
66 @endverbatim
68 rtl_UriCharClassUricNoSlash,
70 /** The RFC 2396 @<rel_segment> char class.
72 @verbatim
73 The 'valid' characters are !$&'()*+,-.;=@_~ plus digits and letters.
74 @endverbatim
76 rtl_UriCharClassRelSegment,
78 /** The RFC 2396 @<reg_name> char class.
80 @verbatim
81 The 'valid' characters are !$&'()*+,-.:;=@_~ plus digits and letters.
82 @endverbatim
84 rtl_UriCharClassRegName,
86 /** The RFC 2396 @<userinfo> char class.
88 @verbatim
89 The 'valid' characters are !$&'()*+,-.:;=_~ plus digits and letters.
90 @endverbatim
92 rtl_UriCharClassUserinfo,
94 /** The RFC 2396 @<pchar> char class.
96 @verbatim
97 The 'valid' characters are !$&'()*+,-.:=@_~ plus digits and letters.
98 @endverbatim
100 rtl_UriCharClassPchar,
102 /** The char class for the values of uno URL parameters.
104 @verbatim
105 The 'valid' characters are !$&'()*+-./:?@_~ plus digits and letters.
106 @endverbatim
108 rtl_UriCharClassUnoParamValue,
110 rtl_UriCharClass_FORCE_EQUAL_SIZE = SAL_MAX_ENUM // NOTE(review): presumably a size-forcing sentinel, not a usable char class -- confirm
112 rtl_UriCharClass;
114 /** The mechanism describing how escape sequences in the input of
115 rtl_uriEncode() are handled.
117 typedef enum
119 /** The special meaning of '%' is ignored (i.e., there are by definition
120 no escape sequences in the input).
122 This mechanism is useful to encode user input as part of a URI (e.g.,
123 the user-supplied password in an ftp URL---'%20abcde' is a valid
124 password, so do not assume that the '%20' is an escaped space).
126 rtl_UriEncodeIgnoreEscapes,
128 /** All escape sequences ('%' followed by two hex digits) are kept intact,
129 even if they represent characters that need not be escaped or if they
130 do not even map to characters in the given charset.
132 This mechanism is useful when passing on complete URIs more or less
133 unmodified (e.g., within an HTTP proxy): missing escape sequences are
134 added, but existing escape sequences are not touched (except that any
135 lower case hex digits are replaced by upper case hex digits).
137 rtl_UriEncodeKeepEscapes,
139 /** All escape sequences ('%' followed by two hex digits) are resolved in
140 a first step; only those that represent characters that need to be
141 escaped are kept intact.
143 This mechanism is useful to properly encode complete URIs entered by
144 the user: the URI is brought into a 'canonical form,' but care is taken
145 not to damage (valid) escape sequences the (careful) user already
146 entered as such.
148 rtl_UriEncodeCheckEscapes,
150 /** Like rtl_UriEncodeIgnoreEscapes, but indicating failure when converting
151 unmappable characters.
153 @since UDK 3.2.0
155 rtl_UriEncodeStrict,
157 /** Like rtl_UriEncodeKeepEscapes, but indicating failure when converting
158 unmappable characters.
160 @since UDK 3.2.7
162 rtl_UriEncodeStrictKeepEscapes,
164 rtl_UriEncode_FORCE_EQUAL_SIZE = SAL_MAX_ENUM // NOTE(review): presumably a size-forcing sentinel, not a usable mechanism -- confirm
166 rtl_UriEncodeMechanism;
168 /** The mechanism describing how rtl_uriDecode() translates (part of) a URI
169 into a Unicode string.
171 typedef enum
173 /** The text is returned completely unmodified.
175 rtl_UriDecodeNone,
177 /** The text is returned in the form of an IURI (cf.
178 draft-masinter-url-i18n-05.txt).
180 All escape sequences representing ASCII characters (%00--%7F) are
181 kept, all other escape sequences are interpreted as UTF-8 characters
182 and translated to Unicode, if possible.
184 rtl_UriDecodeToIuri,
186 /** The text is decoded.
188 All escape sequences representing characters from the given charset
189 are decoded and translated to Unicode, if possible.
191 rtl_UriDecodeWithCharset,
193 /** Like rtl_UriDecodeWithCharset, but indicating failure when converting
194 unmappable characters.
196 @since UDK 3.2.0
198 rtl_UriDecodeStrict,
200 rtl_UriDecode_FORCE_EQUAL_SIZE = SAL_MAX_ENUM // NOTE(review): presumably a size-forcing sentinel, not a usable mechanism -- confirm
202 rtl_UriDecodeMechanism;
204 /** Map a predefined rtl_UriCharClass to a form usable by rtl_uriEncode().
206 The function rtl_uriEncode() expects an array of 128 booleans, and this
207 function maps rtl_UriCharClass enumeration members to such arrays.
209 @param eCharClass
210 Any valid member of rtl_UriCharClass.
212 @return
213 An array of 128 booleans, to be used in calls to rtl_uriEncode(). NOTE(review): the array is returned as pointer-to-const; its lifetime and ownership are not stated here -- presumably callers must not free it, confirm.
215 SAL_DLLPUBLIC sal_Bool const * SAL_CALL rtl_getUriCharClass(rtl_UriCharClass eCharClass)
216 SAL_THROW_EXTERN_C();
218 /** Encode a text as (part of) a URI.
220 @param pText
221 Any Unicode string. Must not be null.
223 @param pCharClass
224 A char class, represented as an array of 128 booleans (true means keep the
225 corresponding ASCII character unencoded, false means encode it). Must not
226 be null, and the boolean corresponding to the percent sign (0x25) must be
227 false. (See rtl_getUriCharClass() for a function mapping from
228 rtl_UriCharClass to such arrays.)
230 @param eMechanism
231 The mechanism describing how escape sequences in the input text are
232 handled.
234 @param eCharset
235 When Unicode characters from the input text have to be written using
236 escape sequences (because they are either outside the ASCII range or do
237 not belong to the given char class), they are first translated into this
238 charset before being encoded using escape sequences.
240 Also, if the encode mechanism is rtl_UriEncodeCheckEscapes, all escape
241 sequences already present in the input text are interpreted as characters
242 from this charset.
244 @param pResult
245 Returns an encoded representation of the input text. Must itself not be
246 null, and must point to either null or a valid string.
248 If the encode mechanism is rtl_UriEncodeStrict, and pText cannot be
249 converted to eCharset because it contains unmappable characters (which
250 implies that pText is not empty), then an empty string is returned. NOTE(review): rtl_UriEncodeStrictKeepEscapes is documented as indicating failure in the same situation; presumably it also yields an empty string -- confirm.
252 SAL_DLLPUBLIC void SAL_CALL rtl_uriEncode(
253 rtl_uString * pText,
254 sal_Bool const * pCharClass,
255 rtl_UriEncodeMechanism eMechanism,
256 rtl_TextEncoding eCharset,
257 rtl_uString ** pResult)
258 SAL_THROW_EXTERN_C();
260 /** Decode (a part of) a URI.
262 @param pText
263 Any Unicode string. Must not be null. (If the input is indeed part of a
264 valid URI, this string will only contain a subset of the ASCII characters,
265 but this function also handles other Unicode characters properly.)
267 @param eMechanism
268 The mechanism describing how the input text is translated into a Unicode
269 string.
271 @param eCharset
272 When the decode mechanism is rtl_UriDecodeWithCharset or
273 rtl_UriDecodeStrict, all escape sequences in the input text are interpreted
274 as characters from this charset. Those characters are translated to Unicode
275 characters in the resulting output, if possible.
277 When the decode mechanism is rtl_UriDecodeNone or rtl_UriDecodeToIuri,
278 this parameter is ignored (and is best specified as
279 RTL_TEXTENCODING_UTF8).
281 @param pResult
282 Returns a decoded representation of the input text. Must itself not be
283 null, and must point to either null or a valid string.
285 If the decode mechanism is rtl_UriDecodeStrict, and pText cannot be
286 converted to eCharset because it contains (encodings of) unmappable
287 characters (which implies that pText is not empty), then an empty string is
288 returned.
290 SAL_DLLPUBLIC void SAL_CALL rtl_uriDecode(
291 rtl_uString * pText,
292 rtl_UriDecodeMechanism eMechanism,
293 rtl_TextEncoding eCharset,
294 rtl_uString ** pResult)
295 SAL_THROW_EXTERN_C();
297 /** Convert a relative URI reference into an absolute URI.
299 This function uses the strict parser algorithm described in RFC 3986,
300 section 5.2.
302 This function signals exceptions by returning false and letting pException
303 point to a message explaining the exception.
305 @param pBaseUriRef
306 An absolute URI that serves as the base URI. If it has to be inspected
307 (i.e., pRelUriRef is not an absolute URI already), and it is not an absolute
308 URI (i.e., does not begin with a @<scheme ":"> part), an exception will be
309 signalled.
311 @param pRelUriRef
312 A URI reference that may be either absolute or relative. If it is
313 absolute, it will be returned unmodified.
315 @param pResult
316 Returns an absolute URI. Must itself not be null, and must point to either
317 null or a valid string. If an exception is signalled, it is left unchanged.
319 @param pException
320 Returns an explanatory message in case an exception is signalled. Must
321 itself not be null, and must point to either null or a valid string. If no
322 exception is signalled, it is left unchanged.
324 @return
325 True if no exception is signalled, otherwise false.
327 SAL_DLLPUBLIC sal_Bool SAL_CALL rtl_uriConvertRelToAbs(
328 rtl_uString * pBaseUriRef,
329 rtl_uString * pRelUriRef,
330 rtl_uString ** pResult,
331 rtl_uString ** pException)
332 SAL_THROW_EXTERN_C();
334 #if defined __cplusplus
336 #endif /* __cplusplus */
338 #endif // INCLUDED_RTL_URI_H
340 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */