tdf#130857 qt weld: Implement QtInstanceWidget::strip_mnemonic
[LibreOffice.git] / include / rtl / uri.h
blob28975060c03e9c806eb8a2b5a33db71ba8d45242
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
21 * This file is part of LibreOffice published API.
24 #ifndef INCLUDED_RTL_URI_H
25 #define INCLUDED_RTL_URI_H
27 #include "sal/config.h"
29 #include "rtl/textenc.h"
30 #include "rtl/ustring.h"
31 #include "sal/saldllapi.h"
32 #include "sal/types.h"
34 #if defined __cplusplus
35 extern "C" {
36 #endif /* __cplusplus */
38 /** Various predefined URI 'char classes.'
40 A 'char class' defines which (ASCII) characters can be written 'as they
41 are' in a part of a URI, and which characters have to be written using
42 escape sequences ('%' followed by two hex digits). Characters outside
43 the ASCII range are always written using escape sequences.
45 If there are other frequently used char classes, they can be added to
46 this enumeration; the function rtl_getUriCharClass() has to be adapted
47 then, too.
49 typedef enum
51 /** The empty char class.
53 All characters are written using escape sequences.
55 rtl_UriCharClassNone,
57 /** The RFC 2732 @<uric> char class.
59 @verbatim
60 The 'valid' characters are !$&'()*+,-./:;=?@[]_~ plus digits and
61 letters.
63 This differs from RFC 3986 @<fragment> in additionally allowing []
64 @endverbatim
66 rtl_UriCharClassUric,
68 /** The RFC 2396 @<uric_no_slash> char class.
70 @verbatim
71 The 'valid' characters are !$&'()*+,-.:;=?@_~ plus digits and letters.
73 This differs from RFC 3986 @<fragment> in additionally encoding /
74 This differs from RFC 3986 @<pchar> in additionally allowing ?
75 @endverbatim
77 rtl_UriCharClassUricNoSlash,
79 /** The RFC 2396 @<rel_segment> char class.
81 @verbatim
82 The 'valid' characters are !$&'()*+,-.;=@_~ plus digits and letters.
84 This is the same as RFC 3986 @<segment-nz-nc>
85 @endverbatim
87 rtl_UriCharClassRelSegment,
89 /** The RFC 2396 @<reg_name> char class.
91 @verbatim
92 The 'valid' characters are !$&'()*+,-.:;=@_~ plus digits and letters.
94 This differs from RFC 3986 @<reg_name> in additionally allowing @
95 @endverbatim
97 rtl_UriCharClassRegName,
99 /** The RFC 2396 @<userinfo> char class.
101 @verbatim
102 The 'valid' characters are !$&'()*+,-.:;=_~ plus digits and letters.
104 This is the same as RFC 3986 @<userinfo>
105 @endverbatim
107 rtl_UriCharClassUserinfo,
109 /** The RFC 2396 @<pchar> char class.
111 @verbatim
112 The 'valid' characters are !$&'()*+,-.:=@_~ plus digits and letters.
114 This differs from RFC 3986 @<pchar> in additionally encoding ;
115 @endverbatim
117 rtl_UriCharClassPchar,
119 /** The char class for the values of uno URL parameters.
121 @verbatim
122 The 'valid' characters are !$&'()*+-./:?@_~ plus digits and letters.
123 @endverbatim
125 rtl_UriCharClassUnoParamValue,
127 rtl_UriCharClass_FORCE_EQUAL_SIZE = SAL_MAX_ENUM
129 rtl_UriCharClass;
131 /** The mechanism describing how escape sequences in the input of
132 rtl_uriEncode() are handled.
134 typedef enum
136 /** The special meaning of '%' is ignored (i.e., there are by definition
137 no escape sequences in the input).
139 This mechanism is useful to encode user input as part of a URI (e.g.,
140 the user-supplied password in an ftp URL---'%20abcde' is a valid
141 password, so do not assume that the '%20' is an escaped space).
143 rtl_UriEncodeIgnoreEscapes,
145 /** All escape sequences ('%' followed by two hex digits) are kept intact,
146 even if they represent characters that need not be escaped or if they
147 do not even map to characters in the given charset.
149 This mechanism is useful when passing on complete URIs more or less
150 unmodified (e.g., within an HTTP proxy): missing escape sequences are
151 added, but existing escape sequences are not touched (except that any
152 lower case hex digits are replaced by upper case hex digits).
154 rtl_UriEncodeKeepEscapes,
156 /** All escape sequences ('%' followed by two hex digits) are resolved in
157 a first step; only those that represent characters that need to be
158 escaped are kept intact.
160 This mechanism is useful to properly encode complete URIs entered by
161 the user: the URI is brought into a 'canonical form,' but care is taken
162 not to damage (valid) escape sequences the (careful) user already
163 entered as such.
165 rtl_UriEncodeCheckEscapes,
167 /** Like rtl_UriEncodeIgnoreEscapes, but indicating failure when converting
168 unmappable characters.
170 @since UDK 3.2.0
172 rtl_UriEncodeStrict,
174 /** Like rtl_UriEncodeKeepEscapes, but indicating failure when converting
175 unmappable characters.
177 Also, any escape sequences that are present are always considered to be (potentially broken)
178 UTF-8. This mechanism is meant to be used on the result of a rtl_UriDecodeToIuri decoding,
179 which will thus only contain escape sequences representing either ASCII characters or broken
180 UTF-8 sequences, and which will all be kept as-is.
182 @since UDK 3.2.7
184 rtl_UriEncodeStrictKeepEscapes,
186 rtl_UriEncode_FORCE_EQUAL_SIZE = SAL_MAX_ENUM
188 rtl_UriEncodeMechanism;
190 /** The mechanism describing how rtl_uriDecode() translates (part of) a URI
191 into a Unicode string.
193 typedef enum
195 /** The text is returned completely unmodified.
197 rtl_UriDecodeNone,
199 /** The text is returned in the form of an IURI (cf.
200 draft-masinter-url-i18n-05.txt).
202 All escape sequences representing ASCII characters (%00--%7F) are
203 kept, all other escape sequences are interpreted as UTF-8 characters
204 and translated to Unicode, if possible.
206 rtl_UriDecodeToIuri,
208 /** The text is decoded.
210 All escape sequences representing characters from the given charset
211 are decoded and translated to Unicode, if possible.
213 rtl_UriDecodeWithCharset,
215 /** Like rtl_UriDecodeWithCharset, but indicating failure when converting
216 unmappable characters.
218 @since UDK 3.2.0
220 rtl_UriDecodeStrict,
222 rtl_UriDecode_FORCE_EQUAL_SIZE = SAL_MAX_ENUM
224 rtl_UriDecodeMechanism;
226 /** Map a predefined rtl_UriCharClass to a form usable by rtl_uriEncode().
228 The function rtl_uriEncode() expects an array of 128 booleans, and this
229 function maps rtl_UriCharClass enumeration members to such arrays.
231 @param eCharClass
232 Any valid member of rtl_UriCharClass.
234 @return
235 An array of 128 booleans, to be used in calls to rtl_uriEncode().
// Declaration only. Maps a predefined rtl_UriCharClass member to the 128-entry
// boolean table that rtl_uriEncode() accepts as its pCharClass argument.
237 SAL_DLLPUBLIC sal_Bool const * SAL_CALL rtl_getUriCharClass(rtl_UriCharClass eCharClass)
238 SAL_THROW_EXTERN_C();
240 /** Encode a text as (part of) a URI.
242 @param pText
243 Any Unicode string. Must not be null.
245 @param pCharClass
246 A char class, represented as an array of 128 booleans (true means keep the
247 corresponding ASCII character unencoded, false means encode it). Must not
248 be null, and the boolean corresponding to the percent sign (0x25) must be
249 false. (See rtl_getUriCharClass() for a function mapping from
250 rtl_UriCharClass to such arrays.)
252 @param eMechanism
253 The mechanism describing how escape sequences in the input text are
254 handled.
256 @param eCharset
257 When Unicode characters from the input text have to be written using
258 escape sequences (because they are either outside the ASCII range or do
259 not belong to the given char class), they are first translated into this
260 charset before being encoded using escape sequences.
262 Also, if the encode mechanism is rtl_UriEncodeCheckEscapes, all escape
263 sequences already present in the input text are interpreted as characters
264 from this charset.
266 @param pResult
267 Returns an encoded representation of the input text. Must itself not be
268 null, and must point to either null or a valid string.
270 If the encode mechanism is rtl_UriEncodeStrict, and pText cannot be
271 converted to eCharset because it contains unmappable characters (which
272 implies that pText is not empty), then an empty string is returned.
// Declaration only. Has no return value: the encoded text is handed back through
// *pResult. pText, pCharClass and pResult must all be non-null, and the pCharClass
// entry for the percent sign (0x25) must be false.
274 SAL_DLLPUBLIC void SAL_CALL rtl_uriEncode(
275 rtl_uString * pText,
276 sal_Bool const * pCharClass,
277 rtl_UriEncodeMechanism eMechanism,
278 rtl_TextEncoding eCharset,
279 rtl_uString ** pResult)
280 SAL_THROW_EXTERN_C();
282 /** Decode (a part of) a URI.
284 @param pText
285 Any Unicode string. Must not be null. (If the input is indeed part of a
286 valid URI, this string will only contain a subset of the ASCII characters,
287 but this function also handles other Unicode characters properly.)
289 @param eMechanism
290 The mechanism describing how the input text is translated into a Unicode
291 string.
293 @param eCharset
294 When the decode mechanism is rtl_UriDecodeWithCharset or rtl_UriDecodeStrict, all escape
295 sequences in the input text are interpreted as characters from this
296 charset. Those characters are translated to Unicode characters in the
297 resulting output, if possible.
299 When the decode mechanism is rtl_UriDecodeNone or rtl_UriDecodeToIuri,
300 this parameter is ignored (and is best specified as
301 RTL_TEXTENCODING_UTF8).
303 @param pResult
304 Returns a decoded representation of the input text. Must itself not be
305 null, and must point to either null or a valid string.
307 If the decode mechanism is rtl_UriDecodeStrict, and pText cannot be
308 converted to eCharset because it contains (encodings of) unmappable
309 characters (which implies that pText is not empty), then an empty string is
310 returned.
// Declaration only. Has no return value: the decoded text is handed back through
// *pResult. eCharset is ignored when eMechanism is rtl_UriDecodeNone or
// rtl_UriDecodeToIuri.
312 SAL_DLLPUBLIC void SAL_CALL rtl_uriDecode(
313 rtl_uString * pText,
314 rtl_UriDecodeMechanism eMechanism,
315 rtl_TextEncoding eCharset,
316 rtl_uString ** pResult)
317 SAL_THROW_EXTERN_C();
319 /** Convert a relative URI reference into an absolute URI.
321 This function uses the strict parser algorithm described in RFC 3986,
322 section 5.2.
324 This function signals exceptions by returning false and letting pException
325 point to a message explaining the exception.
327 @param pBaseUriRef
328 An absolute URI that serves as the base URI. If it has to be inspected
329 (i.e., pRelUriRef is not an absolute URI already), and it is not an absolute
330 URI (i.e., does not begin with a @<scheme ":"> part), an exception will be
331 signalled.
333 @param pRelUriRef
334 A URI reference that may be either absolute or relative. If it is
335 absolute, it will be returned unmodified.
337 @param pResult
338 Returns an absolute URI. Must itself not be null, and must point to either
339 null or a valid string. If an exception is signalled, it is left unchanged.
341 @param pException
342 Returns an explanatory message in case an exception is signalled. Must
343 itself not be null, and must point to either null or a valid string. If no
344 exception is signalled, it is left unchanged.
346 @return
347 True if no exception is signalled, otherwise false.
// Declaration only. Returns true and stores the absolute URI in *pResult when no
// exception is signalled; otherwise returns false, stores an explanatory message
// in *pException and leaves *pResult unchanged.
349 SAL_DLLPUBLIC sal_Bool SAL_CALL rtl_uriConvertRelToAbs(
350 rtl_uString * pBaseUriRef,
351 rtl_uString * pRelUriRef,
352 rtl_uString ** pResult,
353 rtl_uString ** pException)
354 SAL_THROW_EXTERN_C();
356 #if defined __cplusplus
358 #endif /* __cplusplus */
360 #endif // INCLUDED_RTL_URI_H
362 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */