1 /* GIO - GLib Input, Output and Streaming Library
3 * Copyright (C) 2006-2007 Red Hat, Inc.
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General
16 * Public License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 * Author: Alexander Larsson <alexl@redhat.com>
23 #include "gurifuncs.h"
25 #include <glib/gstrfuncs.h>
26 #include <glib/gmessages.h>
27 #include <glib/gstring.h>
28 #include <glib/gmem.h>
36 * @title: URI Functions
37 * @short_description: manipulating URIs
 * Functions for manipulating Uniform Resource Identifiers (URIs) as
 * defined by
 * [RFC 3986](http://www.ietf.org/rfc/rfc3986.txt).
 * It is highly recommended that you read and understand RFC 3986
 * before using this API.
/*
 * unescape_character:
 * @scanner: pointer to the two characters that follow a '%' sign.
 *
 * Decodes a pair of hexadecimal digits into the byte value they encode.
 * The second character is only read once the first one has been
 * accepted as a hex digit.
 *
 * Returns: the decoded value (0..255), or -1 if either character is
 * not a hexadecimal digit.
 */
static int
unescape_character (const char *scanner)
{
  int high_nibble;
  int low_nibble;

  high_nibble = g_ascii_xdigit_value (scanner[0]);
  if (high_nibble < 0)
    return -1;

  low_nibble = g_ascii_xdigit_value (scanner[1]);
  if (low_nibble < 0)
    return -1;

  return (high_nibble << 4) | low_nibble;
}
/**
 * g_uri_unescape_segment:
 * @escaped_string: (nullable): A string, may be %NULL
 * @escaped_string_end: (nullable): Pointer to end of @escaped_string, may be %NULL
 * @illegal_characters: (nullable): An optional string of illegal characters not to be allowed, may be %NULL
 *
 * Unescapes a segment of an escaped string.
 *
 * The segment runs from @escaped_string up to (but not including)
 * @escaped_string_end; when @escaped_string_end is %NULL the segment
 * extends to the terminating nul byte of @escaped_string.
 *
 * If any of the characters in @illegal_characters or the character zero
 * appears as an escaped character in @escaped_string then that is an error
 * and %NULL will be returned. This is useful if you want to avoid, for
 * instance, having a slash being expanded in an escaped path element, which
 * might confuse pathname handling. A '%' that is not followed by two
 * hexadecimal digits inside the segment is an error as well.
 *
 * Returns: an unescaped version of @escaped_string or %NULL on error.
 * The returned string should be freed when no longer needed. As a
 * special case if %NULL is given for @escaped_string, this function
 * will return %NULL.
 **/
char *
g_uri_unescape_segment (const char *escaped_string,
                        const char *escaped_string_end,
                        const char *illegal_characters)
{
  const char *src;
  char *decoded;
  char *dst;
  int byte;

  if (escaped_string == NULL)
    return NULL;

  if (escaped_string_end == NULL)
    escaped_string_end = escaped_string + strlen (escaped_string);

  /* Unescaping never grows the text, so the input length (plus the
   * terminating nul) is always enough room. */
  decoded = g_malloc (escaped_string_end - escaped_string + 1);
  dst = decoded;
  src = escaped_string;

  while (src < escaped_string_end)
    {
      if (*src != '%')
        {
          /* Ordinary character: copy it through unchanged. */
          *dst++ = *src++;
          continue;
        }

      /* Step over the '%' so that src points at the first hex digit. */
      src++;

      /* Invalid escaped char (too short) */
      if (escaped_string_end - src < 2)
        goto invalid;

      byte = unescape_character (src);

      /* A negative value means the digits were not valid hex; an
       * escaped '\0' is considered illegal here as well. */
      if (byte <= 0)
        goto invalid;

      if (illegal_characters != NULL &&
          strchr (illegal_characters, (char) byte) != NULL)
        goto invalid;

      *dst++ = (char) byte;
      src += 2; /* consume both hex digits */
    }

  *dst = '\0';

  return decoded;

invalid:
  g_free (decoded);
  return NULL;
}
139 * g_uri_unescape_string:
140 * @escaped_string: an escaped string to be unescaped.
141 * @illegal_characters: (nullable): a string of illegal characters not to be
144 * Unescapes a whole escaped string.
146 * If any of the characters in @illegal_characters or the character zero appears
147 * as an escaped character in @escaped_string then that is an error and %NULL
148 * will be returned. This is useful it you want to avoid for instance having a
149 * slash being expanded in an escaped path element, which might confuse pathname
152 * Returns: an unescaped version of @escaped_string. The returned string
153 * should be freed when no longer needed.
158 g_uri_unescape_string (const char *escaped_string
,
159 const char *illegal_characters
)
161 return g_uri_unescape_segment (escaped_string
, NULL
, illegal_characters
);
165 * g_uri_parse_scheme:
168 * Gets the scheme portion of a URI string. RFC 3986 decodes the scheme as:
170 * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
172 * Common schemes include "file", "http", "svn+ssh", etc.
174 * Returns: The "Scheme" component of the URI, or %NULL on error.
175 * The returned string should be freed when no longer needed.
180 g_uri_parse_scheme (const char *uri
)
185 g_return_val_if_fail (uri
!= NULL
, NULL
);
187 /* From RFC 3986 Decodes:
188 * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
194 scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
197 if (!g_ascii_isalpha (*p
))
207 if (!(g_ascii_isalnum(c
) ||
214 return g_strndup (uri
, p
- uri
- 1);
218 * g_uri_escape_string:
219 * @unescaped: the unescaped input string.
220 * @reserved_chars_allowed: (nullable): a string of reserved characters that
221 * are allowed to be used, or %NULL.
222 * @allow_utf8: %TRUE if the result can include UTF-8 characters.
224 * Escapes a string for use in a URI.
226 * Normally all characters that are not "unreserved" (i.e. ASCII alphanumerical
227 * characters plus dash, dot, underscore and tilde) are escaped.
228 * But if you specify characters in @reserved_chars_allowed they are not
229 * escaped. This is useful for the "reserved" characters in the URI
230 * specification, since those are allowed unescaped in some portions of
233 * Returns: an escaped version of @unescaped. The returned string should be
234 * freed when no longer needed.
239 g_uri_escape_string (const char *unescaped
,
240 const char *reserved_chars_allowed
,
245 g_return_val_if_fail (unescaped
!= NULL
, NULL
);
247 s
= g_string_sized_new (strlen (unescaped
) + 10);
249 g_string_append_uri_escaped (s
, unescaped
, reserved_chars_allowed
, allow_utf8
);
251 return g_string_free (s
, FALSE
);