Deprecated GSlice config API
[glib.git] / glib / gurifuncs.c
blobd4382053eb854495dbf848013f430c06cf6d2027
1 /* GIO - GLib Input, Output and Streaming Library
2 *
3 * Copyright (C) 2006-2007 Red Hat, Inc.
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General
16 * Public License along with this library; if not, write to the
17 * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
18 * Boston, MA 02111-1307, USA.
20 * Author: Alexander Larsson <alexl@redhat.com>
23 #include "config.h"
25 #include "gurifuncs.h"
27 #include <glib/gstrfuncs.h>
28 #include <glib/gmessages.h>
29 #include <glib/gstring.h>
30 #include <glib/gmem.h>
32 #include <string.h>
34 #include "config.h"
36 /**
37 * SECTION:gurifuncs
38 * @title: URI Functions
39 * @short_description: manipulating URIs
41 * Functions for manipulating Universal Resource Identifiers (URIs) as
42 * defined by <ulink url="http://www.ietf.org/rfc/rfc3986.txt">
43 * RFC 3986</ulink>. It is highly recommended that you read and
44 * understand RFC 3986 before using this API.
/*
 * unescape_character:
 * @scanner: pointer to the two characters that follow a '%' sign
 *
 * Combines two hexadecimal digit characters into a single value, the
 * first character supplying the high four bits and the second the low
 * four bits.  The second character is only examined when the first one
 * was accepted.
 *
 * Returns: the combined value, or -1 when g_ascii_xdigit_value()
 * reports a negative value for either character.
 */
static int
unescape_character (const char *scanner)
{
  int high_nibble;
  int low_nibble;

  high_nibble = g_ascii_xdigit_value (scanner[0]);
  if (high_nibble < 0)
    return -1;

  low_nibble = g_ascii_xdigit_value (scanner[1]);
  if (low_nibble < 0)
    return -1;

  return (high_nibble << 4) | low_nibble;
}
64 /**
65 * g_uri_unescape_segment:
66 * @escaped_string: (allow-none): A string, may be %NULL
67 * @escaped_string_end: (allow-none): Pointer to end of @escaped_string, may be %NULL
68 * @illegal_characters: (allow-none): An optional string of illegal characters not to be allowed, may be %NULL
70 * Unescapes a segment of an escaped string.
72 * If any of the characters in @illegal_characters or the character zero appears
73 * as an escaped character in @escaped_string then that is an error and %NULL
74 * will be returned. This is useful it you want to avoid for instance having a
75 * slash being expanded in an escaped path element, which might confuse pathname
76 * handling.
78 * Returns: an unescaped version of @escaped_string or %NULL on error.
79 * The returned string should be freed when no longer needed. As a
80 * special case if %NULL is given for @escaped_string, this function
81 * will return %NULL.
83 * Since: 2.16
84 **/
85 char *
86 g_uri_unescape_segment (const char *escaped_string,
87 const char *escaped_string_end,
88 const char *illegal_characters)
90 const char *in;
91 char *out, *result;
92 gint character;
94 if (escaped_string == NULL)
95 return NULL;
97 if (escaped_string_end == NULL)
98 escaped_string_end = escaped_string + strlen (escaped_string);
100 result = g_malloc (escaped_string_end - escaped_string + 1);
102 out = result;
103 for (in = escaped_string; in < escaped_string_end; in++)
105 character = *in;
107 if (*in == '%')
109 in++;
111 if (escaped_string_end - in < 2)
113 /* Invalid escaped char (to short) */
114 g_free (result);
115 return NULL;
118 character = unescape_character (in);
120 /* Check for an illegal character. We consider '\0' illegal here. */
121 if (character <= 0 ||
122 (illegal_characters != NULL &&
123 strchr (illegal_characters, (char)character) != NULL))
125 g_free (result);
126 return NULL;
129 in++; /* The other char will be eaten in the loop header */
131 *out++ = (char)character;
134 *out = '\0';
136 return result;
/**
 * g_uri_unescape_string:
 * @escaped_string: an escaped string to be unescaped.
 * @illegal_characters: an optional string of illegal characters not to be allowed.
 *
 * Unescapes a whole escaped string.
 *
 * If any of the characters in @illegal_characters or the character zero
 * appears as an escaped character in @escaped_string then that is an
 * error and %NULL will be returned. This is useful if you want to avoid,
 * for instance, having a slash being expanded in an escaped path
 * element, which might confuse pathname handling.
 *
 * Returns: an unescaped version of @escaped_string, or %NULL on error
 * or when @escaped_string is %NULL. The returned string should be freed
 * when no longer needed.
 *
 * Since: 2.16
 **/
char *
g_uri_unescape_string (const char *escaped_string,
                       const char *illegal_characters)
{
  /* A NULL end pointer makes g_uri_unescape_segment() process the
   * string up to its terminating nul byte. */
  const char *whole_string_end = NULL;

  return g_uri_unescape_segment (escaped_string,
                                 whole_string_end,
                                 illegal_characters);
}
/**
 * g_uri_parse_scheme:
 * @uri: a valid URI.
 *
 * Gets the scheme portion of a URI string. RFC 3986 decodes the scheme as:
 * <programlisting>
 * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
 * </programlisting>
 * Common schemes include "file", "http", "svn+ssh", etc.
 *
 * Returns: The "Scheme" component of the URI, or %NULL on error.
 * The returned string should be freed when no longer needed.
 *
 * Since: 2.16
 **/
char *
g_uri_parse_scheme (const char *uri)
{
  const char *cursor;

  g_return_val_if_fail (uri != NULL, NULL);

  /* From RFC 3986:
   *   URI    = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
   *   scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
   */

  /* The scheme has to open with a letter. */
  if (!g_ascii_isalpha (uri[0]))
    return NULL;

  /* Walk forward to the ':' delimiter.  Any other character outside the
   * scheme alphabet, including the terminating nul, means there is no
   * valid scheme. */
  for (cursor = uri; *cursor != ':'; cursor++)
    {
      char ch = *cursor;

      if (g_ascii_isalnum (ch) || ch == '+' || ch == '-' || ch == '.')
        continue;

      return NULL;
    }

  /* cursor rests on the ':'; copy everything in front of it. */
  return g_strndup (uri, cursor - uri);
}
218 * g_uri_escape_string:
219 * @unescaped: the unescaped input string.
220 * @reserved_chars_allowed: a string of reserved characters that are
221 * allowed to be used, or %NULL.
222 * @allow_utf8: %TRUE if the result can include UTF-8 characters.
224 * Escapes a string for use in a URI.
226 * Normally all characters that are not "unreserved" (i.e. ASCII alphanumerical
227 * characters plus dash, dot, underscore and tilde) are escaped.
228 * But if you specify characters in @reserved_chars_allowed they are not
229 * escaped. This is useful for the "reserved" characters in the URI
230 * specification, since those are allowed unescaped in some portions of
231 * a URI.
233 * Returns: an escaped version of @unescaped. The returned string should be
234 * freed when no longer needed.
236 * Since: 2.16
238 char *
239 g_uri_escape_string (const char *unescaped,
240 const char *reserved_chars_allowed,
241 gboolean allow_utf8)
243 GString *s;
245 g_return_val_if_fail (unescaped != NULL, NULL);
247 s = g_string_sized_new (strlen (unescaped) + 10);
249 g_string_append_uri_escaped (s, unescaped, reserved_chars_allowed, allow_utf8);
251 return g_string_free (s, FALSE);