2 * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or any later version.
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 FILE_LICENCE ( GPL2_OR_LATER
);
23 * Uniform Resource Identifiers
32 #include <gpxe/vsprintf.h>
36 * Dump URI for debugging
40 static void dump_uri ( struct uri
*uri
) {
44 DBG ( " scheme \"%s\"", uri
->scheme
);
46 DBG ( " opaque \"%s\"", uri
->opaque
);
48 DBG ( " user \"%s\"", uri
->user
);
50 DBG ( " password \"%s\"", uri
->password
);
52 DBG ( " host \"%s\"", uri
->host
);
54 DBG ( " port \"%s\"", uri
->port
);
56 DBG ( " path \"%s\"", uri
->path
);
58 DBG ( " query \"%s\"", uri
->query
);
60 DBG ( " fragment \"%s\"", uri
->fragment
);
66 * @v uri_string URI as a string
69 * Splits a URI into its component parts. The returned URI structure is
70 * dynamically allocated and must eventually be freed by calling
73 struct uri
* parse_uri ( const char *uri_string
) {
78 char *authority
= NULL
;
82 /* Allocate space for URI struct and a copy of the string */
83 raw_len
= ( strlen ( uri_string
) + 1 /* NUL */ );
84 uri
= zalloc ( sizeof ( *uri
) + raw_len
);
87 raw
= ( ( ( char * ) uri
) + sizeof ( *uri
) );
89 /* Copy in the raw string */
90 memcpy ( raw
, uri_string
, raw_len
);
92 /* Start by chopping off the fragment, if it exists */
93 if ( ( tmp
= strchr ( raw
, '#' ) ) ) {
98 /* Identify absolute/relative URI. We ignore schemes that are
99 * apparently only a single character long, since otherwise we
100 * misinterpret a DOS-style path name ("C:\path\to\file") as a
101 * URI with scheme="C",opaque="\path\to\file".
103 if ( ( tmp
= strchr ( raw
, ':' ) ) && ( tmp
> ( raw
+ 1 ) ) ) {
104 /* Absolute URI: identify hierarchical/opaque */
108 /* Absolute URI with hierarchical part */
111 /* Absolute URI with opaque part */
119 /* If we don't have a path (i.e. we have an absolute URI with
120 * an opaque portion), we're already finished processing
125 /* Chop off the query, if it exists */
126 if ( ( tmp
= strchr ( path
, '?' ) ) ) {
131 /* Identify net/absolute/relative path */
132 if ( strncmp ( path
, "//", 2 ) != 0 ) {
133 /* Absolute/relative path */
136 /* Net path. If this is terminated by the first '/'
137 * of an absolute path, then we have no space for a
138 * terminator after the authority field, so shuffle
139 * the authority down by one byte, overwriting one of
142 authority
= ( path
+ 2 );
143 if ( ( tmp
= strchr ( authority
, '/' ) ) ) {
146 memmove ( ( authority
- 1 ), authority
,
147 ( tmp
- authority
) );
152 /* Split authority into user[:password] and host[:port] portions */
153 if ( ( tmp
= strchr ( authority
, '@' ) ) ) {
154 /* Has user[:password] */
157 uri
->user
= authority
;
158 if ( ( tmp
= strchr ( authority
, ':' ) ) ) {
164 /* No user:password */
165 uri
->host
= authority
;
168 /* Split host into host[:port] */
169 if ( ( tmp
= strrchr ( uri
->host
, ':' ) ) ) {
170 /* Make sure an IPv6 address isn't broken up. */
171 if ( ( tmp
> strchr ( uri
->host
, ']' ) ) ) {
177 /* Handle IPv6 case. */
178 if ( ( uri
->host
<= strchr ( uri
->host
, '[' ) ) &&
179 ( tmp
= strchr ( uri
->host
, ']' ) ) ) {
185 /* Decode fields that should be decoded */
186 for ( i
= URI_FIRST_FIELD
; i
<= URI_LAST_FIELD
; i
++ ) {
187 const char *field
= uri_get_field ( uri
, i
);
188 if ( field
&& ( URI_ENCODED
& ( 1 << i
) ) )
189 uri_decode ( field
, ( char * ) field
,
190 strlen ( field
) + 1 /* NUL */ );
194 DBG ( "URI \"%s\" split into", uri_string
);
204 * @v uri URI, or NULL
205 * @v default_port Default port to use if none specified in URI
208 unsigned int uri_port ( struct uri
*uri
, unsigned int default_port
) {
209 if ( ( ! uri
) || ( ! uri
->port
) )
211 return ( strtoul ( uri
->port
, NULL
, 0 ) );
217 * @v buf Buffer to fill with URI string
218 * @v size Size of buffer
219 * @v uri URI to write into buffer, or NULL
220 * @v fields Bitmask of fields to include in URI string, or URI_ALL
221 * @ret len Length of URI string
223 int unparse_uri ( char *buf
, size_t size
, struct uri
*uri
,
224 unsigned int fields
) {
225 /* List of characters that typically go before certain fields */
226 static char separators
[] = { /* scheme */ 0, /* opaque */ ':',
227 /* user */ 0, /* password */ ':',
228 /* host */ '@', /* port */ ':',
229 /* path */ 0, /* query */ '?',
230 /* fragment */ '#' };
234 DBG ( "URI unparsing" );
238 /* Ensure buffer is NUL-terminated */
242 /* Special-case NULL URI */
246 /* Iterate through requested fields */
247 for ( i
= URI_FIRST_FIELD
; i
<= URI_LAST_FIELD
; i
++ ) {
248 const char *field
= uri_get_field ( uri
, i
);
249 char sep
= separators
[i
];
251 /* Ensure `fields' only contains bits for fields that exist */
253 fields
&= ~( 1 << i
);
255 /* Store this field if we were asked to */
256 if ( fields
& ( 1 << i
) ) {
257 /* Print :// if we're non-opaque and had a scheme */
258 if ( ( fields
& URI_SCHEME_BIT
) &&
259 ( i
> URI_OPAQUE
) ) {
260 used
+= ssnprintf ( buf
+ used
, size
- used
,
262 /* Only print :// once */
263 fields
&= ~URI_SCHEME_BIT
;
266 /* Only print separator if an earlier field exists */
267 if ( sep
&& ( fields
& ( ( 1 << i
) - 1 ) ) )
268 used
+= ssnprintf ( buf
+ used
, size
- used
,
271 /* Print contents of field, possibly encoded */
272 if ( URI_ENCODED
& ( 1 << i
) )
273 used
+= uri_encode ( field
, buf
+ used
,
276 used
+= ssnprintf ( buf
+ used
, size
- used
,
288 * @ret uri Duplicate URI
290 * Creates a modifiable copy of a URI.
292 struct uri
* uri_dup ( struct uri
*uri
) {
293 size_t len
= ( unparse_uri ( NULL
, 0, uri
, URI_ALL
) + 1 );
296 unparse_uri ( buf
, len
, uri
, URI_ALL
);
297 return parse_uri ( buf
);
301 * Resolve base+relative path
303 * @v base_path Base path
304 * @v relative_path Relative path
305 * @ret resolved_path Resolved path
307 * Takes a base path (e.g. "/var/lib/tftpboot/vmlinuz") and a relative
308 * path (e.g. "initrd.gz") and produces a new path
309 * (e.g. "/var/lib/tftpboot/initrd.gz"). Note that any non-directory
310 * portion of the base path will automatically be stripped; this
311 * matches the semantics used when resolving the path component of
314 char * resolve_path ( const char *base_path
,
315 const char *relative_path
) {
316 size_t base_len
= ( strlen ( base_path
) + 1 );
317 char base_path_copy
[base_len
];
318 char *base_tmp
= base_path_copy
;
321 /* If relative path is absolute, just re-use it */
322 if ( relative_path
[0] == '/' )
323 return strdup ( relative_path
);
325 /* Create modifiable copy of path for dirname() */
326 memcpy ( base_tmp
, base_path
, base_len
);
327 base_tmp
= dirname ( base_tmp
);
329 /* Process "./" and "../" elements */
330 while ( *relative_path
== '.' ) {
332 if ( *relative_path
== 0 ) {
334 } else if ( *relative_path
== '/' ) {
336 } else if ( *relative_path
== '.' ) {
338 if ( *relative_path
== 0 ) {
339 base_tmp
= dirname ( base_tmp
);
340 } else if ( *relative_path
== '/' ) {
341 base_tmp
= dirname ( base_tmp
);
353 /* Create and return new path */
354 if ( asprintf ( &resolved
, "%s%s%s", base_tmp
,
355 ( ( base_tmp
[ strlen ( base_tmp
) - 1 ] == '/' ) ?
356 "" : "/" ), relative_path
) < 0 )
363 * Resolve base+relative URI
365 * @v base_uri Base URI, or NULL
366 * @v relative_uri Relative URI
367 * @ret resolved_uri Resolved URI
369 * Takes a base URI (e.g. "http://etherboot.org/kernels/vmlinuz") and a
370 * relative URI (e.g. "../initrds/initrd.gz") and produces a new URI
371 * (e.g. "http://etherboot.org/initrds/initrd.gz").
373 struct uri
* resolve_uri ( struct uri
*base_uri
,
374 struct uri
*relative_uri
) {
376 char *tmp_path
= NULL
;
379 /* If relative URI is absolute, just re-use it */
380 if ( uri_is_absolute ( relative_uri
) || ( ! base_uri
) )
381 return uri_get ( relative_uri
);
384 memcpy ( &tmp_uri
, base_uri
, sizeof ( tmp_uri
) );
385 if ( relative_uri
->path
) {
386 tmp_path
= resolve_path ( ( base_uri
->path
?
387 base_uri
->path
: "/" ),
388 relative_uri
->path
);
389 tmp_uri
.path
= tmp_path
;
390 tmp_uri
.query
= relative_uri
->query
;
391 tmp_uri
.fragment
= relative_uri
->fragment
;
392 } else if ( relative_uri
->query
) {
393 tmp_uri
.query
= relative_uri
->query
;
394 tmp_uri
.fragment
= relative_uri
->fragment
;
395 } else if ( relative_uri
->fragment
) {
396 tmp_uri
.fragment
= relative_uri
->fragment
;
399 /* Create demangled URI */
400 new_uri
= uri_dup ( &tmp_uri
);
406 * Test for unreserved URI characters
408 * @v c Character to test
409 * @v field Field of URI in which character lies
410 * @ret is_unreserved Character is an unreserved character
412 static int is_unreserved_uri_char ( int c
, int field
) {
413 /* According to RFC3986, the unreserved character set is
415 * A-Z a-z 0-9 - _ . ~
417 * but we also pass : in hosts, & ; = in queries, / in paths,
418 * and everything in opaques
420 int ok
= ( isupper ( c
) || islower ( c
) || isdigit ( c
) ||
421 ( c
== '-' ) || ( c
== '_' ) ||
422 ( c
== '.' ) || ( c
== '~' ) );
424 /* : is valid for an IPv6 host address */
425 if ( field
== URI_HOST
)
426 ok
= ok
|| (c
== ':');
428 if ( field
== URI_QUERY
)
429 ok
= ok
|| ( c
== ';' ) || ( c
== '&' ) || ( c
== '=' );
431 if ( field
== URI_PATH
)
432 ok
= ok
|| ( c
== '/' );
434 if ( field
== URI_OPAQUE
)
443 * @v raw_string String to be URI-encoded
444 * @v buf Buffer to contain encoded string
445 * @v len Length of buffer
446 * @v field Field of URI in which string lies
447 * @ret len Length of encoded string (excluding NUL)
449 size_t uri_encode ( const char *raw_string
, char *buf
, ssize_t len
,
451 ssize_t remaining
= len
;
458 while ( ( c
= *(raw_string
++) ) ) {
459 if ( is_unreserved_uri_char ( c
, field
) ) {
460 used
= ssnprintf ( buf
, remaining
, "%c", c
);
462 used
= ssnprintf ( buf
, remaining
, "%%%02X", c
);
468 return ( len
- remaining
);
472 * Decode URI-encoded string
474 * @v encoded_string URI-encoded string
475 * @v buf Buffer to contain decoded string
476 * @v len Length of buffer
477 * @ret len Length of decoded string (excluding NUL)
479 * This function may be used in-place, with @a buf the same as
482 size_t uri_decode ( const char *encoded_string
, char *buf
, ssize_t len
) {
488 for ( remaining
= len
; *encoded_string
; remaining
-- ) {
489 if ( *encoded_string
== '%' ) {
491 snprintf ( hexbuf
, sizeof ( hexbuf
), "%s",
493 c
= strtoul ( hexbuf
, &hexbuf_end
, 16 );
494 encoded_string
+= ( hexbuf_end
- hexbuf
);
496 c
= *(encoded_string
++);
505 return ( len
- remaining
);