src/core/uri.c

   1 /*
   2  * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
   3  *
   4  * This program is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU General Public License as
   6  * published by the Free Software Foundation; either version 2 of the
   7  * License, or any later version.
   8  *
   9  * This program is distributed in the hope that it will be useful, but
  10  * WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  * General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public License
  15  * along with this program; if not, write to the Free Software
  16  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17  */
  18
  19 FILE_LICENCE ( GPL2_OR_LATER );
  20
  21 /** @file
  22  *
  23  * Uniform Resource Identifiers
  24  *
  25  */
  26
  27 #include <stdint.h>
  28 #include <stdlib.h>
  29 #include <string.h>
  30 #include <libgen.h>
  31 #include <ctype.h>
  32 #include <gpxe/vsprintf.h>
  33 #include <gpxe/uri.h>
  34
  35 /**
  36  * Dump URI for debugging
  37  *
  38  * @v uri               URI
  39  */
  40 static void dump_uri ( struct uri *uri ) {
  41         if ( ! uri )
  42                 return;
  43         if ( uri->scheme )
  44                 DBG ( " scheme \"%s\"", uri->scheme );
  45         if ( uri->opaque )
  46                 DBG ( " opaque \"%s\"", uri->opaque );
  47         if ( uri->user )
  48                 DBG ( " user \"%s\"", uri->user );
  49         if ( uri->password )
  50                 DBG ( " password \"%s\"", uri->password );
  51         if ( uri->host )
  52                 DBG ( " host \"%s\"", uri->host );
  53         if ( uri->port )
  54                 DBG ( " port \"%s\"", uri->port );
  55         if ( uri->path )
  56                 DBG ( " path \"%s\"", uri->path );
  57         if ( uri->query )
  58                 DBG ( " query \"%s\"", uri->query );
  59         if ( uri->fragment )
  60                 DBG ( " fragment \"%s\"", uri->fragment );
  61 }
  62
  63 /**
  64  * Parse URI
  65  *
  66  * @v uri_string        URI as a string
  67  * @ret uri             URI
  68  *
  69  * Splits a URI into its component parts.  The return URI structure is
  70  * dynamically allocated and must eventually be freed by calling
  71  * uri_put().
  72  */
  73 struct uri * parse_uri ( const char *uri_string ) {
  74         struct uri *uri;
  75         char *raw;
  76         char *tmp;
  77         char *path = NULL;
  78         char *authority = NULL;
  79         size_t raw_len;
  80
  81         /* Allocate space for URI struct and a copy of the string */
  82         raw_len = ( strlen ( uri_string ) + 1 /* NUL */ );
  83         uri = zalloc ( sizeof ( *uri ) + raw_len );
  84         if ( ! uri )
  85                 return NULL;
  86         raw = ( ( ( char * ) uri ) + sizeof ( *uri ) );
  87
  88         /* Zero URI struct and copy in the raw string */
  89         memcpy ( raw, uri_string, raw_len );
  90
  91         /* Start by chopping off the fragment, if it exists */
  92         if ( ( tmp = strchr ( raw, '#' ) ) ) {
  93                 *(tmp++) = '\0';
  94                 uri->fragment = tmp;
  95         }
  96
  97         /* Identify absolute/relative URI.  We ignore schemes that are
  98          * apparently only a single character long, since otherwise we
  99          * misinterpret a DOS-style path name ("C:\path\to\file") as a
 100          * URI with scheme="C",opaque="\path\to\file".
 101          */
 102         if ( ( tmp = strchr ( raw, ':' ) ) && ( tmp > ( raw + 1 ) ) ) {
 103                 /* Absolute URI: identify hierarchical/opaque */
 104                 uri->scheme = raw;
 105                 *(tmp++) = '\0';
 106                 if ( *tmp == '/' ) {
 107                         /* Absolute URI with hierarchical part */
 108                         path = tmp;
 109                 } else {
 110                         /* Absolute URI with opaque part */
 111                         uri->opaque = tmp;
 112                 }
 113         } else {
 114                 /* Relative URI */
 115                 path = raw;
 116         }
 117
 118         /* If we don't have a path (i.e. we have an absolute URI with
 119          * an opaque portion, we're already finished processing
 120          */
 121         if ( ! path )
 122                 goto done;
 123
 124         /* Chop off the query, if it exists */
 125         if ( ( tmp = strchr ( path, '?' ) ) ) {
 126                 *(tmp++) = '\0';
 127                 uri->query = tmp;
 128         }
 129
 130         /* Identify net/absolute/relative path */
 131         if ( strncmp ( path, "//", 2 ) == 0 ) {
 132                 /* Net path.  If this is terminated by the first '/'
 133                  * of an absolute path, then we have no space for a
 134                  * terminator after the authority field, so shuffle
 135                  * the authority down by one byte, overwriting one of
 136                  * the two slashes.
 137                  */
 138                 authority = ( path + 2 );
 139                 if ( ( tmp = strchr ( authority, '/' ) ) ) {
 140                         /* Shuffle down */
 141                         uri->path = tmp;
 142                         memmove ( ( authority - 1 ), authority,
 143                                   ( tmp - authority ) );
 144                         authority--;
 145                         *(--tmp) = '\0';
 146                 }
 147         } else {
 148                 /* Absolute/relative path */
 149                 uri->path = path;
 150         }
 151
 152         /* Split authority into user[:password] and host[:port] portions */
 153         if ( ( tmp = strchr ( authority, '@' ) ) ) {
 154                 /* Has user[:password] */
 155                 *(tmp++) = '\0';
 156                 uri->host = tmp;
 157                 uri->user = authority;
 158                 if ( ( tmp = strchr ( authority, ':' ) ) ) {
 159                         /* Has password */
 160                         *(tmp++) = '\0';
 161                         uri->password = tmp;
 162                 }
 163         } else {
 164                 /* No user:password */
 165                 uri->host = authority;
 166         }
 167
 168         /* Split host into host[:port] */
 169         if ( ( tmp = strchr ( uri->host, ':' ) ) ) {
 170                 *(tmp++) = '\0';
 171                 uri->port = tmp;
 172         }
 173
 174  done:
 175         DBG ( "URI \"%s\" split into", uri_string );
 176         dump_uri ( uri );
 177         DBG ( "\n" );
 178
 179         return uri;
 180 }
 181
 182 /**
 183  * Get port from URI
 184  *
 185  * @v uri               URI, or NULL
 186  * @v default_port      Default port to use if none specified in URI
 187  * @ret port            Port
 188  */
 189 unsigned int uri_port ( struct uri *uri, unsigned int default_port ) {
 190         if ( ( ! uri ) || ( ! uri->port ) )
 191                 return default_port;
 192         return ( strtoul ( uri->port, NULL, 0 ) );
 193 }
 194
 195 /**
 196  * Unparse URI
 197  *
 198  * @v buf               Buffer to fill with URI string
 199  * @v size              Size of buffer
 200  * @v uri               URI to write into buffer, or NULL
 201  * @ret len             Length of URI string
 202  */
 203 int unparse_uri ( char *buf, size_t size, struct uri *uri ) {
 204         int used = 0;
 205
 206         DBG ( "URI unparsing" );
 207         dump_uri ( uri );
 208         DBG ( "\n" );
 209
 210         /* Special-case NULL URI */
 211         if ( ! uri ) {
 212                 if ( size )
 213                         buf[0] = '\0';
 214                 return 0;
 215         }
 216
 217         /* Special-case opaque URIs */
 218         if ( uri->opaque ) {
 219                 return ssnprintf ( ( buf + used ), ( size - used ),
 220                                    "%s:%s", uri->scheme, uri->opaque );
 221         }
 222
 223         /* scheme:// */
 224         if ( uri->scheme ) {
 225                 used += ssnprintf ( ( buf + used ), ( size - used ),
 226                                     "%s://", uri->scheme );
 227         }
 228
 229         /* [user[:password]@]host[:port] */
 230         if ( uri->host ) {
 231                 if ( uri->user ) {
 232                         used += ssnprintf ( ( buf + used ), ( size - used ),
 233                                             "%s", uri->user );
 234                         if ( uri->password ) {
 235                                 used += ssnprintf ( ( buf + used ),
 236                                                     ( size - used ),
 237                                                     ":%s", uri->password );
 238                         }
 239                         used += ssnprintf ( ( buf + used ), ( size - used ),
 240                                             "@" );
 241                 }
 242                 used += ssnprintf ( ( buf + used ), ( size - used ), "%s",
 243                                     uri->host );
 244                 if ( uri->port ) {
 245                         used += ssnprintf ( ( buf + used ), ( size - used ),
 246                                             ":%s", uri->port );
 247                 }
 248         }
 249
 250         /* /path */
 251         if ( uri->path ) {
 252                 used += ssnprintf ( ( buf + used ), ( size - used ),
 253                                     "%s", uri->path );
 254         }
 255
 256         /* ?query */
 257         if ( uri->query ) {
 258                 used += ssnprintf ( ( buf + used ), ( size - used ),
 259                                     "?%s", uri->query );
 260         }
 261
 262         /* #fragment */
 263         if ( uri->fragment ) {
 264                 used += ssnprintf ( ( buf + used ), ( size - used ),
 265                                     "#%s", uri->fragment );
 266         }
 267
 268         return used;
 269 }
 270
 271 /**
 272  * Duplicate URI
 273  *
 274  * @v uri               URI
 275  * @ret uri             Duplicate URI
 276  *
 277  * Creates a modifiable copy of a URI.
 278  */
 279 struct uri * uri_dup ( struct uri *uri ) {
 280         size_t len = ( unparse_uri ( NULL, 0, uri ) + 1 );
 281         char buf[len];
 282
 283         unparse_uri ( buf, len, uri );
 284         return parse_uri ( buf );
 285 }
 286
 287 /**
 288  * Resolve base+relative path
 289  *
 290  * @v base_uri          Base path
 291  * @v relative_uri      Relative path
 292  * @ret resolved_uri    Resolved path
 293  *
 294  * Takes a base path (e.g. "/var/lib/tftpboot/vmlinuz" and a relative
 295  * path (e.g. "initrd.gz") and produces a new path
 296  * (e.g. "/var/lib/tftpboot/initrd.gz").  Note that any non-directory
 297  * portion of the base path will automatically be stripped; this
 298  * matches the semantics used when resolving the path component of
 299  * URIs.
 300  */
 301 char * resolve_path ( const char *base_path,
 302                       const char *relative_path ) {
 303         size_t base_len = ( strlen ( base_path ) + 1 );
 304         char base_path_copy[base_len];
 305         char *base_tmp = base_path_copy;
 306         char *resolved;
 307
 308         /* If relative path is absolute, just re-use it */
 309         if ( relative_path[0] == '/' )
 310                 return strdup ( relative_path );
 311
 312         /* Create modifiable copy of path for dirname() */
 313         memcpy ( base_tmp, base_path, base_len );
 314         base_tmp = dirname ( base_tmp );
 315
 316         /* Process "./" and "../" elements */
 317         while ( *relative_path == '.' ) {
 318                 relative_path++;
 319                 if ( *relative_path == 0 ) {
 320                         /* Do nothing */
 321                 } else if ( *relative_path == '/' ) {
 322                         relative_path++;
 323                 } else if ( *relative_path == '.' ) {
 324                         relative_path++;
 325                         if ( *relative_path == 0 ) {
 326                                 base_tmp = dirname ( base_tmp );
 327                         } else if ( *relative_path == '/' ) {
 328                                 base_tmp = dirname ( base_tmp );
 329                                 relative_path++;
 330                         } else {
 331                                 relative_path -= 2;
 332                                 break;
 333                         }
 334                 } else {
 335                         relative_path--;
 336                         break;
 337                 }
 338         }
 339
 340         /* Create and return new path */
 341         if ( asprintf ( &resolved, "%s%s%s", base_tmp,
 342                         ( ( base_tmp[ strlen ( base_tmp ) - 1 ] == '/' ) ?
 343                           "" : "/" ), relative_path ) < 0 )
 344                 return NULL;
 345
 346         return resolved;
 347 }
 348
 349 /**
 350  * Resolve base+relative URI
 351  *
 352  * @v base_uri          Base URI, or NULL
 353  * @v relative_uri      Relative URI
 354  * @ret resolved_uri    Resolved URI
 355  *
 356  * Takes a base URI (e.g. "http://etherboot.org/kernels/vmlinuz" and a
 357  * relative URI (e.g. "../initrds/initrd.gz") and produces a new URI
 358  * (e.g. "http://etherboot.org/initrds/initrd.gz").
 359  */
 360 struct uri * resolve_uri ( struct uri *base_uri,
 361                            struct uri *relative_uri ) {
 362         struct uri tmp_uri;
 363         char *tmp_path = NULL;
 364         struct uri *new_uri;
 365
 366         /* If relative URI is absolute, just re-use it */
 367         if ( uri_is_absolute ( relative_uri ) || ( ! base_uri ) )
 368                 return uri_get ( relative_uri );
 369
 370         /* Mangle URI */
 371         memcpy ( &tmp_uri, base_uri, sizeof ( tmp_uri ) );
 372         if ( relative_uri->path ) {
 373                 tmp_path = resolve_path ( ( base_uri->path ?
 374                                             base_uri->path : "/" ),
 375                                           relative_uri->path );
 376                 tmp_uri.path = tmp_path;
 377                 tmp_uri.query = relative_uri->query;
 378                 tmp_uri.fragment = relative_uri->fragment;
 379         } else if ( relative_uri->query ) {
 380                 tmp_uri.query = relative_uri->query;
 381                 tmp_uri.fragment = relative_uri->fragment;
 382         } else if ( relative_uri->fragment ) {
 383                 tmp_uri.fragment = relative_uri->fragment;
 384         }
 385
 386         /* Create demangled URI */
 387         new_uri = uri_dup ( &tmp_uri );
 388         free ( tmp_path );
 389         return new_uri;
 390 }
 391
 392 /**
 393  * Test for unreserved URI characters
 394  *
 395  * @v c                 Character to test
 396  * @ret is_unreserved   Character is an unreserved character
 397  */
 398 static int is_unreserved_uri_char ( int c ) {
 399         /* According to RFC3986, the unreserved character set is
 400          *
 401          * A-Z a-z 0-9 - _ . ~
 402          */
 403         return ( isupper ( c ) || islower ( c ) || isdigit ( c ) ||
 404                  ( c == '-' ) || ( c == '_' ) ||
 405                  ( c == '.' ) || ( c == '~' ) );
 406 }
 407
 408 /**
 409  * URI-encode string
 410  *
 411  * @v raw_string        String to be URI-encoded
 412  * @v buf               Buffer to contain encoded string
 413  * @v len               Length of buffer
 414  * @ret len             Length of encoded string (excluding NUL)
 415  */
 416 size_t uri_encode ( const char *raw_string, char *buf, size_t len ) {
 417         ssize_t remaining = len;
 418         size_t used;
 419         unsigned char c;
 420
 421         if ( len )
 422                 buf[0] = '\0';
 423
 424         while ( ( c = *(raw_string++) ) ) {
 425                 if ( is_unreserved_uri_char ( c ) ) {
 426                         used = ssnprintf ( buf, remaining, "%c", c );
 427                 } else {
 428                         used = ssnprintf ( buf, remaining, "%%%02X", c );
 429                 }
 430                 buf += used;
 431                 remaining -= used;
 432         }
 433
 434         return ( len - remaining );
 435 }
 436
 437 /**
 438  * Decode URI-encoded string
 439  *
 440  * @v encoded_string    URI-encoded string
 441  * @v buf               Buffer to contain decoded string
 442  * @v len               Length of buffer
 443  * @ret len             Length of decoded string (excluding NUL)
 444  */
 445 size_t uri_decode ( const char *encoded_string, char *buf, size_t len ) {
 446         ssize_t remaining = len;
 447         char hexbuf[3];
 448         char *hexbuf_end;
 449         unsigned char c;
 450
 451         if ( len )
 452                 buf[0] = '\0';
 453
 454         while ( *encoded_string ) {
 455                 if ( *encoded_string == '%' ) {
 456                         encoded_string++;
 457                         snprintf ( hexbuf, sizeof ( hexbuf ), "%s",
 458                                    encoded_string );
 459                         c = strtoul ( hexbuf, &hexbuf_end, 16 );
 460                         encoded_string += ( hexbuf_end - hexbuf );
 461                 } else {
 462                         c = *(encoded_string++);
 463                 }
 464                 ssnprintf ( buf++, remaining--, "%c", c );
 465         }
 466         return ( len - remaining );
 467 }