robin.thoni
/
ipxe


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461
							/*
 * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/** @file
 *
 * Uniform Resource Identifiers
 *
 */

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <libgen.h>
#include <ctype.h>
#include <gpxe/vsprintf.h>
#include <gpxe/uri.h>

/**
 * Dump URI for debugging
 *
 * @v uri		URI
 */
static void dump_uri ( struct uri *uri ) {
	if ( ! uri )
		return;
	if ( uri->scheme )
		DBG ( " scheme \"%s\"", uri->scheme );
	if ( uri->opaque )
		DBG ( " opaque \"%s\"", uri->opaque );
	if ( uri->user )
		DBG ( " user \"%s\"", uri->user );
	if ( uri->password )
		DBG ( " password \"%s\"", uri->password );
	if ( uri->host )
		DBG ( " host \"%s\"", uri->host );
	if ( uri->port )
		DBG ( " port \"%s\"", uri->port );
	if ( uri->path )
		DBG ( " path \"%s\"", uri->path );
	if ( uri->query )
		DBG ( " query \"%s\"", uri->query );
	if ( uri->fragment )
		DBG ( " fragment \"%s\"", uri->fragment );
}

/**
 * Parse URI
 *
 * @v uri_string	URI as a string
 * @ret uri		URI
 *
 * Splits a URI into its component parts.  The return URI structure is
 * dynamically allocated and must eventually be freed by calling
 * uri_put().
 */
struct uri * parse_uri ( const char *uri_string ) {
	struct uri *uri;
	char *raw;
	char *tmp;
	char *path = NULL;
	char *authority = NULL;
	size_t raw_len;

	/* Allocate space for URI struct and a copy of the string */
	raw_len = ( strlen ( uri_string ) + 1 /* NUL */ );
	uri = zalloc ( sizeof ( *uri ) + raw_len );
	if ( ! uri )
		return NULL;
	raw = ( ( ( char * ) uri ) + sizeof ( *uri ) );

	/* Zero URI struct and copy in the raw string */
	memcpy ( raw, uri_string, raw_len );

	/* Start by chopping off the fragment, if it exists */
	if ( ( tmp = strchr ( raw, '#' ) ) ) {
		*(tmp++) = '\0';
		uri->fragment = tmp;
	}

	/* Identify absolute/relative URI */
	if ( ( tmp = strchr ( raw, ':' ) ) ) {
		/* Absolute URI: identify hierarchical/opaque */
		uri->scheme = raw;
		*(tmp++) = '\0';
		if ( *tmp == '/' ) {
			/* Absolute URI with hierarchical part */
			path = tmp;
		} else {
			/* Absolute URI with opaque part */
			uri->opaque = tmp;
		}
	} else {
		/* Relative URI */
		path = raw;
	}

	/* If we don't have a path (i.e. we have an absolute URI with
	 * an opaque portion, we're already finished processing
	 */
	if ( ! path )
		goto done;

	/* Chop off the query, if it exists */
	if ( ( tmp = strchr ( path, '?' ) ) ) {
		*(tmp++) = '\0';
		uri->query = tmp;
	}

	/* Identify net/absolute/relative path */
	if ( strncmp ( path, "//", 2 ) == 0 ) {
		/* Net path.  If this is terminated by the first '/'
		 * of an absolute path, then we have no space for a
		 * terminator after the authority field, so shuffle
		 * the authority down by one byte, overwriting one of
		 * the two slashes.
		 */
		authority = ( path + 2 );
		if ( ( tmp = strchr ( authority, '/' ) ) ) {
			/* Shuffle down */
			uri->path = tmp;
			memmove ( ( authority - 1 ), authority,
				  ( tmp - authority ) );
			authority--;
			*(--tmp) = '\0';
		}
	} else {
		/* Absolute/relative path */
		uri->path = path;
	}

	/* Split authority into user[:password] and host[:port] portions */
	if ( ( tmp = strchr ( authority, '@' ) ) ) {
		/* Has user[:password] */
		*(tmp++) = '\0';
		uri->host = tmp;
		uri->user = authority;
		if ( ( tmp = strchr ( authority, ':' ) ) ) {
			/* Has password */
			*(tmp++) = '\0';
			uri->password = tmp;
		}
	} else {
		/* No user:password */
		uri->host = authority;
	}

	/* Split host into host[:port] */
	if ( ( tmp = strchr ( uri->host, ':' ) ) ) {
		*(tmp++) = '\0';
		uri->port = tmp;
	}

 done:
	DBG ( "URI \"%s\" split into", uri_string );
	dump_uri ( uri );
	DBG ( "\n" );

	return uri;
}

/**
 * Get port from URI
 *
 * @v uri		URI, or NULL
 * @v default_port	Default port to use if none specified in URI
 * @ret port		Port
 */
unsigned int uri_port ( struct uri *uri, unsigned int default_port ) {
	if ( ( ! uri ) || ( ! uri->port ) )
		return default_port;
	return ( strtoul ( uri->port, NULL, 0 ) );
}

/**
 * Unparse URI
 *
 * @v buf		Buffer to fill with URI string
 * @v size		Size of buffer
 * @v uri		URI to write into buffer, or NULL
 * @ret len		Length of URI string
 */
int unparse_uri ( char *buf, size_t size, struct uri *uri ) {
	int used = 0;

	DBG ( "URI unparsing" );
	dump_uri ( uri );
	DBG ( "\n" );

	/* Special-case NULL URI */
	if ( ! uri ) {
		if ( size )
			buf[0] = '\0';
		return 0;
	}

	/* Special-case opaque URIs */
	if ( uri->opaque ) {
		return ssnprintf ( ( buf + used ), ( size - used ),
				   "%s:%s", uri->scheme, uri->opaque );
	}

	/* scheme:// */
	if ( uri->scheme ) {
		used += ssnprintf ( ( buf + used ), ( size - used ),
				    "%s://", uri->scheme );
	}

	/* [user[:password]@]host[:port] */
	if ( uri->host ) {
		if ( uri->user ) {
			used += ssnprintf ( ( buf + used ), ( size - used ),
					    "%s", uri->user );
			if ( uri->password ) {
				used += ssnprintf ( ( buf + used ),
						    ( size - used ),
						    ":%s", uri->password );
			}
			used += ssnprintf ( ( buf + used ), ( size - used ),
					    "@" );
		}
		used += ssnprintf ( ( buf + used ), ( size - used ), "%s",
				    uri->host );
		if ( uri->port ) {
			used += ssnprintf ( ( buf + used ), ( size - used ),
					    ":%s", uri->port );
		}
	}

	/* /path */
	if ( uri->path ) {
		used += ssnprintf ( ( buf + used ), ( size - used ),
				    "%s", uri->path );
	}

	/* ?query */
	if ( uri->query ) {
		used += ssnprintf ( ( buf + used ), ( size - used ),
				    "?%s", uri->query );
	}

	/* #fragment */
	if ( uri->fragment ) {
		used += ssnprintf ( ( buf + used ), ( size - used ),
				    "#%s", uri->fragment );
	}

	return used;
}

/**
 * Duplicate URI
 *
 * @v uri		URI
 * @ret uri		Duplicate URI
 *
 * Creates a modifiable copy of a URI.
 */
struct uri * uri_dup ( struct uri *uri ) {
	size_t len = ( unparse_uri ( NULL, 0, uri ) + 1 );
	char buf[len];

	unparse_uri ( buf, len, uri );
	return parse_uri ( buf );
}

/**
 * Resolve base+relative path
 *
 * @v base_uri		Base path
 * @v relative_uri	Relative path
 * @ret resolved_uri	Resolved path
 *
 * Takes a base path (e.g. "/var/lib/tftpboot/vmlinuz" and a relative
 * path (e.g. "initrd.gz") and produces a new path
 * (e.g. "/var/lib/tftpboot/initrd.gz").  Note that any non-directory
 * portion of the base path will automatically be stripped; this
 * matches the semantics used when resolving the path component of
 * URIs.
 */
char * resolve_path ( const char *base_path,
		      const char *relative_path ) {
	size_t base_len = ( strlen ( base_path ) + 1 );
	char base_path_copy[base_len];
	char *base_tmp = base_path_copy;
	char *resolved;

	/* If relative path is absolute, just re-use it */
	if ( relative_path[0] == '/' )
		return strdup ( relative_path );

	/* Create modifiable copy of path for dirname() */
	memcpy ( base_tmp, base_path, base_len );
	base_tmp = dirname ( base_tmp );

	/* Process "./" and "../" elements */
	while ( *relative_path == '.' ) {
		relative_path++;
		if ( *relative_path == 0 ) {
			/* Do nothing */
		} else if ( *relative_path == '/' ) {
			relative_path++;
		} else if ( *relative_path == '.' ) {
			relative_path++;
			if ( *relative_path == 0 ) {
				base_tmp = dirname ( base_tmp );
			} else if ( *relative_path == '/' ) {
				base_tmp = dirname ( base_tmp );
				relative_path++;
			} else {
				relative_path -= 2;
				break;
			}
		} else {
			relative_path--;
			break;
		}
	}

	/* Create and return new path */
	if ( asprintf ( &resolved, "%s%s%s", base_tmp,
			( ( base_tmp[ strlen ( base_tmp ) - 1 ] == '/' ) ?
			  "" : "/" ), relative_path ) < 0 )
		return NULL;

	return resolved;
}

/**
 * Resolve base+relative URI
 *
 * @v base_uri		Base URI, or NULL
 * @v relative_uri	Relative URI
 * @ret resolved_uri	Resolved URI
 *
 * Takes a base URI (e.g. "http://etherboot.org/kernels/vmlinuz" and a
 * relative URI (e.g. "../initrds/initrd.gz") and produces a new URI
 * (e.g. "http://etherboot.org/initrds/initrd.gz").
 */
struct uri * resolve_uri ( struct uri *base_uri,
			   struct uri *relative_uri ) {
	struct uri tmp_uri;
	char *tmp_path = NULL;
	struct uri *new_uri;

	/* If relative URI is absolute, just re-use it */
	if ( uri_is_absolute ( relative_uri ) || ( ! base_uri ) )
		return uri_get ( relative_uri );

	/* Mangle URI */
	memcpy ( &tmp_uri, base_uri, sizeof ( tmp_uri ) );
	if ( relative_uri->path ) {
		tmp_path = resolve_path ( ( base_uri->path ?
					    base_uri->path : "/" ),
					  relative_uri->path );
		tmp_uri.path = tmp_path;
		tmp_uri.query = relative_uri->query;
		tmp_uri.fragment = relative_uri->fragment;
	} else if ( relative_uri->query ) {
		tmp_uri.query = relative_uri->query;
		tmp_uri.fragment = relative_uri->fragment;
	} else if ( relative_uri->fragment ) {
		tmp_uri.fragment = relative_uri->fragment;
	}

	/* Create demangled URI */
	new_uri = uri_dup ( &tmp_uri );
	free ( tmp_path );
	return new_uri;
}

/**
 * Test for unreserved URI characters
 *
 * @v c			Character to test
 * @ret is_unreserved	Character is an unreserved character
 */
static int is_unreserved_uri_char ( int c ) {
	/* According to RFC3986, the unreserved character set is
	 *
	 * A-Z a-z 0-9 - _ . ~
	 */
	return ( isupper ( c ) || islower ( c ) || isdigit ( c ) ||
		 ( c == '-' ) || ( c == '_' ) ||
		 ( c == '.' ) || ( c == '~' ) );
}

/**
 * URI-encode string
 *
 * @v raw_string	String to be URI-encoded
 * @v buf		Buffer to contain encoded string
 * @v len		Length of buffer
 * @ret len		Length of encoded string (excluding NUL)
 */
size_t uri_encode ( const char *raw_string, char *buf, size_t len ) {
	ssize_t remaining = len;
	size_t used;
	unsigned char c;

	if ( len )
		buf[0] = '\0';

	while ( ( c = *(raw_string++) ) ) {
		if ( is_unreserved_uri_char ( c ) ) {
			used = ssnprintf ( buf, remaining, "%c", c );
		} else {
			used = ssnprintf ( buf, remaining, "%%%02X", c );
		}
		buf += used;
		remaining -= used;
	}

	return ( len - remaining );
}

/**
 * Decode URI-encoded string
 *
 * @v encoded_string	URI-encoded string
 * @v buf		Buffer to contain decoded string
 * @v len		Length of buffer
 * @ret len		Length of decoded string (excluding NUL)
 */
size_t uri_decode ( const char *encoded_string, char *buf, size_t len ) {
	ssize_t remaining = len;
	char hexbuf[3];
	char *hexbuf_end;
	unsigned char c;

	if ( len )
		buf[0] = '\0';

	while ( *encoded_string ) {
		if ( *encoded_string == '%' ) {
			encoded_string++;
			snprintf ( hexbuf, sizeof ( hexbuf ), "%s",
				   encoded_string );
			c = strtoul ( hexbuf, &hexbuf_end, 16 );
			encoded_string += ( hexbuf_end - hexbuf );
		} else {
			c = *(encoded_string++);
		}
		ssnprintf ( buf++, remaining--, "%c", c );
	}
	return ( len - remaining );
}