|  | @@ -0,0 +1,144 @@
 | 
		
	
		
			
			|  | 1 | +#include "stdlib.h"
 | 
		
	
		
			
			|  | 2 | +#include "string.h"
 | 
		
	
		
			
			|  | 3 | +#include "proto.h"
 | 
		
	
		
			
			|  | 4 | +#include "resolv.h"
 | 
		
	
		
			
			|  | 5 | +#include "url.h"
 | 
		
	
		
			
			|  | 6 | +
 | 
		
	
		
			
			|  | 7 | +static struct protocol protocols[0] __protocol_start;
 | 
		
	
		
			
			|  | 8 | +static struct protocol default_protocols[0] __default_protocol_start;
 | 
		
	
		
			
			|  | 9 | +static struct protocol protocols_end[0] __protocol_end;
 | 
		
	
		
			
			|  | 10 | +
 | 
		
	
		
			
			|  | 11 | +/*
 | 
		
	
		
			
			|  | 12 | + * Parse protocol portion of a URL.  Return 0 if no "proto://" is
 | 
		
	
		
			
			|  | 13 | + * present.
 | 
		
	
		
			
			|  | 14 | + *
 | 
		
	
		
			
			|  | 15 | + */
 | 
		
	
		
			
			|  | 16 | +static inline int parse_protocol ( struct url_info *info, const char **p ) {
 | 
		
	
		
			
			|  | 17 | +	const char *q = *p;
 | 
		
	
		
			
			|  | 18 | +
 | 
		
	
		
			
			|  | 19 | +	info->protocol = q;
 | 
		
	
		
			
			|  | 20 | +	for ( ; *q ; q++ ) {
 | 
		
	
		
			
			|  | 21 | +		if ( memcmp ( q, "://", 3 ) == 0 ) {
 | 
		
	
		
			
			|  | 22 | +			info->protocol_len = q - info->protocol;
 | 
		
	
		
			
			|  | 23 | +			*p = q + 3;
 | 
		
	
		
			
			|  | 24 | +			return 1;
 | 
		
	
		
			
			|  | 25 | +		}
 | 
		
	
		
			
			|  | 26 | +	}
 | 
		
	
		
			
			|  | 27 | +	return 0;
 | 
		
	
		
			
			|  | 28 | +}
 | 
		
	
		
			
			|  | 29 | +
 | 
		
	
		
			
			|  | 30 | +/*
 | 
		
	
		
			
			|  | 31 | + * Parse the host:port portion of a URL.  Also fills in sin_port.
 | 
		
	
		
			
			|  | 32 | + *
 | 
		
	
		
			
			|  | 33 | + */
 | 
		
	
		
			
			|  | 34 | +static inline void parse_host_port ( struct url_info *info, const char **p ) {
 | 
		
	
		
			
			|  | 35 | +	info->host = *p;
 | 
		
	
		
			
			|  | 36 | +	for ( ; **p && ( **p != '/' ) ; (*p)++ ) {
 | 
		
	
		
			
			|  | 37 | +		if ( **p == ':' ) {
 | 
		
	
		
			
			|  | 38 | +			info->host_len = *p - info->host;
 | 
		
	
		
			
			|  | 39 | +			info->port = ++(*p);
 | 
		
	
		
			
			|  | 40 | +			info->sin.sin_port = strtoul ( *p, p, 10 );
 | 
		
	
		
			
			|  | 41 | +			info->port_len = *p - info->port;
 | 
		
	
		
			
			|  | 42 | +			return;
 | 
		
	
		
			
			|  | 43 | +		}
 | 
		
	
		
			
			|  | 44 | +	}
 | 
		
	
		
			
			|  | 45 | +	/* No ':' separator seen; it's all the host part */
 | 
		
	
		
			
			|  | 46 | +	info->host_len = *p - info->host;
 | 
		
	
		
			
			|  | 47 | +}
 | 
		
	
		
			
			|  | 48 | +
 | 
		
	
		
			
			|  | 49 | +/*
 | 
		
	
		
			
			|  | 50 | + * Identify the protocol
 | 
		
	
		
			
			|  | 51 | + *
 | 
		
	
		
			
			|  | 52 | + */
 | 
		
	
		
			
			|  | 53 | +static inline int identify_protocol ( struct url_info *info ) {
 | 
		
	
		
			
			|  | 54 | +	struct protocol *proto;
 | 
		
	
		
			
			|  | 55 | +
 | 
		
	
		
			
			|  | 56 | +	if ( info->protocol_len ) {
 | 
		
	
		
			
			|  | 57 | +		char *terminator;
 | 
		
	
		
			
			|  | 58 | +		char temp;
 | 
		
	
		
			
			|  | 59 | +
 | 
		
	
		
			
			|  | 60 | +		/* Explcitly specified protocol */
 | 
		
	
		
			
			|  | 61 | +		terminator = ( char * ) &info->protocol[info->protocol_len];
 | 
		
	
		
			
			|  | 62 | +		temp = *terminator;
 | 
		
	
		
			
			|  | 63 | +		*terminator = '\0';
 | 
		
	
		
			
			|  | 64 | +		for ( proto = protocols ; proto < protocols_end ; proto++ ) {
 | 
		
	
		
			
			|  | 65 | +			if ( memcmp ( proto->name, info->protocol,
 | 
		
	
		
			
			|  | 66 | +				      info->protocol_len + 1 ) == 0 ) {
 | 
		
	
		
			
			|  | 67 | +				info->proto = proto;
 | 
		
	
		
			
			|  | 68 | +				break;
 | 
		
	
		
			
			|  | 69 | +			}
 | 
		
	
		
			
			|  | 70 | +		}
 | 
		
	
		
			
			|  | 71 | +		*terminator = temp;
 | 
		
	
		
			
			|  | 72 | +	} else {
 | 
		
	
		
			
			|  | 73 | +		/* No explicitly specified protocol */
 | 
		
	
		
			
			|  | 74 | +		if ( default_protocols < protocols_end )
 | 
		
	
		
			
			|  | 75 | +			info->proto = default_protocols;
 | 
		
	
		
			
			|  | 76 | +	}
 | 
		
	
		
			
			|  | 77 | +	return ( ( int ) info->proto ); /* NULL indicates failure */
 | 
		
	
		
			
			|  | 78 | +}
 | 
		
	
		
			
			|  | 79 | +
 | 
		
	
		
			
			|  | 80 | +/*
 | 
		
	
		
			
			|  | 81 | + * Resolve the host portion of the URL
 | 
		
	
		
			
			|  | 82 | + *
 | 
		
	
		
			
			|  | 83 | + */
 | 
		
	
		
			
			|  | 84 | +static inline int resolve_host ( struct url_info *info ) {
 | 
		
	
		
			
			|  | 85 | +	char *terminator;
 | 
		
	
		
			
			|  | 86 | +	char temp;
 | 
		
	
		
			
			|  | 87 | +	int success;
 | 
		
	
		
			
			|  | 88 | +
 | 
		
	
		
			
			|  | 89 | +	if ( ! info->host_len ) {
 | 
		
	
		
			
			|  | 90 | +		/* No host specified - leave sin.sin_addr empty to
 | 
		
	
		
			
			|  | 91 | +		 * indicate use of DHCP-supplied next-server
 | 
		
	
		
			
			|  | 92 | +		 */
 | 
		
	
		
			
			|  | 93 | +		return 1;
 | 
		
	
		
			
			|  | 94 | +	}
 | 
		
	
		
			
			|  | 95 | +
 | 
		
	
		
			
			|  | 96 | +	terminator = ( char * ) &info->host[info->host_len];
 | 
		
	
		
			
			|  | 97 | +	temp = *terminator;
 | 
		
	
		
			
			|  | 98 | +	*terminator = '\0';
 | 
		
	
		
			
			|  | 99 | +	success = resolv ( &info->sin.sin_addr, info->host );
 | 
		
	
		
			
			|  | 100 | +	*terminator = temp;
 | 
		
	
		
			
			|  | 101 | +	return success;
 | 
		
	
		
			
			|  | 102 | +}
 | 
		
	
		
			
			|  | 103 | +
 | 
		
	
		
			
			|  | 104 | +/*
 | 
		
	
		
			
			|  | 105 | + * Parse a URL string into its constituent parts.  Perform name
 | 
		
	
		
			
			|  | 106 | + * resolution if required (and if resolver code is linked in), and
 | 
		
	
		
			
			|  | 107 | + * identify the protocol.
 | 
		
	
		
			
			|  | 108 | + *
 | 
		
	
		
			
			|  | 109 | + * We accept URLs of the form
 | 
		
	
		
			
			|  | 110 | + *
 | 
		
	
		
			
			|  | 111 | + *   [protocol://[host][:port]/]path/to/file
 | 
		
	
		
			
			|  | 112 | + *
 | 
		
	
		
			
			|  | 113 | + * We return true for success, 0 for failure (e.g. unknown protocol).
 | 
		
	
		
			
			|  | 114 | + * Note that the "/" before path/to/file *will* be counted as part of
 | 
		
	
		
			
			|  | 115 | + * the filename, if it is present.
 | 
		
	
		
			
			|  | 116 | + *
 | 
		
	
		
			
			|  | 117 | + */
 | 
		
	
		
			
			|  | 118 | +int parse_url ( struct url_info *info, const char *url ) {
 | 
		
	
		
			
			|  | 119 | +	const char *p;
 | 
		
	
		
			
			|  | 120 | +
 | 
		
	
		
			
			|  | 121 | +	/* Fill in initial values */
 | 
		
	
		
			
			|  | 122 | +	memset ( info, 0, sizeof ( *info ) );
 | 
		
	
		
			
			|  | 123 | +	info->url = url;
 | 
		
	
		
			
			|  | 124 | +	info->protocol = url;
 | 
		
	
		
			
			|  | 125 | +	info->host = url;
 | 
		
	
		
			
			|  | 126 | +	info->port = url;
 | 
		
	
		
			
			|  | 127 | +	info->file = url;
 | 
		
	
		
			
			|  | 128 | +
 | 
		
	
		
			
			|  | 129 | +	/* Split the URL into substrings, and fill in sin.sin_port */
 | 
		
	
		
			
			|  | 130 | +	p = url;
 | 
		
	
		
			
			|  | 131 | +	if ( parse_protocol ( info, &p ) )
 | 
		
	
		
			
			|  | 132 | +		parse_host_port ( info, &p );
 | 
		
	
		
			
			|  | 133 | +	info->file = p;
 | 
		
	
		
			
			|  | 134 | +
 | 
		
	
		
			
			|  | 135 | +	/* Identify the protocol */
 | 
		
	
		
			
			|  | 136 | +	if ( ! identify_protocol ( info ) )
 | 
		
	
		
			
			|  | 137 | +		return 0;
 | 
		
	
		
			
			|  | 138 | +
 | 
		
	
		
			
			|  | 139 | +	/* Resolve the host name to an IP address */
 | 
		
	
		
			
			|  | 140 | +	if ( ! resolve_host ( info ) )
 | 
		
	
		
			
			|  | 141 | +		return 0;
 | 
		
	
		
			
			|  | 142 | +
 | 
		
	
		
			
			|  | 143 | +	return 1;
 | 
		
	
		
			
			|  | 144 | +}
 |