Browse Source

[uri] Decode/encode URIs when parsing/unparsing

Currently, handling of URI escapes is ad-hoc; escaped strings are
stored as-is in the URI structure, and it is up to the individual
protocol to unescape as necessary. This is error-prone and expensive
in terms of code size. Modify this behavior so that parse_uri()
unescapes, and unparse_uri() escapes, those fields that typically
carry URI escapes (hostname, user, password, path, query, fragment).
Also allow unparse_uri() to accept a bitmask selecting a subset of
fields to print, so that it can easily be used to generate e.g. the
escaped HTTP path?query request.

Signed-off-by: Joshua Oreman <oremanj@rwcr.net>
Signed-off-by: Marty Connor <mdc@etherboot.org>
tags/v1.0.0-rc1
Joshua Oreman 15 years ago
parent
commit
3d9dd93a14
7 changed files with 155 additions and 93 deletions
  1. 1
    1
      src/core/settings.c
  2. 88
    64
      src/core/uri.c
  3. 41
    3
      src/include/gpxe/uri.h
  4. 12
    19
      src/net/tcp/http.c
  5. 5
    2
      src/tests/uri_test.c
  6. 7
    3
      src/usr/autoboot.c
  7. 1
    1
      src/usr/imgmgmt.c

+ 1
- 1
src/core/settings.c View File

@@ -1085,7 +1085,7 @@ static int fetchf_uristring ( struct settings *settings,
1085 1085
        
1086 1086
 		fetch_string_setting ( settings, setting, raw_buf,
1087 1087
 				       sizeof ( raw_buf ) );
1088
-		return uri_encode ( raw_buf, buf, len );
1088
+		return uri_encode ( raw_buf, buf, len, URI_FRAGMENT );
1089 1089
 	}
1090 1090
 }
1091 1091
 

+ 88
- 64
src/core/uri.c View File

@@ -76,6 +76,7 @@ struct uri * parse_uri ( const char *uri_string ) {
76 76
 	char *tmp;
77 77
 	char *path = NULL;
78 78
 	char *authority = NULL;
79
+	int i;
79 80
 	size_t raw_len;
80 81
 
81 82
 	/* Allocate space for URI struct and a copy of the string */
@@ -171,6 +172,14 @@ struct uri * parse_uri ( const char *uri_string ) {
171 172
 		uri->port = tmp;
172 173
 	}
173 174
 
175
+	/* Decode fields that should be decoded */
176
+	for ( i = URI_FIRST_FIELD; i <= URI_LAST_FIELD; i++ ) {
177
+		const char *field = uri_get_field ( uri, i );
178
+		if ( field && ( URI_ENCODED & ( 1 << i ) ) )
179
+			uri_decode ( field, ( char * ) field,
180
+				     strlen ( field ) + 1 /* NUL */ );
181
+	}
182
+
174 183
  done:
175 184
 	DBG ( "URI \"%s\" split into", uri_string );
176 185
 	dump_uri ( uri );
@@ -198,10 +207,19 @@ unsigned int uri_port ( struct uri *uri, unsigned int default_port ) {
198 207
  * @v buf		Buffer to fill with URI string
199 208
  * @v size		Size of buffer
200 209
  * @v uri		URI to write into buffer, or NULL
210
+ * @v fields		Bitmask of fields to include in URI string, or URI_ALL
201 211
  * @ret len		Length of URI string
202 212
  */
203
-int unparse_uri ( char *buf, size_t size, struct uri *uri ) {
213
+int unparse_uri ( char *buf, size_t size, struct uri *uri,
214
+		  unsigned int fields ) {
215
+	/* List of characters that typically go before certain fields */
216
+	static char separators[] = { /* scheme */ 0, /* opaque */ ':',
217
+				     /* user */ 0, /* password */ ':',
218
+				     /* host */ '@', /* port */ ':',
219
+				     /* path */ 0, /* query */ '?',
220
+				     /* fragment */ '#' };
204 221
 	int used = 0;
222
+	int i;
205 223
 
206 224
 	DBG ( "URI unparsing" );
207 225
 	dump_uri ( uri );
@@ -214,55 +232,39 @@ int unparse_uri ( char *buf, size_t size, struct uri *uri ) {
214 232
 		return 0;
215 233
 	}
216 234
 
217
-	/* Special-case opaque URIs */
218
-	if ( uri->opaque ) {
219
-		return ssnprintf ( ( buf + used ), ( size - used ),
220
-				   "%s:%s", uri->scheme, uri->opaque );
221
-	}
222
-
223
-	/* scheme:// */
224
-	if ( uri->scheme ) {
225
-		used += ssnprintf ( ( buf + used ), ( size - used ),
226
-				    "%s://", uri->scheme );
227
-	}
228
-
229
-	/* [user[:password]@]host[:port] */
230
-	if ( uri->host ) {
231
-		if ( uri->user ) {
232
-			used += ssnprintf ( ( buf + used ), ( size - used ),
233
-					    "%s", uri->user );
234
-			if ( uri->password ) {
235
-				used += ssnprintf ( ( buf + used ),
236
-						    ( size - used ),
237
-						    ":%s", uri->password );
235
+	/* Iterate through requested fields */
236
+	for ( i = URI_FIRST_FIELD; i <= URI_LAST_FIELD; i++ ) {
237
+		const char *field = uri_get_field ( uri, i );
238
+		char sep = separators[i];
239
+
240
+		/* Ensure `fields' only contains bits for fields that exist */
241
+		if ( ! field )
242
+			fields &= ~( 1 << i );
243
+
244
+		/* Store this field if we were asked to */
245
+		if ( fields & ( 1 << i ) ) {
246
+			/* Print :// if we're non-opaque and had a scheme */
247
+			if ( ( fields & URI_SCHEME_BIT ) &&
248
+			     ( i > URI_OPAQUE ) ) {
249
+				used += ssnprintf ( buf + used, size - used,
250
+						    "://" );
251
+				/* Only print :// once */
252
+				fields &= ~URI_SCHEME_BIT;
238 253
 			}
239
-			used += ssnprintf ( ( buf + used ), ( size - used ),
240
-					    "@" );
241
-		}
242
-		used += ssnprintf ( ( buf + used ), ( size - used ), "%s",
243
-				    uri->host );
244
-		if ( uri->port ) {
245
-			used += ssnprintf ( ( buf + used ), ( size - used ),
246
-					    ":%s", uri->port );
247
-		}
248
-	}
249
-
250
-	/* /path */
251
-	if ( uri->path ) {
252
-		used += ssnprintf ( ( buf + used ), ( size - used ),
253
-				    "%s", uri->path );
254
-	}
255 254
 
256
-	/* ?query */
257
-	if ( uri->query ) {
258
-		used += ssnprintf ( ( buf + used ), ( size - used ),
259
-				    "?%s", uri->query );
260
-	}
261
-
262
-	/* #fragment */
263
-	if ( uri->fragment ) {
264
-		used += ssnprintf ( ( buf + used ), ( size - used ),
265
-				    "#%s", uri->fragment );
255
+			/* Only print separator if an earlier field exists */
256
+			if ( sep && ( fields & ( ( 1 << i ) - 1 ) ) )
257
+				used += ssnprintf ( buf + used, size - used,
258
+						    "%c", sep );
259
+
260
+			/* Print contents of field, possibly encoded */
261
+			if ( URI_ENCODED & ( 1 << i ) )
262
+				used += uri_encode ( field, buf + used,
263
+						     size - used, i );
264
+			else
265
+				used += ssnprintf ( buf + used, size - used,
266
+						    "%s", field );
267
+		}
266 268
 	}
267 269
 
268 270
 	return used;
@@ -277,10 +279,10 @@ int unparse_uri ( char *buf, size_t size, struct uri *uri ) {
277 279
  * Creates a modifiable copy of a URI.
278 280
  */
279 281
 struct uri * uri_dup ( struct uri *uri ) {
280
-	size_t len = ( unparse_uri ( NULL, 0, uri ) + 1 );
282
+	size_t len = ( unparse_uri ( NULL, 0, uri, URI_ALL ) + 1 );
281 283
 	char buf[len];
282 284
 
283
-	unparse_uri ( buf, len, uri );
285
+	unparse_uri ( buf, len, uri, URI_ALL );
284 286
 	return parse_uri ( buf );
285 287
 }
286 288
 
@@ -393,16 +395,31 @@ struct uri * resolve_uri ( struct uri *base_uri,
393 395
  * Test for unreserved URI characters
394 396
  *
395 397
  * @v c			Character to test
398
+ * @v field		Field of URI in which character lies
396 399
  * @ret is_unreserved	Character is an unreserved character
397 400
  */
398
-static int is_unreserved_uri_char ( int c ) {
401
+static int is_unreserved_uri_char ( int c, int field ) {
399 402
 	/* According to RFC3986, the unreserved character set is
400 403
 	 *
401 404
 	 * A-Z a-z 0-9 - _ . ~
405
+	 *
406
+	 * but we also pass & ; = in queries, / in paths,
407
+	 * and everything in opaques
402 408
 	 */
403
-	return ( isupper ( c ) || islower ( c ) || isdigit ( c ) ||
404
-		 ( c == '-' ) || ( c == '_' ) ||
405
-		 ( c == '.' ) || ( c == '~' ) );
409
+	int ok = ( isupper ( c ) || islower ( c ) || isdigit ( c ) ||
410
+		    ( c == '-' ) || ( c == '_' ) ||
411
+		    ( c == '.' ) || ( c == '~' ) );
412
+
413
+	if ( field == URI_QUERY )
414
+		ok = ok || ( c == ';' ) || ( c == '&' ) || ( c == '=' );
415
+
416
+	if ( field == URI_PATH )
417
+		ok = ok || ( c == '/' );
418
+
419
+	if ( field == URI_OPAQUE )
420
+		ok = 1;
421
+
422
+	return ok;
406 423
 }
407 424
 
408 425
 /**
@@ -411,18 +428,20 @@ static int is_unreserved_uri_char ( int c ) {
411 428
  * @v raw_string	String to be URI-encoded
412 429
  * @v buf		Buffer to contain encoded string
413 430
  * @v len		Length of buffer
431
+ * @v field		Field of URI in which string lies
414 432
  * @ret len		Length of encoded string (excluding NUL)
415 433
  */
416
-size_t uri_encode ( const char *raw_string, char *buf, size_t len ) {
434
+size_t uri_encode ( const char *raw_string, char *buf, ssize_t len,
435
+		    int field ) {
417 436
 	ssize_t remaining = len;
418 437
 	size_t used;
419 438
 	unsigned char c;
420 439
 
421
-	if ( len )
440
+	if ( len > 0 )
422 441
 		buf[0] = '\0';
423 442
 
424 443
 	while ( ( c = *(raw_string++) ) ) {
425
-		if ( is_unreserved_uri_char ( c ) ) {
444
+		if ( is_unreserved_uri_char ( c, field ) ) {
426 445
 			used = ssnprintf ( buf, remaining, "%c", c );
427 446
 		} else {
428 447
 			used = ssnprintf ( buf, remaining, "%%%02X", c );
@@ -441,17 +460,17 @@ size_t uri_encode ( const char *raw_string, char *buf, size_t len ) {
441 460
  * @v buf		Buffer to contain decoded string
442 461
  * @v len		Length of buffer
443 462
  * @ret len		Length of decoded string (excluding NUL)
463
+ *
464
+ * This function may be used in-place, with @a buf the same as
465
+ * @a encoded_string.
444 466
  */
445
-size_t uri_decode ( const char *encoded_string, char *buf, size_t len ) {
446
-	ssize_t remaining = len;
467
+size_t uri_decode ( const char *encoded_string, char *buf, ssize_t len ) {
468
+	ssize_t remaining;
447 469
 	char hexbuf[3];
448 470
 	char *hexbuf_end;
449 471
 	unsigned char c;
450 472
 
451
-	if ( len )
452
-		buf[0] = '\0';
453
-
454
-	while ( *encoded_string ) {
473
+	for ( remaining = len; *encoded_string; remaining-- ) {
455 474
 		if ( *encoded_string == '%' ) {
456 475
 			encoded_string++;
457 476
 			snprintf ( hexbuf, sizeof ( hexbuf ), "%s",
@@ -461,7 +480,12 @@ size_t uri_decode ( const char *encoded_string, char *buf, size_t len ) {
461 480
 		} else {
462 481
 			c = *(encoded_string++);
463 482
 		}
464
-		ssnprintf ( buf++, remaining--, "%c", c );
483
+		if ( remaining > 1 )
484
+			*buf++ = c;
465 485
 	}
486
+
487
+	if ( len )
488
+		*buf = 0;
489
+
466 490
 	return ( len - remaining );
467 491
 }

+ 41
- 3
src/include/gpxe/uri.h View File

@@ -20,6 +20,10 @@ FILE_LICENCE ( GPL2_OR_LATER );
20 20
  *
21 21
  * Note that all fields within a URI are optional and may be NULL.
22 22
  *
23
+ * The pointers to the various fields are packed together so they can
24
+ * be accessed in array fashion in some places in uri.c where doing so
25
+ * saves significant code size.
26
+ *
23 27
  * Some examples are probably helpful:
24 28
  *
25 29
  *   http://www.etherboot.org/wiki :
@@ -61,8 +65,40 @@ struct uri {
61 65
 	const char *query;
62 66
 	/** Fragment */
63 67
 	const char *fragment;
68
+} __attribute__ (( packed ));
69
+
70
+/** A field in a URI
71
+ *
72
+ * The order of the indices in this enumeration must match the order
73
+ * of the fields in the URI structure.
74
+ */
75
+enum {
76
+	URI_SCHEME = 0,		URI_SCHEME_BIT = ( 1 << URI_SCHEME ),
77
+	URI_OPAQUE = 1,		URI_OPAQUE_BIT = ( 1 << URI_OPAQUE ),
78
+	URI_USER = 2,		URI_USER_BIT = ( 1 << URI_USER ),
79
+	URI_PASSWORD = 3,	URI_PASSWORD_BIT = ( 1 << URI_PASSWORD ),
80
+	URI_HOST = 4,		URI_HOST_BIT = ( 1 << URI_HOST ),
81
+	URI_PORT = 5,		URI_PORT_BIT = ( 1 << URI_PORT ),
82
+	URI_PATH = 6,		URI_PATH_BIT = ( 1 << URI_PATH ),
83
+	URI_QUERY = 7,		URI_QUERY_BIT = ( 1 << URI_QUERY ),
84
+	URI_FRAGMENT = 8,	URI_FRAGMENT_BIT = ( 1 << URI_FRAGMENT ),
85
+
86
+	URI_FIRST_FIELD = URI_SCHEME,
87
+	URI_LAST_FIELD = URI_FRAGMENT,
64 88
 };
65 89
 
90
+/** Extract field from URI */
91
+#define uri_get_field( uri, field )	(&uri->scheme)[field]
92
+
93
+/** All URI fields */
94
+#define URI_ALL		( URI_SCHEME_BIT | URI_OPAQUE_BIT | URI_USER_BIT | \
95
+			  URI_PASSWORD_BIT | URI_HOST_BIT | URI_PORT_BIT | \
96
+			  URI_PATH_BIT | URI_QUERY_BIT | URI_FRAGMENT_BIT )
97
+
98
+/** URI fields that should be decoded on storage */
99
+#define URI_ENCODED	( URI_USER_BIT | URI_PASSWORD_BIT | URI_HOST_BIT | \
100
+			  URI_PATH_BIT | URI_QUERY_BIT | URI_FRAGMENT_BIT )
101
+
66 102
 /**
67 103
  * URI is an absolute URI
68 104
  *
@@ -131,14 +167,16 @@ extern struct uri *cwuri;
131 167
 
132 168
 extern struct uri * parse_uri ( const char *uri_string );
133 169
 extern unsigned int uri_port ( struct uri *uri, unsigned int default_port );
134
-extern int unparse_uri ( char *buf, size_t size, struct uri *uri );
170
+extern int unparse_uri ( char *buf, size_t size, struct uri *uri,
171
+			 unsigned int fields );
135 172
 extern struct uri * uri_dup ( struct uri *uri );
136 173
 extern char * resolve_path ( const char *base_path,
137 174
 			     const char *relative_path );
138 175
 extern struct uri * resolve_uri ( struct uri *base_uri,
139 176
 				  struct uri *relative_uri );
140 177
 extern void churi ( struct uri *uri );
141
-extern size_t uri_encode ( const char *raw_string, char *buf, size_t len );
142
-extern size_t uri_decode ( const char *encoded_string, char *buf, size_t len );
178
+extern size_t uri_encode ( const char *raw_string, char *buf, ssize_t len,
179
+			   int field );
180
+extern size_t uri_decode ( const char *encoded_string, char *buf, ssize_t len );
143 181
 
144 182
 #endif /* _GPXE_URI_H */

+ 12
- 19
src/net/tcp/http.c View File

@@ -417,9 +417,7 @@ static int http_socket_deliver_iob ( struct xfer_interface *socket,
417 417
 static void http_step ( struct process *process ) {
418 418
 	struct http_request *http =
419 419
 		container_of ( process, struct http_request, process );
420
-	const char *path = http->uri->path;
421 420
 	const char *host = http->uri->host;
422
-	const char *query = http->uri->query;
423 421
 	const char *user = http->uri->user;
424 422
 	const char *password =
425 423
 		( http->uri->password ? http->uri->password : "" );
@@ -429,27 +427,24 @@ static void http_step ( struct process *process ) {
429 427
 	char user_pw[ user_pw_len + 1 /* NUL */ ];
430 428
 	char user_pw_base64[ user_pw_base64_len + 1 /* NUL */ ];
431 429
 	int rc;
430
+	int request_len = unparse_uri ( NULL, 0, http->uri,
431
+					URI_PATH_BIT | URI_QUERY_BIT );
432 432
 
433 433
 	if ( xfer_window ( &http->socket ) ) {
434
+		char request[request_len + 1];
435
+
436
+		/* Construct path?query request */
437
+		unparse_uri ( request, sizeof ( request ), http->uri,
438
+			      URI_PATH_BIT | URI_QUERY_BIT );
434 439
 
435 440
 		/* We want to execute only once */
436 441
 		process_del ( &http->process );
437 442
 
438 443
 		/* Construct authorisation, if applicable */
439 444
 		if ( user ) {
440
-			char *buf = user_pw;
441
-			ssize_t remaining = sizeof ( user_pw );
442
-			size_t len;
443
-
444
-			/* URI-decode the username and password */
445
-			len = uri_decode ( user, buf, remaining );
446
-			buf += len;
447
-			remaining -= len;
448
-			*(remaining--, buf++) = ':';
449
-			len = uri_decode ( password, buf, remaining );
450
-			buf += len;
451
-			remaining -= len;
452
-			assert ( remaining >= 0 );
445
+			/* Make "user:password" string from decoded fields */
446
+			snprintf ( user_pw, sizeof ( user_pw ), "%s:%s",
447
+				   user, password );
453 448
 
454 449
 			/* Base64-encode the "user:password" string */
455 450
 			base64_encode ( user_pw, user_pw_base64 );
@@ -457,14 +452,12 @@ static void http_step ( struct process *process ) {
457 452
 
458 453
 		/* Send GET request */
459 454
 		if ( ( rc = xfer_printf ( &http->socket,
460
-					  "GET %s%s%s HTTP/1.0\r\n"
455
+					  "GET %s HTTP/1.0\r\n"
461 456
 					  "User-Agent: gPXE/" VERSION "\r\n"
462 457
 					  "%s%s%s"
463 458
 					  "Host: %s\r\n"
464 459
 					  "\r\n",
465
-					  ( path ? path : "/" ),
466
-					  ( query ? "?" : "" ),
467
-					  ( query ? query : "" ),
460
+					  request,
468 461
 					  ( user ?
469 462
 					    "Authorization: Basic " : "" ),
470 463
 					  ( user ? user_pw_base64 : "" ),

+ 5
- 2
src/tests/uri_test.c View File

@@ -22,6 +22,9 @@ static struct uri_test uri_tests[] = {
22 22
 	  "http://etherboot.org/page3" },
23 23
 	{ "tftp://192.168.0.1/", "/tftpboot/vmlinuz",
24 24
 	  "tftp://192.168.0.1/tftpboot/vmlinuz" },
25
+	{ "ftp://the%41nswer%3d:%34ty%32wo@ether%62oot.org:8080/p%41th/foo",
26
+	  "to?%41=b#%43d",
27
+	  "ftp://theAnswer%3d:4ty2wo@etherboot.org:8080/path/to?a=b#cd" },
25 28
 #if 0
26 29
 	"http://www.etherboot.org/wiki",
27 30
 	"mailto:bob@nowhere.com",
@@ -41,7 +44,7 @@ static int test_parse_unparse ( const char *uri_string ) {
41 44
 		rc = -ENOMEM;
42 45
 		goto done;
43 46
 	}
44
-	len = unparse_uri ( buf, sizeof ( buf ), uri );
47
+	len = unparse_uri ( buf, sizeof ( buf ), uri, URI_ALL );
45 48
 
46 49
 	/* Compare result */
47 50
 	if ( strcmp ( buf, uri_string ) != 0 ) {
@@ -92,7 +95,7 @@ static int test_resolve ( const char *base_uri_string,
92 95
 	}
93 96
 
94 97
 	/* Compare result */
95
-	len = unparse_uri ( buf, sizeof ( buf ), resolved_uri );
98
+	len = unparse_uri ( buf, sizeof ( buf ), resolved_uri, URI_ALL );
96 99
 	if ( strcmp ( buf, resolved_uri_string ) != 0 ) {
97 100
 		printf ( "Resolution of \"%s\"+\"%s\" produced \"%s\"\n",
98 101
 			 base_uri_string, relative_uri_string, buf );

+ 7
- 3
src/usr/autoboot.c View File

@@ -61,7 +61,9 @@ int boot_next_server_and_filename ( struct in_addr next_server,
61 61
 				    const char *filename ) {
62 62
 	struct uri *uri;
63 63
 	struct image *image;
64
-	char buf[ 23 /* tftp://xxx.xxx.xxx.xxx/ */ + strlen(filename) + 1 ];
64
+	char buf[ 23 /* tftp://xxx.xxx.xxx.xxx/ */ +
65
+		  ( 3 * strlen(filename) ) /* completely URI-encoded */
66
+		  + 1 /* NUL */ ];
65 67
 	int filename_is_absolute;
66 68
 	int rc;
67 69
 
@@ -78,8 +80,10 @@ int boot_next_server_and_filename ( struct in_addr next_server,
78 80
 		 * between filenames with and without initial slashes,
79 81
 		 * which is significant for TFTP.
80 82
 		 */
81
-		snprintf ( buf, sizeof ( buf ), "tftp://%s/%s",
82
-			   inet_ntoa ( next_server ), filename );
83
+		snprintf ( buf, sizeof ( buf ), "tftp://%s/",
84
+			   inet_ntoa ( next_server ) );
85
+		uri_encode ( filename, buf + strlen ( buf ),
86
+			     sizeof ( buf ) - strlen ( buf ), URI_PATH );
83 87
 		filename = buf;
84 88
 	}
85 89
 

+ 1
- 1
src/usr/imgmgmt.c View File

@@ -61,7 +61,7 @@ int imgfetch ( struct image *image, const char *uri_string,
61 61
 	if ( password )
62 62
 		uri->password = "***";
63 63
 	unparse_uri ( uri_string_redacted, sizeof ( uri_string_redacted ),
64
-		      uri );
64
+		      uri, URI_ALL );
65 65
 	uri->password = password;
66 66
 
67 67
 	if ( ( rc = create_downloader ( &monojob, image, image_register,

Loading…
Cancel
Save