You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

uri.c 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530
  1. /*
  2. * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License as
  6. * published by the Free Software Foundation; either version 2 of the
  7. * License, or any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful, but
  10. * WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write to the Free Software
  16. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  17. * 02110-1301, USA.
  18. */
  19. FILE_LICENCE ( GPL2_OR_LATER );
  20. /** @file
  21. *
  22. * Uniform Resource Identifiers
  23. *
  24. */
  25. #include <stdint.h>
  26. #include <stdlib.h>
  27. #include <string.h>
  28. #include <libgen.h>
  29. #include <ctype.h>
  30. #include <ipxe/vsprintf.h>
  31. #include <ipxe/params.h>
  32. #include <ipxe/uri.h>
  33. /**
  34. * Dump URI for debugging
  35. *
  36. * @v uri URI
  37. */
  38. static void dump_uri ( struct uri *uri ) {
  39. if ( ! uri )
  40. return;
  41. if ( uri->scheme )
  42. DBG ( " scheme \"%s\"", uri->scheme );
  43. if ( uri->opaque )
  44. DBG ( " opaque \"%s\"", uri->opaque );
  45. if ( uri->user )
  46. DBG ( " user \"%s\"", uri->user );
  47. if ( uri->password )
  48. DBG ( " password \"%s\"", uri->password );
  49. if ( uri->host )
  50. DBG ( " host \"%s\"", uri->host );
  51. if ( uri->port )
  52. DBG ( " port \"%s\"", uri->port );
  53. if ( uri->path )
  54. DBG ( " path \"%s\"", uri->path );
  55. if ( uri->query )
  56. DBG ( " query \"%s\"", uri->query );
  57. if ( uri->fragment )
  58. DBG ( " fragment \"%s\"", uri->fragment );
  59. if ( uri->params )
  60. DBG ( " params \"%s\"", uri->params->name );
  61. }
  62. /**
  63. * Free URI
  64. *
  65. * @v refcnt Reference count
  66. */
  67. static void free_uri ( struct refcnt *refcnt ) {
  68. struct uri *uri = container_of ( refcnt, struct uri, refcnt );
  69. params_put ( uri->params );
  70. free ( uri );
  71. }
  72. /**
  73. * Parse URI
  74. *
  75. * @v uri_string URI as a string
  76. * @ret uri URI
  77. *
  78. * Splits a URI into its component parts. The return URI structure is
  79. * dynamically allocated and must eventually be freed by calling
  80. * uri_put().
  81. */
  82. struct uri * parse_uri ( const char *uri_string ) {
  83. struct uri *uri;
  84. struct parameters *params;
  85. char *raw;
  86. char *tmp;
  87. char *path;
  88. char *authority;
  89. int i;
  90. size_t raw_len;
  91. /* Allocate space for URI struct and a copy of the string */
  92. raw_len = ( strlen ( uri_string ) + 1 /* NUL */ );
  93. uri = zalloc ( sizeof ( *uri ) + raw_len );
  94. if ( ! uri )
  95. return NULL;
  96. ref_init ( &uri->refcnt, free_uri );
  97. raw = ( ( ( char * ) uri ) + sizeof ( *uri ) );
  98. /* Copy in the raw string */
  99. memcpy ( raw, uri_string, raw_len );
  100. /* Identify the parameter list, if present */
  101. if ( ( tmp = strstr ( raw, "##params" ) ) ) {
  102. *tmp = '\0';
  103. tmp += 8 /* "##params" */;
  104. params = find_parameters ( *tmp ? ( tmp + 1 ) : NULL );
  105. if ( params ) {
  106. uri->params = claim_parameters ( params );
  107. } else {
  108. /* Ignore non-existent submission blocks */
  109. }
  110. }
  111. /* Chop off the fragment, if it exists */
  112. if ( ( tmp = strchr ( raw, '#' ) ) ) {
  113. *(tmp++) = '\0';
  114. uri->fragment = tmp;
  115. }
  116. /* Identify absolute/relative URI. We ignore schemes that are
  117. * apparently only a single character long, since otherwise we
  118. * misinterpret a DOS-style path name ("C:\path\to\file") as a
  119. * URI with scheme="C",opaque="\path\to\file".
  120. */
  121. if ( ( tmp = strchr ( raw, ':' ) ) && ( tmp > ( raw + 1 ) ) ) {
  122. /* Absolute URI: identify hierarchical/opaque */
  123. uri->scheme = raw;
  124. *(tmp++) = '\0';
  125. if ( *tmp == '/' ) {
  126. /* Absolute URI with hierarchical part */
  127. path = tmp;
  128. } else {
  129. /* Absolute URI with opaque part */
  130. uri->opaque = tmp;
  131. path = NULL;
  132. }
  133. } else {
  134. /* Relative URI */
  135. path = raw;
  136. }
  137. /* If we don't have a path (i.e. we have an absolute URI with
  138. * an opaque portion, we're already finished processing
  139. */
  140. if ( ! path )
  141. goto done;
  142. /* Chop off the query, if it exists */
  143. if ( ( tmp = strchr ( path, '?' ) ) ) {
  144. *(tmp++) = '\0';
  145. uri->query = tmp;
  146. }
  147. /* Identify net/absolute/relative path */
  148. if ( strncmp ( path, "//", 2 ) == 0 ) {
  149. /* Net path. If this is terminated by the first '/'
  150. * of an absolute path, then we have no space for a
  151. * terminator after the authority field, so shuffle
  152. * the authority down by one byte, overwriting one of
  153. * the two slashes.
  154. */
  155. authority = ( path + 2 );
  156. if ( ( tmp = strchr ( authority, '/' ) ) ) {
  157. /* Shuffle down */
  158. uri->path = tmp;
  159. memmove ( ( authority - 1 ), authority,
  160. ( tmp - authority ) );
  161. authority--;
  162. *(--tmp) = '\0';
  163. }
  164. } else {
  165. /* Absolute/relative path */
  166. uri->path = path;
  167. authority = NULL;
  168. }
  169. /* If we don't have an authority (i.e. we have a non-net
  170. * path), we're already finished processing
  171. */
  172. if ( ! authority )
  173. goto done;
  174. /* Split authority into user[:password] and host[:port] portions */
  175. if ( ( tmp = strchr ( authority, '@' ) ) ) {
  176. /* Has user[:password] */
  177. *(tmp++) = '\0';
  178. uri->host = tmp;
  179. uri->user = authority;
  180. if ( ( tmp = strchr ( authority, ':' ) ) ) {
  181. /* Has password */
  182. *(tmp++) = '\0';
  183. uri->password = tmp;
  184. }
  185. } else {
  186. /* No user:password */
  187. uri->host = authority;
  188. }
  189. /* Split host into host[:port] */
  190. if ( ( tmp = strchr ( uri->host, ':' ) ) ) {
  191. *(tmp++) = '\0';
  192. uri->port = tmp;
  193. }
  194. /* Decode fields that should be decoded */
  195. for ( i = URI_FIRST_FIELD; i <= URI_LAST_FIELD; i++ ) {
  196. const char *field = uri_get_field ( uri, i );
  197. if ( field && ( URI_ENCODED & ( 1 << i ) ) )
  198. uri_decode ( field, ( char * ) field,
  199. strlen ( field ) + 1 /* NUL */ );
  200. }
  201. done:
  202. DBG ( "URI \"%s\" split into", uri_string );
  203. dump_uri ( uri );
  204. DBG ( "\n" );
  205. return uri;
  206. }
  207. /**
  208. * Get port from URI
  209. *
  210. * @v uri URI, or NULL
  211. * @v default_port Default port to use if none specified in URI
  212. * @ret port Port
  213. */
  214. unsigned int uri_port ( struct uri *uri, unsigned int default_port ) {
  215. if ( ( ! uri ) || ( ! uri->port ) )
  216. return default_port;
  217. return ( strtoul ( uri->port, NULL, 0 ) );
  218. }
  219. /**
  220. * Unparse URI
  221. *
  222. * @v buf Buffer to fill with URI string
  223. * @v size Size of buffer
  224. * @v uri URI to write into buffer, or NULL
  225. * @v fields Bitmask of fields to include in URI string, or URI_ALL
  226. * @ret len Length of URI string
  227. */
  228. int unparse_uri ( char *buf, size_t size, struct uri *uri,
  229. unsigned int fields ) {
  230. /* List of characters that typically go before certain fields */
  231. static char separators[] = { /* scheme */ 0, /* opaque */ ':',
  232. /* user */ 0, /* password */ ':',
  233. /* host */ '@', /* port */ ':',
  234. /* path */ 0, /* query */ '?',
  235. /* fragment */ '#' };
  236. int used = 0;
  237. int i;
  238. DBG ( "URI unparsing" );
  239. dump_uri ( uri );
  240. DBG ( "\n" );
  241. /* Ensure buffer is NUL-terminated */
  242. if ( size )
  243. buf[0] = '\0';
  244. /* Special-case NULL URI */
  245. if ( ! uri )
  246. return 0;
  247. /* Iterate through requested fields */
  248. for ( i = URI_FIRST_FIELD; i <= URI_LAST_FIELD; i++ ) {
  249. const char *field = uri_get_field ( uri, i );
  250. char sep = separators[i];
  251. /* Ensure `fields' only contains bits for fields that exist */
  252. if ( ! field )
  253. fields &= ~( 1 << i );
  254. /* Store this field if we were asked to */
  255. if ( fields & ( 1 << i ) ) {
  256. /* Print :// if we're non-opaque and had a scheme */
  257. if ( ( fields & URI_SCHEME_BIT ) &&
  258. ( i > URI_OPAQUE ) ) {
  259. used += ssnprintf ( buf + used, size - used,
  260. "://" );
  261. /* Only print :// once */
  262. fields &= ~URI_SCHEME_BIT;
  263. }
  264. /* Only print separator if an earlier field exists */
  265. if ( sep && ( fields & ( ( 1 << i ) - 1 ) ) )
  266. used += ssnprintf ( buf + used, size - used,
  267. "%c", sep );
  268. /* Print contents of field, possibly encoded */
  269. if ( URI_ENCODED & ( 1 << i ) )
  270. used += uri_encode ( field, buf + used,
  271. size - used, i );
  272. else
  273. used += ssnprintf ( buf + used, size - used,
  274. "%s", field );
  275. }
  276. }
  277. return used;
  278. }
  279. /**
  280. * Duplicate URI
  281. *
  282. * @v uri URI
  283. * @ret uri Duplicate URI
  284. *
  285. * Creates a modifiable copy of a URI.
  286. */
  287. struct uri * uri_dup ( struct uri *uri ) {
  288. size_t len = ( unparse_uri ( NULL, 0, uri, URI_ALL ) + 1 );
  289. char buf[len];
  290. unparse_uri ( buf, len, uri, URI_ALL );
  291. return parse_uri ( buf );
  292. }
  /**
   * Resolve base+relative path
   *
   * @v base_path		Base path
   * @v relative_path	Relative path
   * @ret resolved_path	Resolved path, or NULL on allocation failure
   *
   * Takes a base path (e.g. "/var/lib/tftpboot/vmlinuz") and a relative
   * path (e.g. "initrd.gz") and produces a new path
   * (e.g. "/var/lib/tftpboot/initrd.gz").  Note that any non-directory
   * portion of the base path will automatically be stripped; this
   * matches the semantics used when resolving the path component of
   * URIs.
   *
   * The result is heap-allocated; the caller is responsible for
   * freeing it.
   */
  char * resolve_path ( const char *base_path,
                        const char *relative_path ) {
      size_t base_size = ( strlen ( base_path ) + 1 /* NUL */ );
      char scratch[base_size];
      const char *rel = relative_path;
      const char *joiner;
      char *dir;
      char *resolved;
      size_t dir_len;
      size_t joiner_len;
      size_t rel_len;

      /* A relative path that is actually absolute replaces the base
       * entirely: just hand back a copy of it
       */
      if ( rel[0] == '/' ) {
          rel_len = strlen ( rel );
          resolved = malloc ( rel_len + 1 /* NUL */ );
          if ( resolved )
              memcpy ( resolved, rel, ( rel_len + 1 ) );
          return resolved;
      }

      /* dirname() needs a buffer that it is allowed to modify */
      memcpy ( scratch, base_path, base_size );
      dir = dirname ( scratch );

      /* Consume leading "./" and "../" elements (and a trailing
       * "." or ".."), stepping up one directory for each "..".
       * Anything else beginning with a dot (e.g. ".hidden" or
       * "..foo") is an ordinary name and ends the scan untouched.
       */
      while ( rel[0] == '.' ) {
          if ( rel[1] == '\0' ) {
              rel += 1;
          } else if ( rel[1] == '/' ) {
              rel += 2;
          } else if ( ( rel[1] == '.' ) && ( rel[2] == '\0' ) ) {
              dir = dirname ( dir );
              rel += 2;
          } else if ( ( rel[1] == '.' ) && ( rel[2] == '/' ) ) {
              dir = dirname ( dir );
              rel += 3;
          } else {
              break;
          }
      }

      /* Join directory and remainder, inserting a '/' only when
       * the directory does not already end with one
       */
      dir_len = strlen ( dir );
      joiner = ( ( dir[ dir_len - 1 ] == '/' ) ? "" : "/" );
      joiner_len = strlen ( joiner );
      rel_len = strlen ( rel );
      resolved = malloc ( dir_len + joiner_len + rel_len + 1 /* NUL */ );
      if ( ! resolved )
          return NULL;
      memcpy ( resolved, dir, dir_len );
      memcpy ( ( resolved + dir_len ), joiner, joiner_len );
      memcpy ( ( resolved + dir_len + joiner_len ), rel,
               ( rel_len + 1 /* NUL */ ) );

      return resolved;
  }
  349. /**
  350. * Resolve base+relative URI
  351. *
  352. * @v base_uri Base URI, or NULL
  353. * @v relative_uri Relative URI
  354. * @ret resolved_uri Resolved URI
  355. *
  356. * Takes a base URI (e.g. "http://ipxe.org/kernels/vmlinuz" and a
  357. * relative URI (e.g. "../initrds/initrd.gz") and produces a new URI
  358. * (e.g. "http://ipxe.org/initrds/initrd.gz").
  359. */
  360. struct uri * resolve_uri ( struct uri *base_uri,
  361. struct uri *relative_uri ) {
  362. struct uri tmp_uri;
  363. char *tmp_path = NULL;
  364. struct uri *new_uri;
  365. /* If relative URI is absolute, just re-use it */
  366. if ( uri_is_absolute ( relative_uri ) || ( ! base_uri ) )
  367. return uri_get ( relative_uri );
  368. /* Mangle URI */
  369. memcpy ( &tmp_uri, base_uri, sizeof ( tmp_uri ) );
  370. if ( relative_uri->path ) {
  371. tmp_path = resolve_path ( ( base_uri->path ?
  372. base_uri->path : "/" ),
  373. relative_uri->path );
  374. tmp_uri.path = tmp_path;
  375. tmp_uri.query = relative_uri->query;
  376. tmp_uri.fragment = relative_uri->fragment;
  377. } else if ( relative_uri->query ) {
  378. tmp_uri.query = relative_uri->query;
  379. tmp_uri.fragment = relative_uri->fragment;
  380. } else if ( relative_uri->fragment ) {
  381. tmp_uri.fragment = relative_uri->fragment;
  382. }
  383. /* Create demangled URI */
  384. new_uri = uri_dup ( &tmp_uri );
  385. free ( tmp_path );
  386. return new_uri;
  387. }
  388. /**
  389. * Test for unreserved URI characters
  390. *
  391. * @v c Character to test
  392. * @v field Field of URI in which character lies
  393. * @ret is_unreserved Character is an unreserved character
  394. */
  395. static int is_unreserved_uri_char ( int c, int field ) {
  396. /* According to RFC3986, the unreserved character set is
  397. *
  398. * A-Z a-z 0-9 - _ . ~
  399. *
  400. * but we also pass & ; = in queries, / in paths,
  401. * and everything in opaques
  402. */
  403. int ok = ( isupper ( c ) || islower ( c ) || isdigit ( c ) ||
  404. ( c == '-' ) || ( c == '_' ) ||
  405. ( c == '.' ) || ( c == '~' ) );
  406. if ( field == URI_QUERY )
  407. ok = ok || ( c == ';' ) || ( c == '&' ) || ( c == '=' );
  408. if ( field == URI_PATH )
  409. ok = ok || ( c == '/' );
  410. if ( field == URI_OPAQUE )
  411. ok = 1;
  412. return ok;
  413. }
  /**
   * URI-encode string
   *
   * @v raw_string	String to be URI-encoded
   * @v buf		Buffer to contain encoded string
   * @v len		Length of buffer
   * @v field		Field of URI in which string lies
   * @ret len		Length of encoded string (excluding NUL)
   *
   * Characters accepted by is_unreserved_uri_char() for the given
   * field are copied as they are; every other character is written
   * as "%XX" with two upper-case hexadecimal digits.  The returned
   * length is accumulated from what ssnprintf() reports for each
   * character.
   */
  size_t uri_encode ( const char *raw_string, char *buf, ssize_t len,
                      int field ) {
      const char *src;
      ssize_t space = len;
      size_t written;
      unsigned char c;

      /* Empty input must still yield a terminated string */
      if ( len > 0 )
          buf[0] = '\0';

      for ( src = raw_string ; ( c = *src ) != '\0' ; src++ ) {
          if ( ! is_unreserved_uri_char ( c, field ) ) {
              written = ssnprintf ( buf, space, "%%%02X", c );
          } else {
              written = ssnprintf ( buf, space, "%c", c );
          }
          buf += written;
          space -= written;
      }

      /* Whatever has been taken out of the budget is the length */
      return ( len - space );
  }
  /**
   * Convert a hexadecimal digit to its numeric value
   *
   * @v digit		Character to convert
   * @ret value		Value in the range 0-15, or -1 if not a hex digit
   */
  static int uri_hex_value ( unsigned char digit ) {

      if ( ( digit >= '0' ) && ( digit <= '9' ) )
          return ( digit - '0' );
      if ( ( digit >= 'a' ) && ( digit <= 'f' ) )
          return ( digit - 'a' + 10 );
      if ( ( digit >= 'A' ) && ( digit <= 'F' ) )
          return ( digit - 'A' + 10 );
      return -1;
  }

  /**
   * Decode URI-encoded string
   *
   * @v encoded_string	URI-encoded string
   * @v buf		Buffer to contain decoded string
   * @v len		Length of buffer
   * @ret len		Length of decoded string (excluding NUL)
   *
   * This function may be used in-place, with @a buf the same as
   * @a encoded_string.
   *
   * Only a '%' followed by exactly two hexadecimal digits is treated
   * as an escape sequence.  A '%' that is not followed by two
   * hexadecimal digits is copied through literally, so malformed
   * input can never inject a stray NUL or swallow following
   * characters.
   *
   * The returned length counts every decoded character, including
   * any that did not fit into the buffer.  Output is truncated to fit
   * and is NUL-terminated whenever @a len is greater than zero.
   */
  size_t uri_decode ( const char *encoded_string, char *buf, ssize_t len ) {
      ssize_t remaining;
      int high;
      int low;
      unsigned char c;

      for ( remaining = len; *encoded_string; remaining-- ) {
          c = *(encoded_string++);
          if ( c == '%' ) {
              /* Look at the second digit only if the first one
               * is valid; this also stops us from reading past
               * a NUL that directly follows the '%'
               */
              high = uri_hex_value ( encoded_string[0] );
              low = ( ( high < 0 ) ? -1 :
                      uri_hex_value ( encoded_string[1] ) );
              if ( low >= 0 ) {
                  c = ( ( high << 4 ) | low );
                  encoded_string += 2;
              }
          }
          /* Always leave room for the terminating NUL.  When
           * decoding in place, the write position never
           * overtakes the read position.
           */
          if ( remaining > 1 )
              *buf++ = c;
      }

      if ( len > 0 )
          *buf = 0;

      return ( len - remaining );
  }