You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465
  1. /*
  2. * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License as
  6. * published by the Free Software Foundation; either version 2 of the
  7. * License, or any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful, but
  10. * WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write to the Free Software
  16. * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17. */
  18. /** @file
  19. *
  20. * Uniform Resource Identifiers
  21. *
  22. */
  23. #include <stdint.h>
  24. #include <stdlib.h>
  25. #include <string.h>
  26. #include <libgen.h>
  27. #include <ctype.h>
  28. #include <gpxe/vsprintf.h>
  29. #include <gpxe/uri.h>
  30. /**
  31. * Dump URI for debugging
  32. *
  33. * @v uri URI
  34. */
  35. static void dump_uri ( struct uri *uri ) {
  36. if ( ! uri )
  37. return;
  38. if ( uri->scheme )
  39. DBG ( " scheme \"%s\"", uri->scheme );
  40. if ( uri->opaque )
  41. DBG ( " opaque \"%s\"", uri->opaque );
  42. if ( uri->user )
  43. DBG ( " user \"%s\"", uri->user );
  44. if ( uri->password )
  45. DBG ( " password \"%s\"", uri->password );
  46. if ( uri->host )
  47. DBG ( " host \"%s\"", uri->host );
  48. if ( uri->port )
  49. DBG ( " port \"%s\"", uri->port );
  50. if ( uri->path )
  51. DBG ( " path \"%s\"", uri->path );
  52. if ( uri->query )
  53. DBG ( " query \"%s\"", uri->query );
  54. if ( uri->fragment )
  55. DBG ( " fragment \"%s\"", uri->fragment );
  56. }
  57. /**
  58. * Parse URI
  59. *
  60. * @v uri_string URI as a string
  61. * @ret uri URI
  62. *
  63. * Splits a URI into its component parts. The return URI structure is
  64. * dynamically allocated and must eventually be freed by calling
  65. * uri_put().
  66. */
  67. struct uri * parse_uri ( const char *uri_string ) {
  68. struct uri *uri;
  69. char *raw;
  70. char *tmp;
  71. char *path = NULL;
  72. char *authority = NULL;
  73. size_t raw_len;
  74. /* Allocate space for URI struct and a copy of the string */
  75. raw_len = ( strlen ( uri_string ) + 1 /* NUL */ );
  76. uri = zalloc ( sizeof ( *uri ) + raw_len );
  77. if ( ! uri )
  78. return NULL;
  79. raw = ( ( ( char * ) uri ) + sizeof ( *uri ) );
  80. /* Zero URI struct and copy in the raw string */
  81. memcpy ( raw, uri_string, raw_len );
  82. /* Start by chopping off the fragment, if it exists */
  83. if ( ( tmp = strchr ( raw, '#' ) ) ) {
  84. *(tmp++) = '\0';
  85. uri->fragment = tmp;
  86. }
  87. /* Identify absolute/relative URI. We ignore schemes that are
  88. * apparently only a single character long, since otherwise we
  89. * misinterpret a DOS-style path name ("C:\path\to\file") as a
  90. * URI with scheme="C",opaque="\path\to\file".
  91. */
  92. if ( ( tmp = strchr ( raw, ':' ) ) && ( tmp > ( raw + 1 ) ) ) {
  93. /* Absolute URI: identify hierarchical/opaque */
  94. uri->scheme = raw;
  95. *(tmp++) = '\0';
  96. if ( *tmp == '/' ) {
  97. /* Absolute URI with hierarchical part */
  98. path = tmp;
  99. } else {
  100. /* Absolute URI with opaque part */
  101. uri->opaque = tmp;
  102. }
  103. } else {
  104. /* Relative URI */
  105. path = raw;
  106. }
  107. /* If we don't have a path (i.e. we have an absolute URI with
  108. * an opaque portion, we're already finished processing
  109. */
  110. if ( ! path )
  111. goto done;
  112. /* Chop off the query, if it exists */
  113. if ( ( tmp = strchr ( path, '?' ) ) ) {
  114. *(tmp++) = '\0';
  115. uri->query = tmp;
  116. }
  117. /* Identify net/absolute/relative path */
  118. if ( strncmp ( path, "//", 2 ) == 0 ) {
  119. /* Net path. If this is terminated by the first '/'
  120. * of an absolute path, then we have no space for a
  121. * terminator after the authority field, so shuffle
  122. * the authority down by one byte, overwriting one of
  123. * the two slashes.
  124. */
  125. authority = ( path + 2 );
  126. if ( ( tmp = strchr ( authority, '/' ) ) ) {
  127. /* Shuffle down */
  128. uri->path = tmp;
  129. memmove ( ( authority - 1 ), authority,
  130. ( tmp - authority ) );
  131. authority--;
  132. *(--tmp) = '\0';
  133. }
  134. } else {
  135. /* Absolute/relative path */
  136. uri->path = path;
  137. }
  138. /* Split authority into user[:password] and host[:port] portions */
  139. if ( ( tmp = strchr ( authority, '@' ) ) ) {
  140. /* Has user[:password] */
  141. *(tmp++) = '\0';
  142. uri->host = tmp;
  143. uri->user = authority;
  144. if ( ( tmp = strchr ( authority, ':' ) ) ) {
  145. /* Has password */
  146. *(tmp++) = '\0';
  147. uri->password = tmp;
  148. }
  149. } else {
  150. /* No user:password */
  151. uri->host = authority;
  152. }
  153. /* Split host into host[:port] */
  154. if ( ( tmp = strchr ( uri->host, ':' ) ) ) {
  155. *(tmp++) = '\0';
  156. uri->port = tmp;
  157. }
  158. done:
  159. DBG ( "URI \"%s\" split into", uri_string );
  160. dump_uri ( uri );
  161. DBG ( "\n" );
  162. return uri;
  163. }
  164. /**
  165. * Get port from URI
  166. *
  167. * @v uri URI, or NULL
  168. * @v default_port Default port to use if none specified in URI
  169. * @ret port Port
  170. */
  171. unsigned int uri_port ( struct uri *uri, unsigned int default_port ) {
  172. if ( ( ! uri ) || ( ! uri->port ) )
  173. return default_port;
  174. return ( strtoul ( uri->port, NULL, 0 ) );
  175. }
  176. /**
  177. * Unparse URI
  178. *
  179. * @v buf Buffer to fill with URI string
  180. * @v size Size of buffer
  181. * @v uri URI to write into buffer, or NULL
  182. * @ret len Length of URI string
  183. */
  184. int unparse_uri ( char *buf, size_t size, struct uri *uri ) {
  185. int used = 0;
  186. DBG ( "URI unparsing" );
  187. dump_uri ( uri );
  188. DBG ( "\n" );
  189. /* Special-case NULL URI */
  190. if ( ! uri ) {
  191. if ( size )
  192. buf[0] = '\0';
  193. return 0;
  194. }
  195. /* Special-case opaque URIs */
  196. if ( uri->opaque ) {
  197. return ssnprintf ( ( buf + used ), ( size - used ),
  198. "%s:%s", uri->scheme, uri->opaque );
  199. }
  200. /* scheme:// */
  201. if ( uri->scheme ) {
  202. used += ssnprintf ( ( buf + used ), ( size - used ),
  203. "%s://", uri->scheme );
  204. }
  205. /* [user[:password]@]host[:port] */
  206. if ( uri->host ) {
  207. if ( uri->user ) {
  208. used += ssnprintf ( ( buf + used ), ( size - used ),
  209. "%s", uri->user );
  210. if ( uri->password ) {
  211. used += ssnprintf ( ( buf + used ),
  212. ( size - used ),
  213. ":%s", uri->password );
  214. }
  215. used += ssnprintf ( ( buf + used ), ( size - used ),
  216. "@" );
  217. }
  218. used += ssnprintf ( ( buf + used ), ( size - used ), "%s",
  219. uri->host );
  220. if ( uri->port ) {
  221. used += ssnprintf ( ( buf + used ), ( size - used ),
  222. ":%s", uri->port );
  223. }
  224. }
  225. /* /path */
  226. if ( uri->path ) {
  227. used += ssnprintf ( ( buf + used ), ( size - used ),
  228. "%s", uri->path );
  229. }
  230. /* ?query */
  231. if ( uri->query ) {
  232. used += ssnprintf ( ( buf + used ), ( size - used ),
  233. "?%s", uri->query );
  234. }
  235. /* #fragment */
  236. if ( uri->fragment ) {
  237. used += ssnprintf ( ( buf + used ), ( size - used ),
  238. "#%s", uri->fragment );
  239. }
  240. return used;
  241. }
  /**
   * Duplicate URI
   *
   * @v uri               URI
   * @ret uri             Duplicate URI
   *
   * Creates a modifiable copy of a URI by unparsing it into a
   * temporary on-stack string and parsing that string again.
   */
  struct uri * uri_dup ( struct uri *uri ) {
      /* A first pass with a zero-sized buffer only measures the string */
      size_t text_len = ( unparse_uri ( NULL, 0, uri ) + 1 /* NUL */ );
      char text[text_len];

      unparse_uri ( text, text_len, uri );

      return parse_uri ( text );
  }
  /**
   * Resolve base+relative path
   *
   * @v base_path         Base path
   * @v relative_path     Relative path
   * @ret resolved_path   Resolved path, or NULL on allocation failure
   *
   * Takes a base path (e.g. "/var/lib/tftpboot/vmlinuz") and a relative
   * path (e.g. "initrd.gz") and produces a new path
   * (e.g. "/var/lib/tftpboot/initrd.gz").  Note that any non-directory
   * portion of the base path will automatically be stripped; this
   * matches the semantics used when resolving the path component of
   * URIs.
   *
   * The result is dynamically allocated (by strdup() or asprintf()).
   */
  char * resolve_path ( const char *base_path,
                        const char *relative_path ) {
      size_t base_size = ( strlen ( base_path ) + 1 /* NUL */ );
      char scratch[base_size];
      const char *rel = relative_path;
      const char *separator;
      char *dir;
      char *resolved;

      /* A relative path that is really absolute is used verbatim */
      if ( rel[0] == '/' )
          return strdup ( rel );

      /* dirname() may modify its argument, so work on a copy */
      memcpy ( scratch, base_path, base_size );
      dir = dirname ( scratch );

      /* Consume leading "./" and "../" elements (and a bare "." or
       * ".." that ends the string).  Anything else starting with a
       * dot, such as ".foo" or "..foo", is an ordinary name and is
       * left untouched.
       */
      while ( rel[0] == '.' ) {
          if ( rel[1] == '\0' ) {
              /* Trailing "." */
              rel += 1;
              break;
          }
          if ( rel[1] == '/' ) {
              /* "./" */
              rel += 2;
              continue;
          }
          if ( rel[1] != '.' )
              break;
          if ( rel[2] == '\0' ) {
              /* Trailing ".." */
              dir = dirname ( dir );
              rel += 2;
              break;
          }
          if ( rel[2] != '/' )
              break;
          /* "../" */
          dir = dirname ( dir );
          rel += 3;
      }

      /* Join directory and remainder, avoiding a doubled '/' */
      separator = ( ( dir[ strlen ( dir ) - 1 ] == '/' ) ? "" : "/" );
      if ( asprintf ( &resolved, "%s%s%s", dir, separator, rel ) < 0 )
          return NULL;

      return resolved;
  }
  312. /**
  313. * Resolve base+relative URI
  314. *
  315. * @v base_uri Base URI, or NULL
  316. * @v relative_uri Relative URI
  317. * @ret resolved_uri Resolved URI
  318. *
  319. * Takes a base URI (e.g. "http://etherboot.org/kernels/vmlinuz" and a
  320. * relative URI (e.g. "../initrds/initrd.gz") and produces a new URI
  321. * (e.g. "http://etherboot.org/initrds/initrd.gz").
  322. */
  323. struct uri * resolve_uri ( struct uri *base_uri,
  324. struct uri *relative_uri ) {
  325. struct uri tmp_uri;
  326. char *tmp_path = NULL;
  327. struct uri *new_uri;
  328. /* If relative URI is absolute, just re-use it */
  329. if ( uri_is_absolute ( relative_uri ) || ( ! base_uri ) )
  330. return uri_get ( relative_uri );
  331. /* Mangle URI */
  332. memcpy ( &tmp_uri, base_uri, sizeof ( tmp_uri ) );
  333. if ( relative_uri->path ) {
  334. tmp_path = resolve_path ( ( base_uri->path ?
  335. base_uri->path : "/" ),
  336. relative_uri->path );
  337. tmp_uri.path = tmp_path;
  338. tmp_uri.query = relative_uri->query;
  339. tmp_uri.fragment = relative_uri->fragment;
  340. } else if ( relative_uri->query ) {
  341. tmp_uri.query = relative_uri->query;
  342. tmp_uri.fragment = relative_uri->fragment;
  343. } else if ( relative_uri->fragment ) {
  344. tmp_uri.fragment = relative_uri->fragment;
  345. }
  346. /* Create demangled URI */
  347. new_uri = uri_dup ( &tmp_uri );
  348. free ( tmp_path );
  349. return new_uri;
  350. }
  /**
   * Test for unreserved URI characters
   *
   * @v c                 Character to test
   * @ret is_unreserved   Non-zero if the character is unreserved
   *
   * RFC3986 defines the unreserved set as the letters, the digits and
   * the four punctuation characters '-', '_', '.' and '~'.
   */
  static int is_unreserved_uri_char ( int c ) {
      switch ( c ) {
      case '-':
      case '_':
      case '.':
      case '~':
          return 1;
      default:
          return ( isupper ( c ) || islower ( c ) || isdigit ( c ) );
      }
  }
  /**
   * URI-encode string
   *
   * @v raw_string        String to be URI-encoded
   * @v buf               Buffer to contain encoded string
   * @v len               Length of buffer
   * @ret len             Length of encoded string (excluding NUL)
   *
   * Unreserved characters are copied through unchanged; every other
   * byte is written as '%' followed by two upper-case hex digits.
   */
  size_t uri_encode ( const char *raw_string, char *buf, size_t len ) {
      ssize_t remaining = len;
      const char *src;
      size_t used;
      unsigned char c;

      /* Guarantee a terminated result even for an empty input */
      if ( len )
          buf[0] = '\0';

      for ( src = raw_string ; ( c = *src ) != '\0' ; src++ ) {
          if ( is_unreserved_uri_char ( c ) ) {
              used = ssnprintf ( buf, remaining, "%c", c );
          } else {
              used = ssnprintf ( buf, remaining, "%%%02X", c );
          }
          /* Advance by the reported length of this piece */
          buf += used;
          remaining -= used;
      }

      return ( len - remaining );
  }
  /**
   * Decode URI-encoded string
   *
   * @v encoded_string    URI-encoded string
   * @v buf               Buffer to contain decoded string
   * @v len               Length of buffer
   * @ret len             Length of decoded string (excluding NUL)
   *
   * Each '%' is replaced by the byte value of the (up to two) hex
   * digits that follow it; every other character is copied as-is.
   */
  size_t uri_decode ( const char *encoded_string, char *buf, size_t len ) {
      ssize_t remaining = len;
      const char *src = encoded_string;
      char *dst = buf;
      char hexbuf[3];
      char *hexbuf_end;
      unsigned char c;

      /* Guarantee a terminated result even for an empty input */
      if ( len )
          buf[0] = '\0';

      while ( *src != '\0' ) {
          if ( *src == '%' ) {
              /* Copy at most two characters after the '%'
               * into a scratch buffer and convert them.
               */
              src++;
              snprintf ( hexbuf, sizeof ( hexbuf ), "%s", src );
              c = strtoul ( hexbuf, &hexbuf_end, 16 );
              /* Skip only what strtoul() actually consumed */
              src += ( hexbuf_end - hexbuf );
          } else {
              c = *src;
              src++;
          }
          ssnprintf ( dst, remaining, "%c", c );
          dst++;
          remaining--;
      }

      return ( len - remaining );
  }