You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

ipv4.c 16KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586
  1. #include <string.h>
  2. #include <stdint.h>
  3. #include <stdlib.h>
  4. #include <stdio.h>
  5. #include <errno.h>
  6. #include <byteswap.h>
  7. #include <gpxe/list.h>
  8. #include <gpxe/in.h>
  9. #include <gpxe/arp.h>
  10. #include <gpxe/if_ether.h>
  11. #include <gpxe/iobuf.h>
  12. #include <gpxe/netdevice.h>
  13. #include <gpxe/ip.h>
  14. #include <gpxe/tcpip.h>
  15. /** @file
  16. *
  17. * IPv4 protocol
  18. *
  19. */
  20. /* Unique IP datagram identification number */
  21. static uint16_t next_ident = 0;
  22. struct net_protocol ipv4_protocol;
  23. /** List of IPv4 miniroutes */
  24. struct list_head ipv4_miniroutes = LIST_HEAD_INIT ( ipv4_miniroutes );
  25. /** List of fragment reassembly buffers */
  26. static LIST_HEAD ( frag_buffers );
  27. /**
  28. * Add IPv4 minirouting table entry
  29. *
  30. * @v netdev Network device
  31. * @v address IPv4 address
  32. * @v netmask Subnet mask
  33. * @v gateway Gateway address (or @c INADDR_NONE for no gateway)
  34. * @ret miniroute Routing table entry, or NULL
  35. */
  36. static struct ipv4_miniroute * add_ipv4_miniroute ( struct net_device *netdev,
  37. struct in_addr address,
  38. struct in_addr netmask,
  39. struct in_addr gateway ) {
  40. struct ipv4_miniroute *miniroute;
  41. DBG ( "IPv4 add %s", inet_ntoa ( address ) );
  42. DBG ( "/%s ", inet_ntoa ( netmask ) );
  43. if ( gateway.s_addr != INADDR_NONE )
  44. DBG ( "gw %s ", inet_ntoa ( gateway ) );
  45. DBG ( "via %s\n", netdev->name );
  46. /* Allocate and populate miniroute structure */
  47. miniroute = malloc ( sizeof ( *miniroute ) );
  48. if ( ! miniroute ) {
  49. DBG ( "IPv4 could not add miniroute\n" );
  50. return NULL;
  51. }
  52. /* Record routing information */
  53. miniroute->netdev = netdev_get ( netdev );
  54. miniroute->address = address;
  55. miniroute->netmask = netmask;
  56. miniroute->gateway = gateway;
  57. /* Add to end of list if we have a gateway, otherwise
  58. * to start of list.
  59. */
  60. if ( gateway.s_addr != INADDR_NONE ) {
  61. list_add_tail ( &miniroute->list, &ipv4_miniroutes );
  62. } else {
  63. list_add ( &miniroute->list, &ipv4_miniroutes );
  64. }
  65. return miniroute;
  66. }
  67. /**
  68. * Delete IPv4 minirouting table entry
  69. *
  70. * @v miniroute Routing table entry
  71. */
  72. static void del_ipv4_miniroute ( struct ipv4_miniroute *miniroute ) {
  73. DBG ( "IPv4 del %s", inet_ntoa ( miniroute->address ) );
  74. DBG ( "/%s ", inet_ntoa ( miniroute->netmask ) );
  75. if ( miniroute->gateway.s_addr != INADDR_NONE )
  76. DBG ( "gw %s ", inet_ntoa ( miniroute->gateway ) );
  77. DBG ( "via %s\n", miniroute->netdev->name );
  78. netdev_put ( miniroute->netdev );
  79. list_del ( &miniroute->list );
  80. free ( miniroute );
  81. }
  82. /**
  83. * Add IPv4 interface
  84. *
  85. * @v netdev Network device
  86. * @v address IPv4 address
  87. * @v netmask Subnet mask
  88. * @v gateway Gateway address (or @c INADDR_NONE for no gateway)
  89. * @ret rc Return status code
  90. *
  91. */
  92. int add_ipv4_address ( struct net_device *netdev, struct in_addr address,
  93. struct in_addr netmask, struct in_addr gateway ) {
  94. struct ipv4_miniroute *miniroute;
  95. /* Clear any existing address for this net device */
  96. del_ipv4_address ( netdev );
  97. /* Add new miniroute */
  98. miniroute = add_ipv4_miniroute ( netdev, address, netmask, gateway );
  99. if ( ! miniroute )
  100. return -ENOMEM;
  101. return 0;
  102. }
  103. /**
  104. * Remove IPv4 interface
  105. *
  106. * @v netdev Network device
  107. */
  108. void del_ipv4_address ( struct net_device *netdev ) {
  109. struct ipv4_miniroute *miniroute;
  110. list_for_each_entry ( miniroute, &ipv4_miniroutes, list ) {
  111. if ( miniroute->netdev == netdev ) {
  112. del_ipv4_miniroute ( miniroute );
  113. break;
  114. }
  115. }
  116. }
  117. /**
  118. * Perform IPv4 routing
  119. *
  120. * @v dest Final destination address
  121. * @ret dest Next hop destination address
  122. * @ret miniroute Routing table entry to use, or NULL if no route
  123. *
  124. * If the route requires use of a gateway, the next hop destination
  125. * address will be overwritten with the gateway address.
  126. */
  127. static struct ipv4_miniroute * ipv4_route ( struct in_addr *dest ) {
  128. struct ipv4_miniroute *miniroute;
  129. int local;
  130. int has_gw;
  131. /* Never attempt to route the broadcast address */
  132. if ( dest->s_addr == INADDR_BROADCAST )
  133. return NULL;
  134. /* Find first usable route in routing table */
  135. list_for_each_entry ( miniroute, &ipv4_miniroutes, list ) {
  136. local = ( ( ( dest->s_addr ^ miniroute->address.s_addr )
  137. & miniroute->netmask.s_addr ) == 0 );
  138. has_gw = ( miniroute->gateway.s_addr != INADDR_NONE );
  139. if ( local || has_gw ) {
  140. if ( ! local )
  141. *dest = miniroute->gateway;
  142. return miniroute;
  143. }
  144. }
  145. return NULL;
  146. }
  147. /**
  148. * Fragment reassembly counter timeout
  149. *
  150. * @v timer Retry timer
  151. * @v over If asserted, the timer is greater than @c MAX_TIMEOUT
  152. */
  153. static void ipv4_frag_expired ( struct retry_timer *timer __unused,
  154. int over ) {
  155. if ( over ) {
  156. DBG ( "Fragment reassembly timeout" );
  157. /* Free the fragment buffer */
  158. }
  159. }
  160. /**
  161. * Free fragment buffer
  162. *
  163. * @v fragbug Fragment buffer
  164. */
  165. static void free_fragbuf ( struct frag_buffer *fragbuf ) {
  166. free ( fragbuf );
  167. }
  168. /**
  169. * Fragment reassembler
  170. *
  171. * @v iobuf I/O buffer, fragment of the datagram
  172. * @ret frag_iob Reassembled packet, or NULL
  173. */
  174. static struct io_buffer * ipv4_reassemble ( struct io_buffer * iobuf ) {
  175. struct iphdr *iphdr = iobuf->data;
  176. struct frag_buffer *fragbuf;
  177. /**
  178. * Check if the fragment belongs to any fragment series
  179. */
  180. list_for_each_entry ( fragbuf, &frag_buffers, list ) {
  181. if ( fragbuf->ident == iphdr->ident &&
  182. fragbuf->src.s_addr == iphdr->src.s_addr ) {
  183. /**
  184. * Check if the packet is the expected fragment
  185. *
  186. * The offset of the new packet must be equal to the
  187. * length of the data accumulated so far (the length of
  188. * the reassembled I/O buffer
  189. */
  190. if ( iob_len ( fragbuf->frag_iob ) ==
  191. ( iphdr->frags & IP_MASK_OFFSET ) ) {
  192. /**
  193. * Append the contents of the fragment to the
  194. * reassembled I/O buffer
  195. */
  196. iob_pull ( iobuf, sizeof ( *iphdr ) );
  197. memcpy ( iob_put ( fragbuf->frag_iob,
  198. iob_len ( iobuf ) ),
  199. iobuf->data, iob_len ( iobuf ) );
  200. free_iob ( iobuf );
  201. /** Check if the fragment series is over */
  202. if ( !iphdr->frags & IP_MASK_MOREFRAGS ) {
  203. iobuf = fragbuf->frag_iob;
  204. free_fragbuf ( fragbuf );
  205. return iobuf;
  206. }
  207. } else {
  208. /* Discard the fragment series */
  209. free_fragbuf ( fragbuf );
  210. free_iob ( iobuf );
  211. }
  212. return NULL;
  213. }
  214. }
  215. /** Check if the fragment is the first in the fragment series */
  216. if ( iphdr->frags & IP_MASK_MOREFRAGS &&
  217. ( ( iphdr->frags & IP_MASK_OFFSET ) == 0 ) ) {
  218. /** Create a new fragment buffer */
  219. fragbuf = ( struct frag_buffer* ) malloc ( sizeof( *fragbuf ) );
  220. fragbuf->ident = iphdr->ident;
  221. fragbuf->src = iphdr->src;
  222. /* Set up the reassembly I/O buffer */
  223. fragbuf->frag_iob = alloc_iob ( IP_FRAG_IOB_SIZE );
  224. iob_pull ( iobuf, sizeof ( *iphdr ) );
  225. memcpy ( iob_put ( fragbuf->frag_iob, iob_len ( iobuf ) ),
  226. iobuf->data, iob_len ( iobuf ) );
  227. free_iob ( iobuf );
  228. /* Set the reassembly timer */
  229. fragbuf->frag_timer.timeout = IP_FRAG_TIMEOUT;
  230. fragbuf->frag_timer.expired = ipv4_frag_expired;
  231. start_timer ( &fragbuf->frag_timer );
  232. /* Add the fragment buffer to the list of fragment buffers */
  233. list_add ( &fragbuf->list, &frag_buffers );
  234. }
  235. return NULL;
  236. }
  237. /**
  238. * Add IPv4 pseudo-header checksum to existing checksum
  239. *
  240. * @v iobuf I/O buffer
  241. * @v csum Existing checksum
  242. * @ret csum Updated checksum
  243. */
  244. static uint16_t ipv4_pshdr_chksum ( struct io_buffer *iobuf, uint16_t csum ) {
  245. struct ipv4_pseudo_header pshdr;
  246. struct iphdr *iphdr = iobuf->data;
  247. size_t hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 );
  248. /* Build pseudo-header */
  249. pshdr.src = iphdr->src;
  250. pshdr.dest = iphdr->dest;
  251. pshdr.zero_padding = 0x00;
  252. pshdr.protocol = iphdr->protocol;
  253. pshdr.len = htons ( iob_len ( iobuf ) - hdrlen );
  254. /* Update the checksum value */
  255. return tcpip_continue_chksum ( csum, &pshdr, sizeof ( pshdr ) );
  256. }
  257. /**
  258. * Determine link-layer address
  259. *
  260. * @v dest IPv4 destination address
  261. * @v src IPv4 source address
  262. * @v netdev Network device
  263. * @v ll_dest Link-layer destination address buffer
  264. * @ret rc Return status code
  265. */
  266. static int ipv4_ll_addr ( struct in_addr dest, struct in_addr src,
  267. struct net_device *netdev, uint8_t *ll_dest ) {
  268. struct ll_protocol *ll_protocol = netdev->ll_protocol;
  269. uint8_t *dest_bytes = ( ( uint8_t * ) &dest );
  270. if ( dest.s_addr == INADDR_BROADCAST ) {
  271. /* Broadcast address */
  272. memcpy ( ll_dest, ll_protocol->ll_broadcast,
  273. ll_protocol->ll_addr_len );
  274. return 0;
  275. } else if ( IN_MULTICAST ( dest.s_addr ) ) {
  276. /* Special case: IPv4 multicast over Ethernet. This
  277. * code may need to be generalised once we find out
  278. * what happens for other link layers.
  279. */
  280. ll_dest[0] = 0x01;
  281. ll_dest[1] = 0x00;
  282. ll_dest[2] = 0x5e;
  283. ll_dest[3] = dest_bytes[1] & 0x7f;
  284. ll_dest[4] = dest_bytes[2];
  285. ll_dest[5] = dest_bytes[3];
  286. return 0;
  287. } else {
  288. /* Unicast address: resolve via ARP */
  289. return arp_resolve ( netdev, &ipv4_protocol, &dest,
  290. &src, ll_dest );
  291. }
  292. }
  293. /**
  294. * Transmit IP packet
  295. *
  296. * @v iobuf I/O buffer
  297. * @v tcpip Transport-layer protocol
  298. * @v st_dest Destination network-layer address
  299. * @v netdev Network device to use if no route found, or NULL
  300. * @v trans_csum Transport-layer checksum to complete, or NULL
  301. * @ret rc Status
  302. *
  303. * This function expects a transport-layer segment and prepends the IP header
  304. */
  305. static int ipv4_tx ( struct io_buffer *iobuf,
  306. struct tcpip_protocol *tcpip_protocol,
  307. struct sockaddr_tcpip *st_dest,
  308. struct net_device *netdev,
  309. uint16_t *trans_csum ) {
  310. struct iphdr *iphdr = iob_push ( iobuf, sizeof ( *iphdr ) );
  311. struct sockaddr_in *sin_dest = ( ( struct sockaddr_in * ) st_dest );
  312. struct ipv4_miniroute *miniroute;
  313. struct in_addr next_hop;
  314. uint8_t ll_dest[MAX_LL_ADDR_LEN];
  315. int rc;
  316. /* Fill up the IP header, except source address */
  317. memset ( iphdr, 0, sizeof ( *iphdr ) );
  318. iphdr->verhdrlen = ( IP_VER | ( sizeof ( *iphdr ) / 4 ) );
  319. iphdr->service = IP_TOS;
  320. iphdr->len = htons ( iob_len ( iobuf ) );
  321. iphdr->ident = htons ( ++next_ident );
  322. iphdr->ttl = IP_TTL;
  323. iphdr->protocol = tcpip_protocol->tcpip_proto;
  324. iphdr->dest = sin_dest->sin_addr;
  325. /* Use routing table to identify next hop and transmitting netdev */
  326. next_hop = iphdr->dest;
  327. if ( ( miniroute = ipv4_route ( &next_hop ) ) ) {
  328. iphdr->src = miniroute->address;
  329. netdev = miniroute->netdev;
  330. }
  331. if ( ! netdev ) {
  332. DBG ( "IPv4 has no route to %s\n", inet_ntoa ( iphdr->dest ) );
  333. rc = -ENETUNREACH;
  334. goto err;
  335. }
  336. /* Determine link-layer destination address */
  337. if ( ( rc = ipv4_ll_addr ( next_hop, iphdr->src, netdev,
  338. ll_dest ) ) != 0 ) {
  339. DBG ( "IPv4 has no link-layer address for %s: %s\n",
  340. inet_ntoa ( next_hop ), strerror ( rc ) );
  341. goto err;
  342. }
  343. /* Fix up checksums */
  344. if ( trans_csum )
  345. *trans_csum = ipv4_pshdr_chksum ( iobuf, *trans_csum );
  346. iphdr->chksum = tcpip_chksum ( iphdr, sizeof ( *iphdr ) );
  347. /* Print IP4 header for debugging */
  348. DBG ( "IPv4 TX %s->", inet_ntoa ( iphdr->src ) );
  349. DBG ( "%s len %d proto %d id %04x csum %04x\n",
  350. inet_ntoa ( iphdr->dest ), ntohs ( iphdr->len ), iphdr->protocol,
  351. ntohs ( iphdr->ident ), ntohs ( iphdr->chksum ) );
  352. /* Hand off to link layer */
  353. if ( ( rc = net_tx ( iobuf, netdev, &ipv4_protocol, ll_dest ) ) != 0 ) {
  354. DBG ( "IPv4 could not transmit packet via %s: %s\n",
  355. netdev->name, strerror ( rc ) );
  356. return rc;
  357. }
  358. return 0;
  359. err:
  360. free_iob ( iobuf );
  361. return rc;
  362. }
  363. /**
  364. * Process incoming packets
  365. *
  366. * @v iobuf I/O buffer
  367. * @v netdev Network device
  368. * @v ll_source Link-layer destination source
  369. *
  370. * This function expects an IP4 network datagram. It processes the headers
  371. * and sends it to the transport layer.
  372. */
  373. static int ipv4_rx ( struct io_buffer *iobuf, struct net_device *netdev __unused,
  374. const void *ll_source __unused ) {
  375. struct iphdr *iphdr = iobuf->data;
  376. size_t hdrlen;
  377. size_t len;
  378. union {
  379. struct sockaddr_in sin;
  380. struct sockaddr_tcpip st;
  381. } src, dest;
  382. uint16_t csum;
  383. uint16_t pshdr_csum;
  384. int rc;
  385. /* Sanity check the IPv4 header */
  386. if ( iob_len ( iobuf ) < sizeof ( *iphdr ) ) {
  387. DBG ( "IPv4 packet too short at %d bytes (min %d bytes)\n",
  388. iob_len ( iobuf ), sizeof ( *iphdr ) );
  389. goto err;
  390. }
  391. if ( ( iphdr->verhdrlen & IP_MASK_VER ) != IP_VER ) {
  392. DBG ( "IPv4 version %#02x not supported\n", iphdr->verhdrlen );
  393. goto err;
  394. }
  395. hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 );
  396. if ( hdrlen < sizeof ( *iphdr ) ) {
  397. DBG ( "IPv4 header too short at %d bytes (min %d bytes)\n",
  398. hdrlen, sizeof ( *iphdr ) );
  399. goto err;
  400. }
  401. if ( hdrlen > iob_len ( iobuf ) ) {
  402. DBG ( "IPv4 header too long at %d bytes "
  403. "(packet is %d bytes)\n", hdrlen, iob_len ( iobuf ) );
  404. goto err;
  405. }
  406. if ( ( csum = tcpip_chksum ( iphdr, hdrlen ) ) != 0 ) {
  407. DBG ( "IPv4 checksum incorrect (is %04x including checksum "
  408. "field, should be 0000)\n", csum );
  409. goto err;
  410. }
  411. len = ntohs ( iphdr->len );
  412. if ( len < hdrlen ) {
  413. DBG ( "IPv4 length too short at %d bytes "
  414. "(header is %d bytes)\n", len, hdrlen );
  415. goto err;
  416. }
  417. if ( len > iob_len ( iobuf ) ) {
  418. DBG ( "IPv4 length too long at %d bytes "
  419. "(packet is %d bytes)\n", len, iob_len ( iobuf ) );
  420. goto err;
  421. }
  422. /* Print IPv4 header for debugging */
  423. DBG ( "IPv4 RX %s<-", inet_ntoa ( iphdr->dest ) );
  424. DBG ( "%s len %d proto %d id %04x csum %04x\n",
  425. inet_ntoa ( iphdr->src ), ntohs ( iphdr->len ), iphdr->protocol,
  426. ntohs ( iphdr->ident ), ntohs ( iphdr->chksum ) );
  427. /* Truncate packet to correct length, calculate pseudo-header
  428. * checksum and then strip off the IPv4 header.
  429. */
  430. iob_unput ( iobuf, ( iob_len ( iobuf ) - len ) );
  431. pshdr_csum = ipv4_pshdr_chksum ( iobuf, TCPIP_EMPTY_CSUM );
  432. iob_pull ( iobuf, hdrlen );
  433. /* Fragment reassembly */
  434. if ( ( iphdr->frags & htons ( IP_MASK_MOREFRAGS ) ) ||
  435. ( ( iphdr->frags & htons ( IP_MASK_OFFSET ) ) != 0 ) ) {
  436. /* Pass the fragment to ipv4_reassemble() which either
  437. * returns a fully reassembled I/O buffer or NULL.
  438. */
  439. iobuf = ipv4_reassemble ( iobuf );
  440. if ( ! iobuf )
  441. return 0;
  442. }
  443. /* Construct socket addresses and hand off to transport layer */
  444. memset ( &src, 0, sizeof ( src ) );
  445. src.sin.sin_family = AF_INET;
  446. src.sin.sin_addr = iphdr->src;
  447. memset ( &dest, 0, sizeof ( dest ) );
  448. dest.sin.sin_family = AF_INET;
  449. dest.sin.sin_addr = iphdr->dest;
  450. if ( ( rc = tcpip_rx ( iobuf, iphdr->protocol, &src.st,
  451. &dest.st, pshdr_csum ) ) != 0 ) {
  452. DBG ( "IPv4 received packet rejected by stack: %s\n",
  453. strerror ( rc ) );
  454. return rc;
  455. }
  456. return 0;
  457. err:
  458. free_iob ( iobuf );
  459. return -EINVAL;
  460. }
  461. /**
  462. * Check existence of IPv4 address for ARP
  463. *
  464. * @v netdev Network device
  465. * @v net_addr Network-layer address
  466. * @ret rc Return status code
  467. */
  468. static int ipv4_arp_check ( struct net_device *netdev, const void *net_addr ) {
  469. const struct in_addr *address = net_addr;
  470. struct ipv4_miniroute *miniroute;
  471. list_for_each_entry ( miniroute, &ipv4_miniroutes, list ) {
  472. if ( ( miniroute->netdev == netdev ) &&
  473. ( miniroute->address.s_addr == address->s_addr ) ) {
  474. /* Found matching address */
  475. return 0;
  476. }
  477. }
  478. return -ENOENT;
  479. }
  /**
   * Format an IPv4 address in dotted-quad notation
   *
   * @v in            IP address
   * @ret string      IP address in dotted-quad notation
   *
   * The address bytes are printed in the order in which they are stored
   * in memory.  The result lives in a single static buffer, so each call
   * overwrites the string returned by the previous one.
   */
  char * inet_ntoa ( struct in_addr in ) {
      static char dotted[16]; /* "xxx.xxx.xxx.xxx" plus terminating NUL */
      const uint8_t *octet = ( ( const uint8_t * ) &in );

      sprintf ( dotted, "%d.%d.%d.%d",
                octet[0], octet[1], octet[2], octet[3] );
      return dotted;
  }
  492. /**
  493. * Transcribe IP address
  494. *
  495. * @v net_addr IP address
  496. * @ret string IP address in dotted-quad notation
  497. *
  498. */
  499. static const char * ipv4_ntoa ( const void *net_addr ) {
  500. return inet_ntoa ( * ( ( struct in_addr * ) net_addr ) );
  501. }
  502. /** IPv4 protocol */
  503. struct net_protocol ipv4_protocol __net_protocol = {
  504. .name = "IP",
  505. .net_proto = htons ( ETH_P_IP ),
  506. .net_addr_len = sizeof ( struct in_addr ),
  507. .rx = ipv4_rx,
  508. .ntoa = ipv4_ntoa,
  509. };
  510. /** IPv4 TCPIP net protocol */
  511. struct tcpip_net_protocol ipv4_tcpip_protocol __tcpip_net_protocol = {
  512. .name = "IPv4",
  513. .sa_family = AF_INET,
  514. .tx = ipv4_tx,
  515. };
  516. /** IPv4 ARP protocol */
  517. struct arp_net_protocol ipv4_arp_protocol __arp_net_protocol = {
  518. .net_protocol = &ipv4_protocol,
  519. .check = ipv4_arp_check,
  520. };