You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585
  1. #include <string.h>
  2. #include <stdint.h>
  3. #include <stdlib.h>
  4. #include <stdio.h>
  5. #include <errno.h>
  6. #include <byteswap.h>
  7. #include <gpxe/list.h>
  8. #include <gpxe/in.h>
  9. #include <gpxe/arp.h>
  10. #include <gpxe/if_ether.h>
  11. #include <gpxe/iobuf.h>
  12. #include <gpxe/netdevice.h>
  13. #include <gpxe/ip.h>
  14. #include <gpxe/tcpip.h>
  15. /** @file
  16. *
  17. * IPv4 protocol
  18. *
  19. */
  20. /* Unique IP datagram identification number */
  21. static uint16_t next_ident = 0;
  22. struct net_protocol ipv4_protocol;
  23. /** List of IPv4 miniroutes */
  24. struct list_head ipv4_miniroutes = LIST_HEAD_INIT ( ipv4_miniroutes );
  25. /** List of fragment reassembly buffers */
  26. static LIST_HEAD ( frag_buffers );
  27. /**
  28. * Add IPv4 minirouting table entry
  29. *
  30. * @v netdev Network device
  31. * @v address IPv4 address
  32. * @v netmask Subnet mask
  33. * @v gateway Gateway address (or @c INADDR_NONE for no gateway)
  34. * @ret miniroute Routing table entry, or NULL
  35. */
  36. static struct ipv4_miniroute * __malloc
  37. add_ipv4_miniroute ( struct net_device *netdev, struct in_addr address,
  38. struct in_addr netmask, struct in_addr gateway ) {
  39. struct ipv4_miniroute *miniroute;
  40. DBG ( "IPv4 add %s", inet_ntoa ( address ) );
  41. DBG ( "/%s ", inet_ntoa ( netmask ) );
  42. if ( gateway.s_addr != INADDR_NONE )
  43. DBG ( "gw %s ", inet_ntoa ( gateway ) );
  44. DBG ( "via %s\n", netdev->name );
  45. /* Allocate and populate miniroute structure */
  46. miniroute = malloc ( sizeof ( *miniroute ) );
  47. if ( ! miniroute ) {
  48. DBG ( "IPv4 could not add miniroute\n" );
  49. return NULL;
  50. }
  51. /* Record routing information */
  52. miniroute->netdev = netdev_get ( netdev );
  53. miniroute->address = address;
  54. miniroute->netmask = netmask;
  55. miniroute->gateway = gateway;
  56. /* Add to end of list if we have a gateway, otherwise
  57. * to start of list.
  58. */
  59. if ( gateway.s_addr != INADDR_NONE ) {
  60. list_add_tail ( &miniroute->list, &ipv4_miniroutes );
  61. } else {
  62. list_add ( &miniroute->list, &ipv4_miniroutes );
  63. }
  64. return miniroute;
  65. }
  66. /**
  67. * Delete IPv4 minirouting table entry
  68. *
  69. * @v miniroute Routing table entry
  70. */
  71. static void del_ipv4_miniroute ( struct ipv4_miniroute *miniroute ) {
  72. DBG ( "IPv4 del %s", inet_ntoa ( miniroute->address ) );
  73. DBG ( "/%s ", inet_ntoa ( miniroute->netmask ) );
  74. if ( miniroute->gateway.s_addr != INADDR_NONE )
  75. DBG ( "gw %s ", inet_ntoa ( miniroute->gateway ) );
  76. DBG ( "via %s\n", miniroute->netdev->name );
  77. netdev_put ( miniroute->netdev );
  78. list_del ( &miniroute->list );
  79. free ( miniroute );
  80. }
  81. /**
  82. * Add IPv4 interface
  83. *
  84. * @v netdev Network device
  85. * @v address IPv4 address
  86. * @v netmask Subnet mask
  87. * @v gateway Gateway address (or @c INADDR_NONE for no gateway)
  88. * @ret rc Return status code
  89. *
  90. */
  91. int add_ipv4_address ( struct net_device *netdev, struct in_addr address,
  92. struct in_addr netmask, struct in_addr gateway ) {
  93. struct ipv4_miniroute *miniroute;
  94. /* Clear any existing address for this net device */
  95. del_ipv4_address ( netdev );
  96. /* Add new miniroute */
  97. miniroute = add_ipv4_miniroute ( netdev, address, netmask, gateway );
  98. if ( ! miniroute )
  99. return -ENOMEM;
  100. return 0;
  101. }
  102. /**
  103. * Remove IPv4 interface
  104. *
  105. * @v netdev Network device
  106. */
  107. void del_ipv4_address ( struct net_device *netdev ) {
  108. struct ipv4_miniroute *miniroute;
  109. list_for_each_entry ( miniroute, &ipv4_miniroutes, list ) {
  110. if ( miniroute->netdev == netdev ) {
  111. del_ipv4_miniroute ( miniroute );
  112. break;
  113. }
  114. }
  115. }
  116. /**
  117. * Perform IPv4 routing
  118. *
  119. * @v dest Final destination address
  120. * @ret dest Next hop destination address
  121. * @ret miniroute Routing table entry to use, or NULL if no route
  122. *
  123. * If the route requires use of a gateway, the next hop destination
  124. * address will be overwritten with the gateway address.
  125. */
  126. static struct ipv4_miniroute * ipv4_route ( struct in_addr *dest ) {
  127. struct ipv4_miniroute *miniroute;
  128. int local;
  129. int has_gw;
  130. /* Never attempt to route the broadcast address */
  131. if ( dest->s_addr == INADDR_BROADCAST )
  132. return NULL;
  133. /* Find first usable route in routing table */
  134. list_for_each_entry ( miniroute, &ipv4_miniroutes, list ) {
  135. local = ( ( ( dest->s_addr ^ miniroute->address.s_addr )
  136. & miniroute->netmask.s_addr ) == 0 );
  137. has_gw = ( miniroute->gateway.s_addr != INADDR_NONE );
  138. if ( local || has_gw ) {
  139. if ( ! local )
  140. *dest = miniroute->gateway;
  141. return miniroute;
  142. }
  143. }
  144. return NULL;
  145. }
  146. /**
  147. * Fragment reassembly counter timeout
  148. *
  149. * @v timer Retry timer
  150. * @v over If asserted, the timer is greater than @c MAX_TIMEOUT
  151. */
  152. static void ipv4_frag_expired ( struct retry_timer *timer __unused,
  153. int over ) {
  154. if ( over ) {
  155. DBG ( "Fragment reassembly timeout" );
  156. /* Free the fragment buffer */
  157. }
  158. }
  159. /**
  160. * Free fragment buffer
  161. *
  162. * @v fragbug Fragment buffer
  163. */
  164. static void free_fragbuf ( struct frag_buffer *fragbuf ) {
  165. free ( fragbuf );
  166. }
  167. /**
  168. * Fragment reassembler
  169. *
  170. * @v iobuf I/O buffer, fragment of the datagram
  171. * @ret frag_iob Reassembled packet, or NULL
  172. */
  173. static struct io_buffer * ipv4_reassemble ( struct io_buffer * iobuf ) {
  174. struct iphdr *iphdr = iobuf->data;
  175. struct frag_buffer *fragbuf;
  176. /**
  177. * Check if the fragment belongs to any fragment series
  178. */
  179. list_for_each_entry ( fragbuf, &frag_buffers, list ) {
  180. if ( fragbuf->ident == iphdr->ident &&
  181. fragbuf->src.s_addr == iphdr->src.s_addr ) {
  182. /**
  183. * Check if the packet is the expected fragment
  184. *
  185. * The offset of the new packet must be equal to the
  186. * length of the data accumulated so far (the length of
  187. * the reassembled I/O buffer
  188. */
  189. if ( iob_len ( fragbuf->frag_iob ) ==
  190. ( iphdr->frags & IP_MASK_OFFSET ) ) {
  191. /**
  192. * Append the contents of the fragment to the
  193. * reassembled I/O buffer
  194. */
  195. iob_pull ( iobuf, sizeof ( *iphdr ) );
  196. memcpy ( iob_put ( fragbuf->frag_iob,
  197. iob_len ( iobuf ) ),
  198. iobuf->data, iob_len ( iobuf ) );
  199. free_iob ( iobuf );
  200. /** Check if the fragment series is over */
  201. if ( !iphdr->frags & IP_MASK_MOREFRAGS ) {
  202. iobuf = fragbuf->frag_iob;
  203. free_fragbuf ( fragbuf );
  204. return iobuf;
  205. }
  206. } else {
  207. /* Discard the fragment series */
  208. free_fragbuf ( fragbuf );
  209. free_iob ( iobuf );
  210. }
  211. return NULL;
  212. }
  213. }
  214. /** Check if the fragment is the first in the fragment series */
  215. if ( iphdr->frags & IP_MASK_MOREFRAGS &&
  216. ( ( iphdr->frags & IP_MASK_OFFSET ) == 0 ) ) {
  217. /** Create a new fragment buffer */
  218. fragbuf = ( struct frag_buffer* ) malloc ( sizeof( *fragbuf ) );
  219. fragbuf->ident = iphdr->ident;
  220. fragbuf->src = iphdr->src;
  221. /* Set up the reassembly I/O buffer */
  222. fragbuf->frag_iob = alloc_iob ( IP_FRAG_IOB_SIZE );
  223. iob_pull ( iobuf, sizeof ( *iphdr ) );
  224. memcpy ( iob_put ( fragbuf->frag_iob, iob_len ( iobuf ) ),
  225. iobuf->data, iob_len ( iobuf ) );
  226. free_iob ( iobuf );
  227. /* Set the reassembly timer */
  228. fragbuf->frag_timer.timeout = IP_FRAG_TIMEOUT;
  229. fragbuf->frag_timer.expired = ipv4_frag_expired;
  230. start_timer ( &fragbuf->frag_timer );
  231. /* Add the fragment buffer to the list of fragment buffers */
  232. list_add ( &fragbuf->list, &frag_buffers );
  233. }
  234. return NULL;
  235. }
  236. /**
  237. * Add IPv4 pseudo-header checksum to existing checksum
  238. *
  239. * @v iobuf I/O buffer
  240. * @v csum Existing checksum
  241. * @ret csum Updated checksum
  242. */
  243. static uint16_t ipv4_pshdr_chksum ( struct io_buffer *iobuf, uint16_t csum ) {
  244. struct ipv4_pseudo_header pshdr;
  245. struct iphdr *iphdr = iobuf->data;
  246. size_t hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 );
  247. /* Build pseudo-header */
  248. pshdr.src = iphdr->src;
  249. pshdr.dest = iphdr->dest;
  250. pshdr.zero_padding = 0x00;
  251. pshdr.protocol = iphdr->protocol;
  252. pshdr.len = htons ( iob_len ( iobuf ) - hdrlen );
  253. /* Update the checksum value */
  254. return tcpip_continue_chksum ( csum, &pshdr, sizeof ( pshdr ) );
  255. }
  256. /**
  257. * Determine link-layer address
  258. *
  259. * @v dest IPv4 destination address
  260. * @v src IPv4 source address
  261. * @v netdev Network device
  262. * @v ll_dest Link-layer destination address buffer
  263. * @ret rc Return status code
  264. */
  265. static int ipv4_ll_addr ( struct in_addr dest, struct in_addr src,
  266. struct net_device *netdev, uint8_t *ll_dest ) {
  267. struct ll_protocol *ll_protocol = netdev->ll_protocol;
  268. uint8_t *dest_bytes = ( ( uint8_t * ) &dest );
  269. if ( dest.s_addr == INADDR_BROADCAST ) {
  270. /* Broadcast address */
  271. memcpy ( ll_dest, ll_protocol->ll_broadcast,
  272. ll_protocol->ll_addr_len );
  273. return 0;
  274. } else if ( IN_MULTICAST ( dest.s_addr ) ) {
  275. /* Special case: IPv4 multicast over Ethernet. This
  276. * code may need to be generalised once we find out
  277. * what happens for other link layers.
  278. */
  279. ll_dest[0] = 0x01;
  280. ll_dest[1] = 0x00;
  281. ll_dest[2] = 0x5e;
  282. ll_dest[3] = dest_bytes[1] & 0x7f;
  283. ll_dest[4] = dest_bytes[2];
  284. ll_dest[5] = dest_bytes[3];
  285. return 0;
  286. } else {
  287. /* Unicast address: resolve via ARP */
  288. return arp_resolve ( netdev, &ipv4_protocol, &dest,
  289. &src, ll_dest );
  290. }
  291. }
  292. /**
  293. * Transmit IP packet
  294. *
  295. * @v iobuf I/O buffer
  296. * @v tcpip Transport-layer protocol
  297. * @v st_dest Destination network-layer address
  298. * @v netdev Network device to use if no route found, or NULL
  299. * @v trans_csum Transport-layer checksum to complete, or NULL
  300. * @ret rc Status
  301. *
  302. * This function expects a transport-layer segment and prepends the IP header
  303. */
  304. static int ipv4_tx ( struct io_buffer *iobuf,
  305. struct tcpip_protocol *tcpip_protocol,
  306. struct sockaddr_tcpip *st_dest,
  307. struct net_device *netdev,
  308. uint16_t *trans_csum ) {
  309. struct iphdr *iphdr = iob_push ( iobuf, sizeof ( *iphdr ) );
  310. struct sockaddr_in *sin_dest = ( ( struct sockaddr_in * ) st_dest );
  311. struct ipv4_miniroute *miniroute;
  312. struct in_addr next_hop;
  313. uint8_t ll_dest[MAX_LL_ADDR_LEN];
  314. int rc;
  315. /* Fill up the IP header, except source address */
  316. memset ( iphdr, 0, sizeof ( *iphdr ) );
  317. iphdr->verhdrlen = ( IP_VER | ( sizeof ( *iphdr ) / 4 ) );
  318. iphdr->service = IP_TOS;
  319. iphdr->len = htons ( iob_len ( iobuf ) );
  320. iphdr->ident = htons ( ++next_ident );
  321. iphdr->ttl = IP_TTL;
  322. iphdr->protocol = tcpip_protocol->tcpip_proto;
  323. iphdr->dest = sin_dest->sin_addr;
  324. /* Use routing table to identify next hop and transmitting netdev */
  325. next_hop = iphdr->dest;
  326. if ( ( miniroute = ipv4_route ( &next_hop ) ) ) {
  327. iphdr->src = miniroute->address;
  328. netdev = miniroute->netdev;
  329. }
  330. if ( ! netdev ) {
  331. DBG ( "IPv4 has no route to %s\n", inet_ntoa ( iphdr->dest ) );
  332. rc = -ENETUNREACH;
  333. goto err;
  334. }
  335. /* Determine link-layer destination address */
  336. if ( ( rc = ipv4_ll_addr ( next_hop, iphdr->src, netdev,
  337. ll_dest ) ) != 0 ) {
  338. DBG ( "IPv4 has no link-layer address for %s: %s\n",
  339. inet_ntoa ( next_hop ), strerror ( rc ) );
  340. goto err;
  341. }
  342. /* Fix up checksums */
  343. if ( trans_csum )
  344. *trans_csum = ipv4_pshdr_chksum ( iobuf, *trans_csum );
  345. iphdr->chksum = tcpip_chksum ( iphdr, sizeof ( *iphdr ) );
  346. /* Print IP4 header for debugging */
  347. DBG ( "IPv4 TX %s->", inet_ntoa ( iphdr->src ) );
  348. DBG ( "%s len %d proto %d id %04x csum %04x\n",
  349. inet_ntoa ( iphdr->dest ), ntohs ( iphdr->len ), iphdr->protocol,
  350. ntohs ( iphdr->ident ), ntohs ( iphdr->chksum ) );
  351. /* Hand off to link layer */
  352. if ( ( rc = net_tx ( iobuf, netdev, &ipv4_protocol, ll_dest ) ) != 0 ) {
  353. DBG ( "IPv4 could not transmit packet via %s: %s\n",
  354. netdev->name, strerror ( rc ) );
  355. return rc;
  356. }
  357. return 0;
  358. err:
  359. free_iob ( iobuf );
  360. return rc;
  361. }
  362. /**
  363. * Process incoming packets
  364. *
  365. * @v iobuf I/O buffer
  366. * @v netdev Network device
  367. * @v ll_source Link-layer destination source
  368. *
  369. * This function expects an IP4 network datagram. It processes the headers
  370. * and sends it to the transport layer.
  371. */
  372. static int ipv4_rx ( struct io_buffer *iobuf, struct net_device *netdev __unused,
  373. const void *ll_source __unused ) {
  374. struct iphdr *iphdr = iobuf->data;
  375. size_t hdrlen;
  376. size_t len;
  377. union {
  378. struct sockaddr_in sin;
  379. struct sockaddr_tcpip st;
  380. } src, dest;
  381. uint16_t csum;
  382. uint16_t pshdr_csum;
  383. int rc;
  384. /* Sanity check the IPv4 header */
  385. if ( iob_len ( iobuf ) < sizeof ( *iphdr ) ) {
  386. DBG ( "IPv4 packet too short at %zd bytes (min %zd bytes)\n",
  387. iob_len ( iobuf ), sizeof ( *iphdr ) );
  388. goto err;
  389. }
  390. if ( ( iphdr->verhdrlen & IP_MASK_VER ) != IP_VER ) {
  391. DBG ( "IPv4 version %#02x not supported\n", iphdr->verhdrlen );
  392. goto err;
  393. }
  394. hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 );
  395. if ( hdrlen < sizeof ( *iphdr ) ) {
  396. DBG ( "IPv4 header too short at %zd bytes (min %zd bytes)\n",
  397. hdrlen, sizeof ( *iphdr ) );
  398. goto err;
  399. }
  400. if ( hdrlen > iob_len ( iobuf ) ) {
  401. DBG ( "IPv4 header too long at %zd bytes "
  402. "(packet is %zd bytes)\n", hdrlen, iob_len ( iobuf ) );
  403. goto err;
  404. }
  405. if ( ( csum = tcpip_chksum ( iphdr, hdrlen ) ) != 0 ) {
  406. DBG ( "IPv4 checksum incorrect (is %04x including checksum "
  407. "field, should be 0000)\n", csum );
  408. goto err;
  409. }
  410. len = ntohs ( iphdr->len );
  411. if ( len < hdrlen ) {
  412. DBG ( "IPv4 length too short at %zd bytes "
  413. "(header is %zd bytes)\n", len, hdrlen );
  414. goto err;
  415. }
  416. if ( len > iob_len ( iobuf ) ) {
  417. DBG ( "IPv4 length too long at %zd bytes "
  418. "(packet is %zd bytes)\n", len, iob_len ( iobuf ) );
  419. goto err;
  420. }
  421. /* Print IPv4 header for debugging */
  422. DBG ( "IPv4 RX %s<-", inet_ntoa ( iphdr->dest ) );
  423. DBG ( "%s len %d proto %d id %04x csum %04x\n",
  424. inet_ntoa ( iphdr->src ), ntohs ( iphdr->len ), iphdr->protocol,
  425. ntohs ( iphdr->ident ), ntohs ( iphdr->chksum ) );
  426. /* Truncate packet to correct length, calculate pseudo-header
  427. * checksum and then strip off the IPv4 header.
  428. */
  429. iob_unput ( iobuf, ( iob_len ( iobuf ) - len ) );
  430. pshdr_csum = ipv4_pshdr_chksum ( iobuf, TCPIP_EMPTY_CSUM );
  431. iob_pull ( iobuf, hdrlen );
  432. /* Fragment reassembly */
  433. if ( ( iphdr->frags & htons ( IP_MASK_MOREFRAGS ) ) ||
  434. ( ( iphdr->frags & htons ( IP_MASK_OFFSET ) ) != 0 ) ) {
  435. /* Pass the fragment to ipv4_reassemble() which either
  436. * returns a fully reassembled I/O buffer or NULL.
  437. */
  438. iobuf = ipv4_reassemble ( iobuf );
  439. if ( ! iobuf )
  440. return 0;
  441. }
  442. /* Construct socket addresses and hand off to transport layer */
  443. memset ( &src, 0, sizeof ( src ) );
  444. src.sin.sin_family = AF_INET;
  445. src.sin.sin_addr = iphdr->src;
  446. memset ( &dest, 0, sizeof ( dest ) );
  447. dest.sin.sin_family = AF_INET;
  448. dest.sin.sin_addr = iphdr->dest;
  449. if ( ( rc = tcpip_rx ( iobuf, iphdr->protocol, &src.st,
  450. &dest.st, pshdr_csum ) ) != 0 ) {
  451. DBG ( "IPv4 received packet rejected by stack: %s\n",
  452. strerror ( rc ) );
  453. return rc;
  454. }
  455. return 0;
  456. err:
  457. free_iob ( iobuf );
  458. return -EINVAL;
  459. }
  460. /**
  461. * Check existence of IPv4 address for ARP
  462. *
  463. * @v netdev Network device
  464. * @v net_addr Network-layer address
  465. * @ret rc Return status code
  466. */
  467. static int ipv4_arp_check ( struct net_device *netdev, const void *net_addr ) {
  468. const struct in_addr *address = net_addr;
  469. struct ipv4_miniroute *miniroute;
  470. list_for_each_entry ( miniroute, &ipv4_miniroutes, list ) {
  471. if ( ( miniroute->netdev == netdev ) &&
  472. ( miniroute->address.s_addr == address->s_addr ) ) {
  473. /* Found matching address */
  474. return 0;
  475. }
  476. }
  477. return -ENOENT;
  478. }
  /**
   * Format an IPv4 address as a dotted-quad string
   *
   * @v in		IP address
   * @ret string	IP address in dotted-quad notation
   *
   * The result is held in a single static buffer, so each call
   * overwrites the string produced by the previous one.
   */
  char * inet_ntoa ( struct in_addr in ) {
      static char text[16]; /* "xxx.xxx.xxx.xxx" plus terminating NUL */
      const uint8_t *octet = ( const uint8_t * ) &in;

      /* Octets are printed in the order in which they are stored */
      sprintf ( text, "%d.%d.%d.%d",
                octet[0], octet[1], octet[2], octet[3] );

      return text;
  }
  /**
   * Transcribe IP address
   *
   * @v net_addr	IP address
   * @ret string	IP address in dotted-quad notation
   *
   * Thin adapter around inet_ntoa() for callers that hold the address
   * behind an untyped pointer.
   */
  static const char * ipv4_ntoa ( const void *net_addr ) {
      const struct in_addr *address = net_addr;

      return inet_ntoa ( *address );
  }
  501. /** IPv4 protocol */
  502. struct net_protocol ipv4_protocol __net_protocol = {
  503. .name = "IP",
  504. .net_proto = htons ( ETH_P_IP ),
  505. .net_addr_len = sizeof ( struct in_addr ),
  506. .rx = ipv4_rx,
  507. .ntoa = ipv4_ntoa,
  508. };
  509. /** IPv4 TCPIP net protocol */
  510. struct tcpip_net_protocol ipv4_tcpip_protocol __tcpip_net_protocol = {
  511. .name = "IPv4",
  512. .sa_family = AF_INET,
  513. .tx = ipv4_tx,
  514. };
  515. /** IPv4 ARP protocol */
  516. struct arp_net_protocol ipv4_arp_protocol __arp_net_protocol = {
  517. .net_protocol = &ipv4_protocol,
  518. .check = ipv4_arp_check,
  519. };