選択できるのは25トピックまでです。 トピックは、先頭が英数字で、英数字とダッシュ('-')を使用した35文字以内のものにしてください。

ipv4.c 16KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582
  1. #include <string.h>
  2. #include <stdint.h>
  3. #include <errno.h>
  4. #include <byteswap.h>
  5. #include <malloc.h>
  6. #include <vsprintf.h>
  7. #include <gpxe/list.h>
  8. #include <gpxe/in.h>
  9. #include <gpxe/arp.h>
  10. #include <gpxe/if_ether.h>
  11. #include <gpxe/pkbuff.h>
  12. #include <gpxe/netdevice.h>
  13. #include <gpxe/ip.h>
  14. #include <gpxe/tcpip.h>
  15. /** @file
  16. *
  17. * IPv4 protocol
  18. *
  19. */
/** Unique IP datagram identification number
 *
 * Pre-incremented by ipv4_tx() for every datagram it builds.
 */
static uint16_t next_ident = 0;

/** IPv4 network-layer protocol
 *
 * Tentative definition; the initialised definition appears later in
 * this file.
 */
struct net_protocol ipv4_protocol;

/** List of IPv4 miniroutes */
struct list_head ipv4_miniroutes = LIST_HEAD_INIT ( ipv4_miniroutes );

/** List of fragment reassembly buffers */
static LIST_HEAD ( frag_buffers );

/* Forward declaration: add_ipv4_miniroute() installs this as the
 * "forget" callback before the function body is defined.
 */
static void ipv4_forget_netdev ( struct reference *ref );
  28. /**
  29. * Add IPv4 minirouting table entry
  30. *
  31. * @v netdev Network device
  32. * @v address IPv4 address
  33. * @v netmask Subnet mask
  34. * @v gateway Gateway address (or @c INADDR_NONE for no gateway)
  35. * @ret miniroute Routing table entry, or NULL
  36. */
  37. static struct ipv4_miniroute * add_ipv4_miniroute ( struct net_device *netdev,
  38. struct in_addr address,
  39. struct in_addr netmask,
  40. struct in_addr gateway ) {
  41. struct ipv4_miniroute *miniroute;
  42. /* Allocate and populate miniroute structure */
  43. miniroute = malloc ( sizeof ( *miniroute ) );
  44. if ( miniroute ) {
  45. DBG ( "IPv4 add %s", inet_ntoa ( address ) );
  46. DBG ( "/%s ", inet_ntoa ( netmask ) );
  47. if ( gateway.s_addr != INADDR_NONE )
  48. DBG ( "gw %s ", inet_ntoa ( gateway ) );
  49. DBG ( "via %s\n", netdev->name );
  50. /* Record routing information */
  51. miniroute->netdev = netdev;
  52. miniroute->address = address;
  53. miniroute->netmask = netmask;
  54. miniroute->gateway = gateway;
  55. /* Add to end of list if we have a gateway, otherwise
  56. * to start of list.
  57. */
  58. if ( gateway.s_addr != INADDR_NONE ) {
  59. list_add_tail ( &miniroute->list, &ipv4_miniroutes );
  60. } else {
  61. list_add ( &miniroute->list, &ipv4_miniroutes );
  62. }
  63. /* Record reference to net_device */
  64. miniroute->netdev_ref.forget = ipv4_forget_netdev;
  65. ref_add ( &miniroute->netdev_ref, &netdev->references );
  66. }
  67. return miniroute;
  68. }
  69. /**
  70. * Delete IPv4 minirouting table entry
  71. *
  72. * @v miniroute Routing table entry
  73. */
  74. static void del_ipv4_miniroute ( struct ipv4_miniroute *miniroute ) {
  75. DBG ( "IPv4 del %s", inet_ntoa ( miniroute->address ) );
  76. DBG ( "/%s ", inet_ntoa ( miniroute->netmask ) );
  77. if ( miniroute->gateway.s_addr != INADDR_NONE )
  78. DBG ( "gw %s ", inet_ntoa ( miniroute->gateway ) );
  79. DBG ( "via %s\n", miniroute->netdev->name );
  80. ref_del ( &miniroute->netdev_ref );
  81. list_del ( &miniroute->list );
  82. free ( miniroute );
  83. }
  84. /**
  85. * Forget reference to net_device
  86. *
  87. * @v ref Persistent reference
  88. */
  89. static void ipv4_forget_netdev ( struct reference *ref ) {
  90. struct ipv4_miniroute *miniroute
  91. = container_of ( ref, struct ipv4_miniroute, netdev_ref );
  92. del_ipv4_miniroute ( miniroute );
  93. }
  94. /**
  95. * Add IPv4 interface
  96. *
  97. * @v netdev Network device
  98. * @v address IPv4 address
  99. * @v netmask Subnet mask
  100. * @v gateway Gateway address (or @c INADDR_NONE for no gateway)
  101. * @ret rc Return status code
  102. *
  103. */
  104. int add_ipv4_address ( struct net_device *netdev, struct in_addr address,
  105. struct in_addr netmask, struct in_addr gateway ) {
  106. struct ipv4_miniroute *miniroute;
  107. /* Clear any existing address for this net device */
  108. del_ipv4_address ( netdev );
  109. /* Add new miniroute */
  110. miniroute = add_ipv4_miniroute ( netdev, address, netmask, gateway );
  111. if ( ! miniroute )
  112. return -ENOMEM;
  113. return 0;
  114. }
  115. /**
  116. * Remove IPv4 interface
  117. *
  118. * @v netdev Network device
  119. */
  120. void del_ipv4_address ( struct net_device *netdev ) {
  121. struct ipv4_miniroute *miniroute;
  122. list_for_each_entry ( miniroute, &ipv4_miniroutes, list ) {
  123. if ( miniroute->netdev == netdev ) {
  124. del_ipv4_miniroute ( miniroute );
  125. break;
  126. }
  127. }
  128. }
  129. /**
  130. * Perform IPv4 routing
  131. *
  132. * @v dest Final destination address
  133. * @ret dest Next hop destination address
  134. * @ret miniroute Routing table entry to use, or NULL if no route
  135. */
  136. static struct ipv4_miniroute * ipv4_route ( struct in_addr *dest ) {
  137. struct ipv4_miniroute *miniroute;
  138. int local;
  139. int has_gw;
  140. list_for_each_entry ( miniroute, &ipv4_miniroutes, list ) {
  141. local = ( ( ( dest->s_addr ^ miniroute->address.s_addr )
  142. & miniroute->netmask.s_addr ) == 0 );
  143. has_gw = ( miniroute->gateway.s_addr != INADDR_NONE );
  144. if ( local || has_gw ) {
  145. if ( ! local )
  146. *dest = miniroute->gateway;
  147. return miniroute;
  148. }
  149. }
  150. return NULL;
  151. }
  152. /**
  153. * Fragment reassembly counter timeout
  154. *
  155. * @v timer Retry timer
  156. * @v over If asserted, the timer is greater than @c MAX_TIMEOUT
  157. */
  158. static void ipv4_frag_expired ( struct retry_timer *timer __unused,
  159. int over ) {
  160. if ( over ) {
  161. DBG ( "Fragment reassembly timeout" );
  162. /* Free the fragment buffer */
  163. }
  164. }
  165. /**
  166. * Free fragment buffer
  167. *
  168. * @v fragbug Fragment buffer
  169. */
  170. static void free_fragbuf ( struct frag_buffer *fragbuf ) {
  171. if ( fragbuf ) {
  172. free_dma ( fragbuf, sizeof ( *fragbuf ) );
  173. }
  174. }
  175. /**
  176. * Fragment reassembler
  177. *
  178. * @v pkb Packet buffer, fragment of the datagram
  179. * @ret frag_pkb Reassembled packet, or NULL
  180. */
  181. static struct pk_buff * ipv4_reassemble ( struct pk_buff * pkb ) {
  182. struct iphdr *iphdr = pkb->data;
  183. struct frag_buffer *fragbuf;
  184. /**
  185. * Check if the fragment belongs to any fragment series
  186. */
  187. list_for_each_entry ( fragbuf, &frag_buffers, list ) {
  188. if ( fragbuf->ident == iphdr->ident &&
  189. fragbuf->src.s_addr == iphdr->src.s_addr ) {
  190. /**
  191. * Check if the packet is the expected fragment
  192. *
  193. * The offset of the new packet must be equal to the
  194. * length of the data accumulated so far (the length of
  195. * the reassembled packet buffer
  196. */
  197. if ( pkb_len ( fragbuf->frag_pkb ) ==
  198. ( iphdr->frags & IP_MASK_OFFSET ) ) {
  199. /**
  200. * Append the contents of the fragment to the
  201. * reassembled packet buffer
  202. */
  203. pkb_pull ( pkb, sizeof ( *iphdr ) );
  204. memcpy ( pkb_put ( fragbuf->frag_pkb,
  205. pkb_len ( pkb ) ),
  206. pkb->data, pkb_len ( pkb ) );
  207. free_pkb ( pkb );
  208. /** Check if the fragment series is over */
  209. if ( !iphdr->frags & IP_MASK_MOREFRAGS ) {
  210. pkb = fragbuf->frag_pkb;
  211. free_fragbuf ( fragbuf );
  212. return pkb;
  213. }
  214. } else {
  215. /* Discard the fragment series */
  216. free_fragbuf ( fragbuf );
  217. free_pkb ( pkb );
  218. }
  219. return NULL;
  220. }
  221. }
  222. /** Check if the fragment is the first in the fragment series */
  223. if ( iphdr->frags & IP_MASK_MOREFRAGS &&
  224. ( ( iphdr->frags & IP_MASK_OFFSET ) == 0 ) ) {
  225. /** Create a new fragment buffer */
  226. fragbuf = ( struct frag_buffer* ) malloc ( sizeof( *fragbuf ) );
  227. fragbuf->ident = iphdr->ident;
  228. fragbuf->src = iphdr->src;
  229. /* Set up the reassembly packet buffer */
  230. fragbuf->frag_pkb = alloc_pkb ( IP_FRAG_PKB_SIZE );
  231. pkb_pull ( pkb, sizeof ( *iphdr ) );
  232. memcpy ( pkb_put ( fragbuf->frag_pkb, pkb_len ( pkb ) ),
  233. pkb->data, pkb_len ( pkb ) );
  234. free_pkb ( pkb );
  235. /* Set the reassembly timer */
  236. fragbuf->frag_timer.timeout = IP_FRAG_TIMEOUT;
  237. fragbuf->frag_timer.expired = ipv4_frag_expired;
  238. start_timer ( &fragbuf->frag_timer );
  239. /* Add the fragment buffer to the list of fragment buffers */
  240. list_add ( &fragbuf->list, &frag_buffers );
  241. }
  242. return NULL;
  243. }
  244. /**
  245. * Add IPv4 pseudo-header checksum to existing checksum
  246. *
  247. * @v pkb Packet buffer
  248. * @v csum Existing checksum
  249. * @ret csum Updated checksum
  250. */
  251. static uint16_t ipv4_pshdr_chksum ( struct pk_buff *pkb, uint16_t csum ) {
  252. struct ipv4_pseudo_header pshdr;
  253. struct iphdr *iphdr = pkb->data;
  254. size_t hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 );
  255. /* Build pseudo-header */
  256. pshdr.src = iphdr->src;
  257. pshdr.dest = iphdr->dest;
  258. pshdr.zero_padding = 0x00;
  259. pshdr.protocol = iphdr->protocol;
  260. pshdr.len = htons ( pkb_len ( pkb ) - hdrlen );
  261. /* Update the checksum value */
  262. return tcpip_continue_chksum ( csum, &pshdr, sizeof ( pshdr ) );
  263. }
  264. /**
  265. * Determine link-layer address
  266. *
  267. * @v dest IPv4 destination address
  268. * @v src IPv4 source address
  269. * @v netdev Network device
  270. * @v ll_dest Link-layer destination address buffer
  271. * @ret rc Return status code
  272. */
  273. static int ipv4_ll_addr ( struct in_addr dest, struct in_addr src,
  274. struct net_device *netdev, uint8_t *ll_dest ) {
  275. struct ll_protocol *ll_protocol = netdev->ll_protocol;
  276. uint8_t *dest_bytes = ( ( uint8_t * ) &dest );
  277. if ( dest.s_addr == INADDR_BROADCAST ) {
  278. /* Broadcast address */
  279. memcpy ( ll_dest, ll_protocol->ll_broadcast,
  280. ll_protocol->ll_addr_len );
  281. return 0;
  282. } else if ( IN_MULTICAST ( dest.s_addr ) ) {
  283. /* Special case: IPv4 multicast over Ethernet. This
  284. * code may need to be generalised once we find out
  285. * what happens for other link layers.
  286. */
  287. ll_dest[0] = 0x01;
  288. ll_dest[1] = 0x00;
  289. ll_dest[2] = 0x5e;
  290. ll_dest[3] = dest_bytes[1] & 0x7f;
  291. ll_dest[4] = dest_bytes[2];
  292. ll_dest[5] = dest_bytes[3];
  293. return 0;
  294. } else {
  295. /* Unicast address: resolve via ARP */
  296. return arp_resolve ( netdev, &ipv4_protocol, &dest,
  297. &src, ll_dest );
  298. }
  299. }
  300. /**
  301. * Transmit IP packet
  302. *
  303. * @v pkb Packet buffer
  304. * @v tcpip Transport-layer protocol
  305. * @v st_dest Destination network-layer address
  306. * @v netdev Network device (or NULL to route automatically)
  307. * @v trans_csum Transport-layer checksum to complete, or NULL
  308. * @ret rc Status
  309. *
  310. * This function expects a transport-layer segment and prepends the IP header
  311. */
  312. static int ipv4_tx ( struct pk_buff *pkb,
  313. struct tcpip_protocol *tcpip_protocol,
  314. struct sockaddr_tcpip *st_dest,
  315. struct net_device *netdev,
  316. uint16_t *trans_csum ) {
  317. struct iphdr *iphdr = pkb_push ( pkb, sizeof ( *iphdr ) );
  318. struct sockaddr_in *sin_dest = ( ( struct sockaddr_in * ) st_dest );
  319. struct ipv4_miniroute *miniroute;
  320. struct in_addr next_hop;
  321. uint8_t ll_dest[MAX_LL_ADDR_LEN];
  322. int rc;
  323. /* Fill up the IP header, except source address */
  324. memset ( iphdr, 0, sizeof ( *iphdr ) );
  325. iphdr->verhdrlen = ( IP_VER | ( sizeof ( *iphdr ) / 4 ) );
  326. iphdr->service = IP_TOS;
  327. iphdr->len = htons ( pkb_len ( pkb ) );
  328. iphdr->ident = htons ( ++next_ident );
  329. iphdr->ttl = IP_TTL;
  330. iphdr->protocol = tcpip_protocol->tcpip_proto;
  331. iphdr->dest = sin_dest->sin_addr;
  332. /* Use routing table to identify next hop and transmitting netdev */
  333. next_hop = iphdr->dest;
  334. if ( ( miniroute = ipv4_route ( &next_hop ) ) ) {
  335. iphdr->src = miniroute->address;
  336. netdev = miniroute->netdev;
  337. }
  338. if ( ! netdev ) {
  339. DBG ( "IPv4 has no route to %s\n", inet_ntoa ( iphdr->dest ) );
  340. rc = -EHOSTUNREACH;
  341. goto err;
  342. }
  343. /* Determine link-layer destination address */
  344. if ( ( rc = ipv4_ll_addr ( next_hop, iphdr->src, netdev,
  345. ll_dest ) ) != 0 ) {
  346. DBG ( "IPv4 has no link-layer address for %s\n",
  347. inet_ntoa ( iphdr->dest ) );
  348. goto err;
  349. }
  350. /* Fix up checksums */
  351. if ( trans_csum )
  352. *trans_csum = ipv4_pshdr_chksum ( pkb, *trans_csum );
  353. iphdr->chksum = tcpip_chksum ( iphdr, sizeof ( *iphdr ) );
  354. /* Print IP4 header for debugging */
  355. DBG ( "IPv4 TX %s->", inet_ntoa ( iphdr->src ) );
  356. DBG ( "%s len %d proto %d id %04x csum %04x\n",
  357. inet_ntoa ( iphdr->dest ), ntohs ( iphdr->len ), iphdr->protocol,
  358. ntohs ( iphdr->ident ), ntohs ( iphdr->chksum ) );
  359. /* Hand off to link layer */
  360. return net_tx ( pkb, netdev, &ipv4_protocol, ll_dest );
  361. err:
  362. free_pkb ( pkb );
  363. return rc;
  364. }
  365. /**
  366. * Process incoming packets
  367. *
  368. * @v pkb Packet buffer
  369. * @v netdev Network device
  370. * @v ll_source Link-layer destination source
  371. *
  372. * This function expects an IP4 network datagram. It processes the headers
  373. * and sends it to the transport layer.
  374. */
  375. static int ipv4_rx ( struct pk_buff *pkb, struct net_device *netdev __unused,
  376. const void *ll_source __unused ) {
  377. struct iphdr *iphdr = pkb->data;
  378. size_t hdrlen;
  379. size_t len;
  380. union {
  381. struct sockaddr_in sin;
  382. struct sockaddr_tcpip st;
  383. } src, dest;
  384. uint16_t csum;
  385. uint16_t pshdr_csum;
  386. /* Sanity check the IPv4 header */
  387. if ( pkb_len ( pkb ) < sizeof ( *iphdr ) ) {
  388. DBG ( "IPv4 packet too short at %d bytes (min %d bytes)\n",
  389. pkb_len ( pkb ), sizeof ( *iphdr ) );
  390. goto err;
  391. }
  392. if ( ( iphdr->verhdrlen & IP_MASK_VER ) != IP_VER ) {
  393. DBG ( "IPv4 version %#02x not supported\n", iphdr->verhdrlen );
  394. goto err;
  395. }
  396. hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 );
  397. if ( hdrlen < sizeof ( *iphdr ) ) {
  398. DBG ( "IPv4 header too short at %d bytes (min %d bytes)\n",
  399. hdrlen, sizeof ( *iphdr ) );
  400. goto err;
  401. }
  402. if ( hdrlen > pkb_len ( pkb ) ) {
  403. DBG ( "IPv4 header too long at %d bytes "
  404. "(packet is %d bytes)\n", hdrlen, pkb_len ( pkb ) );
  405. goto err;
  406. }
  407. if ( ( csum = tcpip_chksum ( iphdr, hdrlen ) ) != 0 ) {
  408. DBG ( "IPv4 checksum incorrect (is %04x including checksum "
  409. "field, should be 0000)\n", csum );
  410. goto err;
  411. }
  412. len = ntohs ( iphdr->len );
  413. if ( len < hdrlen ) {
  414. DBG ( "IPv4 length too short at %d bytes "
  415. "(header is %d bytes)\n", len, hdrlen );
  416. goto err;
  417. }
  418. if ( len > pkb_len ( pkb ) ) {
  419. DBG ( "IPv4 length too long at %d bytes "
  420. "(packet is %d bytes)\n", len, pkb_len ( pkb ) );
  421. goto err;
  422. }
  423. /* Print IPv4 header for debugging */
  424. DBG ( "IPv4 RX %s<-", inet_ntoa ( iphdr->dest ) );
  425. DBG ( "%s len %d proto %d id %04x csum %04x\n",
  426. inet_ntoa ( iphdr->src ), ntohs ( iphdr->len ), iphdr->protocol,
  427. ntohs ( iphdr->ident ), ntohs ( iphdr->chksum ) );
  428. /* Truncate packet to correct length, calculate pseudo-header
  429. * checksum and then strip off the IPv4 header.
  430. */
  431. pkb_unput ( pkb, ( pkb_len ( pkb ) - len ) );
  432. pshdr_csum = ipv4_pshdr_chksum ( pkb, TCPIP_EMPTY_CSUM );
  433. pkb_pull ( pkb, hdrlen );
  434. /* Fragment reassembly */
  435. if ( ( iphdr->frags & htons ( IP_MASK_MOREFRAGS ) ) ||
  436. ( ( iphdr->frags & htons ( IP_MASK_OFFSET ) ) != 0 ) ) {
  437. /* Pass the fragment to ipv4_reassemble() which either
  438. * returns a fully reassembled packet buffer or NULL.
  439. */
  440. pkb = ipv4_reassemble ( pkb );
  441. if ( ! pkb )
  442. return 0;
  443. }
  444. /* Construct socket addresses and hand off to transport layer */
  445. memset ( &src, 0, sizeof ( src ) );
  446. src.sin.sin_family = AF_INET;
  447. src.sin.sin_addr = iphdr->src;
  448. memset ( &dest, 0, sizeof ( dest ) );
  449. dest.sin.sin_family = AF_INET;
  450. dest.sin.sin_addr = iphdr->dest;
  451. return tcpip_rx ( pkb, iphdr->protocol, &src.st, &dest.st, pshdr_csum);
  452. err:
  453. free_pkb ( pkb );
  454. return -EINVAL;
  455. }
  456. /**
  457. * Check existence of IPv4 address for ARP
  458. *
  459. * @v netdev Network device
  460. * @v net_addr Network-layer address
  461. * @ret rc Return status code
  462. */
  463. static int ipv4_arp_check ( struct net_device *netdev, const void *net_addr ) {
  464. const struct in_addr *address = net_addr;
  465. struct ipv4_miniroute *miniroute;
  466. list_for_each_entry ( miniroute, &ipv4_miniroutes, list ) {
  467. if ( ( miniroute->netdev == netdev ) &&
  468. ( miniroute->address.s_addr == address->s_addr ) ) {
  469. /* Found matching address */
  470. return 0;
  471. }
  472. }
  473. return -ENOENT;
  474. }
  475. /**
  476. * Convert IPv4 address to dotted-quad notation
  477. *
  478. * @v in IP address
  479. * @ret string IP address in dotted-quad notation
  480. */
  481. char * inet_ntoa ( struct in_addr in ) {
  482. static char buf[16]; /* "xxx.xxx.xxx.xxx" */
  483. uint8_t *bytes = ( uint8_t * ) &in;
  484. sprintf ( buf, "%d.%d.%d.%d", bytes[0], bytes[1], bytes[2], bytes[3] );
  485. return buf;
  486. }
  487. /**
  488. * Transcribe IP address
  489. *
  490. * @v net_addr IP address
  491. * @ret string IP address in dotted-quad notation
  492. *
  493. */
  494. static const char * ipv4_ntoa ( const void *net_addr ) {
  495. return inet_ntoa ( * ( ( struct in_addr * ) net_addr ) );
  496. }
/** IPv4 protocol
 *
 * Network-layer protocol descriptor for IPv4: gives the Ethernet
 * protocol number for IP in network byte order, the size of an IPv4
 * address, and the receive and address-transcription handlers defined
 * in this file.
 *
 * NOTE(review): __net_protocol is presumably what registers this
 * descriptor with the network stack; confirm against its definition.
 */
struct net_protocol ipv4_protocol __net_protocol = {
	.name = "IP",
	.net_proto = htons ( ETH_P_IP ),
	.net_addr_len = sizeof ( struct in_addr ),
	.rx = ipv4_rx,
	.ntoa = ipv4_ntoa,
};
/** IPv4 TCPIP net protocol
 *
 * Associates the AF_INET socket address family with ipv4_tx() as the
 * transmit routine.
 *
 * NOTE(review): __tcpip_net_protocol is presumably what registers this
 * descriptor with the TCP/IP layer; confirm against its definition.
 */
struct tcpip_net_protocol ipv4_tcpip_protocol __tcpip_net_protocol = {
	.name = "IPv4",
	.sa_family = AF_INET,
	.tx = ipv4_tx,
};
/** IPv4 ARP protocol
 *
 * Ties ipv4_protocol to ipv4_arp_check(), which reports whether a
 * given address is assigned to a given network device.
 *
 * NOTE(review): __arp_net_protocol is presumably what registers this
 * descriptor with the ARP layer; confirm against its definition.
 */
struct arp_net_protocol ipv4_arp_protocol __arp_net_protocol = {
	.net_protocol = &ipv4_protocol,
	.check = ipv4_arp_check,
};