ipv4.c 14KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527
  1. #include <string.h>
  2. #include <stdint.h>
  3. #include <errno.h>
  4. #include <byteswap.h>
  5. #include <malloc.h>
  6. #include <vsprintf.h>
  7. #include <gpxe/list.h>
  8. #include <gpxe/in.h>
  9. #include <gpxe/arp.h>
  10. #include <gpxe/if_ether.h>
  11. #include <gpxe/pkbuff.h>
  12. #include <gpxe/netdevice.h>
  13. #include "uip/uip.h"
  14. #include <gpxe/ip.h>
  15. #include <gpxe/tcpip.h>
  16. /** @file
  17. *
  18. * IPv4 protocol
  19. *
  20. * The gPXE IP stack is currently implemented on top of the uIP
  21. * protocol stack. This file provides wrappers around uIP so that
  22. * higher-level protocol implementations do not need to talk directly
  23. * to uIP (which has a somewhat baroque API).
  24. *
  25. */
  /* Unique IP datagram identification number.
   *
   * Pre-incremented by ipv4_tx() for every transmitted datagram and
   * written into the header in network byte order.
   */
  static uint16_t next_ident = 0;
  /* Tentative declaration so that functions defined before the
   * initialised definition (near the end of this file) can take the
   * address of the protocol descriptor.
   */
  struct net_protocol ipv4_protocol;
  /** An IPv4 address/routing table entry
   *
   * One entry is created per add_ipv4_address() call and linked into
   * the miniroutes list, which ipv4_route() walks in order.
   */
  struct ipv4_miniroute {
      /** List of miniroutes */
      struct list_head list;
      /** Network device */
      struct net_device *netdev;
      /** IPv4 address */
      struct in_addr address;
      /** Subnet mask */
      struct in_addr netmask;
      /** Gateway address (INADDR_NONE when the entry has no gateway) */
      struct in_addr gateway;
  };
  /** List of IPv4 miniroutes
   *
   * add_ipv4_address() places gateway-less entries at the head and
   * entries with a gateway at the tail.
   */
  static LIST_HEAD ( miniroutes );
  /** List of fragment reassembly buffers
   *
   * Holds one struct frag_buffer per datagram currently being
   * reassembled by ipv4_reassemble().
   */
  static LIST_HEAD ( frag_buffers );
  46. /**
  47. * Add IPv4 interface
  48. *
  49. * @v netdev Network device
  50. * @v address IPv4 address
  51. * @v netmask Subnet mask
  52. * @v gateway Gateway address (or @c INADDR_NONE for no gateway)
  53. * @ret rc Return status code
  54. *
  55. */
  56. int add_ipv4_address ( struct net_device *netdev, struct in_addr address,
  57. struct in_addr netmask, struct in_addr gateway ) {
  58. struct ipv4_miniroute *miniroute;
  59. /* Allocate and populate miniroute structure */
  60. miniroute = malloc ( sizeof ( *miniroute ) );
  61. if ( ! miniroute )
  62. return -ENOMEM;
  63. miniroute->netdev = netdev;
  64. miniroute->address = address;
  65. miniroute->netmask = netmask;
  66. miniroute->gateway = gateway;
  67. /* Add to end of list if we have a gateway, otherwise to start
  68. * of list.
  69. */
  70. if ( gateway.s_addr != INADDR_NONE ) {
  71. list_add_tail ( &miniroute->list, &miniroutes );
  72. } else {
  73. list_add ( &miniroute->list, &miniroutes );
  74. }
  75. return 0;
  76. }
  77. /**
  78. * Remove IPv4 interface
  79. *
  80. * @v netdev Network device
  81. */
  82. void del_ipv4_address ( struct net_device *netdev ) {
  83. struct ipv4_miniroute *miniroute;
  84. list_for_each_entry ( miniroute, &miniroutes, list ) {
  85. if ( miniroute->netdev == netdev ) {
  86. list_del ( &miniroute->list );
  87. break;
  88. }
  89. }
  90. }
  91. /**
  92. * Perform IPv4 routing
  93. *
  94. * @v dest Final destination address
  95. * @ret dest Next hop destination address
  96. * @ret miniroute Routing table entry to use, or NULL if no route
  97. */
  98. static struct ipv4_miniroute * ipv4_route ( struct in_addr *dest ) {
  99. struct ipv4_miniroute *miniroute;
  100. int local;
  101. int has_gw;
  102. list_for_each_entry ( miniroute, &miniroutes, list ) {
  103. local = ( ( ( dest->s_addr ^ miniroute->address.s_addr )
  104. & miniroute->netmask.s_addr ) == 0 );
  105. has_gw = ( miniroute->gateway.s_addr != INADDR_NONE );
  106. if ( local || has_gw ) {
  107. if ( ! local )
  108. *dest = miniroute->gateway;
  109. return miniroute;
  110. }
  111. }
  112. return NULL;
  113. }
  114. /**
  115. * Fragment reassembly counter timeout
  116. *
  117. * @v timer Retry timer
  118. * @v over If asserted, the timer is greater than @c MAX_TIMEOUT
  119. */
  120. static void ipv4_frag_expired ( struct retry_timer *timer __unused,
  121. int over ) {
  122. if ( over ) {
  123. DBG ( "Fragment reassembly timeout" );
  124. /* Free the fragment buffer */
  125. }
  126. }
  127. /**
  128. * Free fragment buffer
  129. *
  130. * @v fragbug Fragment buffer
  131. */
  132. static void free_fragbuf ( struct frag_buffer *fragbuf ) {
  133. if ( fragbuf ) {
  134. free_dma ( fragbuf, sizeof ( *fragbuf ) );
  135. }
  136. }
  137. /**
  138. * Fragment reassembler
  139. *
  140. * @v pkb Packet buffer, fragment of the datagram
  141. * @ret frag_pkb Reassembled packet, or NULL
  142. */
  143. static struct pk_buff * ipv4_reassemble ( struct pk_buff * pkb ) {
  144. struct iphdr *iphdr = pkb->data;
  145. struct frag_buffer *fragbuf;
  146. /**
  147. * Check if the fragment belongs to any fragment series
  148. */
  149. list_for_each_entry ( fragbuf, &frag_buffers, list ) {
  150. if ( fragbuf->ident == iphdr->ident &&
  151. fragbuf->src.s_addr == iphdr->src.s_addr ) {
  152. /**
  153. * Check if the packet is the expected fragment
  154. *
  155. * The offset of the new packet must be equal to the
  156. * length of the data accumulated so far (the length of
  157. * the reassembled packet buffer
  158. */
  159. if ( pkb_len ( fragbuf->frag_pkb ) ==
  160. ( iphdr->frags & IP_MASK_OFFSET ) ) {
  161. /**
  162. * Append the contents of the fragment to the
  163. * reassembled packet buffer
  164. */
  165. pkb_pull ( pkb, sizeof ( *iphdr ) );
  166. memcpy ( pkb_put ( fragbuf->frag_pkb,
  167. pkb_len ( pkb ) ),
  168. pkb->data, pkb_len ( pkb ) );
  169. free_pkb ( pkb );
  170. /** Check if the fragment series is over */
  171. if ( !iphdr->frags & IP_MASK_MOREFRAGS ) {
  172. pkb = fragbuf->frag_pkb;
  173. free_fragbuf ( fragbuf );
  174. return pkb;
  175. }
  176. } else {
  177. /* Discard the fragment series */
  178. free_fragbuf ( fragbuf );
  179. free_pkb ( pkb );
  180. }
  181. return NULL;
  182. }
  183. }
  184. /** Check if the fragment is the first in the fragment series */
  185. if ( iphdr->frags & IP_MASK_MOREFRAGS &&
  186. ( ( iphdr->frags & IP_MASK_OFFSET ) == 0 ) ) {
  187. /** Create a new fragment buffer */
  188. fragbuf = ( struct frag_buffer* ) malloc ( sizeof( *fragbuf ) );
  189. fragbuf->ident = iphdr->ident;
  190. fragbuf->src = iphdr->src;
  191. /* Set up the reassembly packet buffer */
  192. fragbuf->frag_pkb = alloc_pkb ( IP_FRAG_PKB_SIZE );
  193. pkb_pull ( pkb, sizeof ( *iphdr ) );
  194. memcpy ( pkb_put ( fragbuf->frag_pkb, pkb_len ( pkb ) ),
  195. pkb->data, pkb_len ( pkb ) );
  196. free_pkb ( pkb );
  197. /* Set the reassembly timer */
  198. fragbuf->frag_timer.timeout = IP_FRAG_TIMEOUT;
  199. fragbuf->frag_timer.expired = ipv4_frag_expired;
  200. start_timer ( &fragbuf->frag_timer );
  201. /* Add the fragment buffer to the list of fragment buffers */
  202. list_add ( &fragbuf->list, &frag_buffers );
  203. }
  204. return NULL;
  205. }
  206. /**
  207. * Add IPv4 pseudo-header checksum to existing checksum
  208. *
  209. * @v pkb Packet buffer
  210. * @v csum Existing checksum
  211. * @ret csum Updated checksum
  212. */
  213. static uint16_t ipv4_pshdr_chksum ( struct pk_buff *pkb, uint16_t csum ) {
  214. struct ipv4_pseudo_header pshdr;
  215. struct iphdr *iphdr = pkb->data;
  216. size_t hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 );
  217. /* Build pseudo-header */
  218. pshdr.src = iphdr->src;
  219. pshdr.dest = iphdr->dest;
  220. pshdr.zero_padding = 0x00;
  221. pshdr.protocol = iphdr->protocol;
  222. pshdr.len = htons ( pkb_len ( pkb ) - hdrlen );
  223. /* Update the checksum value */
  224. return tcpip_continue_chksum ( csum, &pshdr, sizeof ( pshdr ) );
  225. }
  226. /**
  227. * Determine link-layer address
  228. *
  229. * @v dest IPv4 destination address
  230. * @v src IPv4 source address
  231. * @v netdev Network device
  232. * @v ll_dest Link-layer destination address buffer
  233. * @ret rc Return status code
  234. */
  235. static int ipv4_ll_addr ( struct in_addr dest, struct in_addr src,
  236. struct net_device *netdev, uint8_t *ll_dest ) {
  237. struct ll_protocol *ll_protocol = netdev->ll_protocol;
  238. uint8_t *dest_bytes = ( ( uint8_t * ) &dest );
  239. if ( dest.s_addr == INADDR_BROADCAST ) {
  240. /* Broadcast address */
  241. memcpy ( ll_dest, ll_protocol->ll_broadcast,
  242. ll_protocol->ll_addr_len );
  243. return 0;
  244. } else if ( IN_MULTICAST ( dest.s_addr ) ) {
  245. /* Special case: IPv4 multicast over Ethernet. This
  246. * code may need to be generalised once we find out
  247. * what happens for other link layers.
  248. */
  249. ll_dest[0] = 0x01;
  250. ll_dest[1] = 0x00;
  251. ll_dest[2] = 0x5e;
  252. ll_dest[3] = dest_bytes[1] & 0x7f;
  253. ll_dest[4] = dest_bytes[2];
  254. ll_dest[5] = dest_bytes[3];
  255. return 0;
  256. } else {
  257. /* Unicast address: resolve via ARP */
  258. return arp_resolve ( netdev, &ipv4_protocol, &dest,
  259. &src, ll_dest );
  260. }
  261. }
  262. /**
  263. * Transmit IP packet
  264. *
  265. * @v pkb Packet buffer
  266. * @v tcpip Transport-layer protocol
  267. * @v st_dest Destination network-layer address
  268. * @v trans_csum Transport-layer checksum to complete, or NULL
  269. * @ret rc Status
  270. *
  271. * This function expects a transport-layer segment and prepends the IP header
  272. */
  273. static int ipv4_tx ( struct pk_buff *pkb,
  274. struct tcpip_protocol *tcpip_protocol,
  275. struct sockaddr_tcpip *st_dest, uint16_t *trans_csum ) {
  276. struct iphdr *iphdr = pkb_push ( pkb, sizeof ( *iphdr ) );
  277. struct sockaddr_in *sin_dest = ( ( struct sockaddr_in * ) st_dest );
  278. struct ipv4_miniroute *miniroute;
  279. struct in_addr next_hop;
  280. uint8_t ll_dest[MAX_LL_ADDR_LEN];
  281. int rc;
  282. /* Fill up the IP header, except source address */
  283. iphdr->verhdrlen = ( IP_VER | ( sizeof ( *iphdr ) / 4 ) );
  284. iphdr->service = IP_TOS;
  285. iphdr->len = htons ( pkb_len ( pkb ) );
  286. iphdr->ident = htons ( ++next_ident );
  287. iphdr->frags = 0;
  288. iphdr->ttl = IP_TTL;
  289. iphdr->protocol = tcpip_protocol->tcpip_proto;
  290. iphdr->chksum = 0;
  291. iphdr->dest = sin_dest->sin_addr;
  292. /* Use routing table to identify next hop and transmitting netdev */
  293. next_hop = iphdr->dest;
  294. miniroute = ipv4_route ( &next_hop );
  295. if ( ! miniroute ) {
  296. DBG ( "IPv4 has no route to %s\n", inet_ntoa ( iphdr->dest ) );
  297. rc = -EHOSTUNREACH;
  298. goto err;
  299. }
  300. iphdr->src = miniroute->address;
  301. /* Determine link-layer destination address */
  302. if ( ( rc = ipv4_ll_addr ( next_hop, iphdr->src, miniroute->netdev,
  303. ll_dest ) ) != 0 ) {
  304. DBG ( "IPv4 has no link-layer address for %s\n",
  305. inet_ntoa ( iphdr->dest ) );
  306. goto err;
  307. }
  308. /* Fix up checksums */
  309. if ( trans_csum )
  310. *trans_csum = ipv4_pshdr_chksum ( pkb, *trans_csum );
  311. iphdr->chksum = tcpip_chksum ( iphdr, sizeof ( *iphdr ) );
  312. /* Print IP4 header for debugging */
  313. DBG ( "IPv4 TX %s->", inet_ntoa ( iphdr->src ) );
  314. DBG ( "%s len %d proto %d id %04x csum %04x\n",
  315. inet_ntoa ( iphdr->dest ), ntohs ( iphdr->len ), iphdr->protocol,
  316. ntohs ( iphdr->ident ), ntohs ( iphdr->chksum ) );
  317. /* Hand off to link layer */
  318. return net_tx ( pkb, miniroute->netdev, &ipv4_protocol, ll_dest );
  319. err:
  320. free_pkb ( pkb );
  321. return rc;
  322. }
  323. /**
  324. * Process incoming packets
  325. *
  326. * @v pkb Packet buffer
  327. * @v netdev Network device
  328. * @v ll_source Link-layer destination source
  329. *
  330. * This function expects an IP4 network datagram. It processes the headers
  331. * and sends it to the transport layer.
  332. */
  333. static int ipv4_rx ( struct pk_buff *pkb, struct net_device *netdev __unused,
  334. const void *ll_source __unused ) {
  335. struct iphdr *iphdr = pkb->data;
  336. size_t hdrlen;
  337. size_t len;
  338. union {
  339. struct sockaddr_in sin;
  340. struct sockaddr_tcpip st;
  341. } src, dest;
  342. uint16_t csum;
  343. uint16_t pshdr_csum;
  344. /* Sanity check the IPv4 header */
  345. if ( pkb_len ( pkb ) < sizeof ( *iphdr ) ) {
  346. DBG ( "IPv4 packet too short at %d bytes (min %d bytes)\n",
  347. pkb_len ( pkb ), sizeof ( *iphdr ) );
  348. goto err;
  349. }
  350. if ( ( iphdr->verhdrlen & IP_MASK_VER ) != IP_VER ) {
  351. DBG ( "IPv4 version %#02x not supported\n", iphdr->verhdrlen );
  352. goto err;
  353. }
  354. hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 );
  355. if ( hdrlen < sizeof ( *iphdr ) ) {
  356. DBG ( "IPv4 header too short at %d bytes (min %d bytes)\n",
  357. hdrlen, sizeof ( *iphdr ) );
  358. goto err;
  359. }
  360. if ( hdrlen > pkb_len ( pkb ) ) {
  361. DBG ( "IPv4 header too long at %d bytes "
  362. "(packet is %d bytes)\n", hdrlen, pkb_len ( pkb ) );
  363. goto err;
  364. }
  365. if ( ( csum = tcpip_chksum ( iphdr, hdrlen ) ) != 0 ) {
  366. DBG ( "IPv4 checksum incorrect (is %04x including checksum "
  367. "field, should be 0000)\n", csum );
  368. goto err;
  369. }
  370. len = ntohs ( iphdr->len );
  371. if ( len < hdrlen ) {
  372. DBG ( "IPv4 length too short at %d bytes "
  373. "(header is %d bytes)\n", len, hdrlen );
  374. goto err;
  375. }
  376. if ( len > pkb_len ( pkb ) ) {
  377. DBG ( "IPv4 length too long at %d bytes "
  378. "(packet is %d bytes)\n", len, pkb_len ( pkb ) );
  379. goto err;
  380. }
  381. /* Print IPv4 header for debugging */
  382. DBG ( "IPv4 RX %s<-", inet_ntoa ( iphdr->dest ) );
  383. DBG ( "%s len %d proto %d id %04x csum %04x\n",
  384. inet_ntoa ( iphdr->src ), ntohs ( iphdr->len ), iphdr->protocol,
  385. ntohs ( iphdr->ident ), ntohs ( iphdr->chksum ) );
  386. /* Truncate packet to correct length, calculate pseudo-header
  387. * checksum and then strip off the IPv4 header.
  388. */
  389. pkb_unput ( pkb, ( pkb_len ( pkb ) - len ) );
  390. pshdr_csum = ipv4_pshdr_chksum ( pkb, TCPIP_EMPTY_CSUM );
  391. pkb_pull ( pkb, hdrlen );
  392. /* Fragment reassembly */
  393. if ( ( iphdr->frags & htons ( IP_MASK_MOREFRAGS ) ) ||
  394. ( ( iphdr->frags & htons ( IP_MASK_OFFSET ) ) != 0 ) ) {
  395. /* Pass the fragment to ipv4_reassemble() which either
  396. * returns a fully reassembled packet buffer or NULL.
  397. */
  398. pkb = ipv4_reassemble ( pkb );
  399. if ( ! pkb )
  400. return 0;
  401. }
  402. /* Construct socket addresses and hand off to transport layer */
  403. memset ( &src, 0, sizeof ( src ) );
  404. src.sin.sin_family = AF_INET;
  405. src.sin.sin_addr = iphdr->src;
  406. memset ( &dest, 0, sizeof ( dest ) );
  407. dest.sin.sin_family = AF_INET;
  408. dest.sin.sin_addr = iphdr->dest;
  409. return tcpip_rx ( pkb, iphdr->protocol, &src.st, &dest.st, pshdr_csum);
  410. err:
  411. free_pkb ( pkb );
  412. return -EINVAL;
  413. }
  414. /**
  415. * Check existence of IPv4 address for ARP
  416. *
  417. * @v netdev Network device
  418. * @v net_addr Network-layer address
  419. * @ret rc Return status code
  420. */
  421. static int ipv4_arp_check ( struct net_device *netdev, const void *net_addr ) {
  422. const struct in_addr *address = net_addr;
  423. struct ipv4_miniroute *miniroute;
  424. list_for_each_entry ( miniroute, &miniroutes, list ) {
  425. if ( ( miniroute->netdev == netdev ) &&
  426. ( miniroute->address.s_addr == address->s_addr ) ) {
  427. /* Found matching address */
  428. return 0;
  429. }
  430. }
  431. return -ENOENT;
  432. }
  /**
   * Convert IPv4 address to dotted-quad notation
   *
   * @v in	IP address
   * @ret string	IP address in dotted-quad notation
   *
   * The result lives in a single static buffer, so each call
   * overwrites the string returned by the previous one.
   */
  char * inet_ntoa ( struct in_addr in ) {
      /* Large enough for "xxx.xxx.xxx.xxx" plus terminating NUL */
      static char dotted[16];
      const uint8_t *octet = ( ( const uint8_t * ) &in );

      /* Octets are printed in the order in which they are stored */
      sprintf ( dotted, "%d.%d.%d.%d",
                octet[0], octet[1], octet[2], octet[3] );
      return dotted;
  }
  445. /**
  446. * Transcribe IP address
  447. *
  448. * @v net_addr IP address
  449. * @ret string IP address in dotted-quad notation
  450. *
  451. */
  452. static const char * ipv4_ntoa ( const void *net_addr ) {
  453. return inet_ntoa ( * ( ( struct in_addr * ) net_addr ) );
  454. }
  /** IPv4 protocol
   *
   * Network-layer protocol descriptor.  The protocol number is stored
   * in network byte order, received packets are handled by ipv4_rx()
   * and addresses are transcribed by ipv4_ntoa().
   * NOTE(review): __net_protocol presumably registers this descriptor
   * with the network stack - confirm against its definition.
   */
  struct net_protocol ipv4_protocol __net_protocol = {
      .name = "IP",
      .net_proto = htons ( ETH_P_IP ),
      .net_addr_len = sizeof ( struct in_addr ),
      .rx = ipv4_rx,
      .ntoa = ipv4_ntoa,
  };
  /** IPv4 TCPIP net protocol
   *
   * Descriptor for the AF_INET address family, with ipv4_tx() as its
   * transmit routine.
   */
  struct tcpip_net_protocol ipv4_tcpip_protocol __tcpip_net_protocol = {
      .name = "IPv4",
      .sa_family = AF_INET,
      .tx = ipv4_tx,
  };
  /** IPv4 ARP protocol
   *
   * Ties ARP to the IPv4 protocol descriptor; ipv4_arp_check() decides
   * whether a given address is configured on a given network device.
   */
  struct arp_net_protocol ipv4_arp_protocol __arp_net_protocol = {
      .net_protocol = &ipv4_protocol,
      .check = ipv4_arp_check,
  };
  473. };