You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

ipv4.c 14KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521
  1. #include <string.h>
  2. #include <stdint.h>
  3. #include <errno.h>
  4. #include <byteswap.h>
  5. #include <malloc.h>
  6. #include <vsprintf.h>
  7. #include <gpxe/list.h>
  8. #include <gpxe/in.h>
  9. #include <gpxe/arp.h>
  10. #include <gpxe/if_ether.h>
  11. #include <gpxe/pkbuff.h>
  12. #include <gpxe/netdevice.h>
  13. #include "uip/uip.h"
  14. #include <gpxe/ip.h>
  15. #include <gpxe/tcpip.h>
  16. /** @file
  17. *
  18. * IPv4 protocol
  19. *
  20. * The gPXE IP stack is currently implemented on top of the uIP
  21. * protocol stack. This file provides wrappers around uIP so that
  22. * higher-level protocol implementations do not need to talk directly
  23. * to uIP (which has a somewhat baroque API).
  24. *
  25. */
  26. /* Unique IP datagram identification number */
  27. static uint16_t next_ident = 0;
  28. struct net_protocol ipv4_protocol;
  29. /** An IPv4 address/routing table entry */
  30. struct ipv4_miniroute {
  31. /** List of miniroutes */
  32. struct list_head list;
  33. /** Network device */
  34. struct net_device *netdev;
  35. /** IPv4 address */
  36. struct in_addr address;
  37. /** Subnet mask */
  38. struct in_addr netmask;
  39. /** Gateway address */
  40. struct in_addr gateway;
  41. };
  /** All configured IPv4 miniroutes (entries without a gateway come first) */
  static LIST_HEAD ( miniroutes );

  /** Fragment series that are currently being reassembled */
  static LIST_HEAD ( frag_buffers );
  46. /**
  47. * Add IPv4 interface
  48. *
  49. * @v netdev Network device
  50. * @v address IPv4 address
  51. * @v netmask Subnet mask
  52. * @v gateway Gateway address (or @c INADDR_NONE for no gateway)
  53. * @ret rc Return status code
  54. *
  55. */
  56. int add_ipv4_address ( struct net_device *netdev, struct in_addr address,
  57. struct in_addr netmask, struct in_addr gateway ) {
  58. struct ipv4_miniroute *miniroute;
  59. /* Allocate and populate miniroute structure */
  60. miniroute = malloc ( sizeof ( *miniroute ) );
  61. if ( ! miniroute )
  62. return -ENOMEM;
  63. miniroute->netdev = netdev;
  64. miniroute->address = address;
  65. miniroute->netmask = netmask;
  66. miniroute->gateway = gateway;
  67. /* Add to end of list if we have a gateway, otherwise to start
  68. * of list.
  69. */
  70. if ( gateway.s_addr != INADDR_NONE ) {
  71. list_add_tail ( &miniroute->list, &miniroutes );
  72. } else {
  73. list_add ( &miniroute->list, &miniroutes );
  74. }
  75. return 0;
  76. }
  77. /**
  78. * Remove IPv4 interface
  79. *
  80. * @v netdev Network device
  81. */
  82. void del_ipv4_address ( struct net_device *netdev ) {
  83. struct ipv4_miniroute *miniroute;
  84. list_for_each_entry ( miniroute, &miniroutes, list ) {
  85. if ( miniroute->netdev == netdev ) {
  86. list_del ( &miniroute->list );
  87. break;
  88. }
  89. }
  90. }
  91. /**
  92. * Dump IPv4 packet header
  93. *
  94. * @v iphdr IPv4 header
  95. */
  96. static void ipv4_dump ( struct iphdr *iphdr __unused ) {
  97. DBG ( "IP4 %p transmitting %p+%d ident %d protocol %d header-csum %x\n",
  98. &ipv4_protocol, iphdr, ntohs ( iphdr->len ), ntohs ( iphdr->ident ),
  99. iphdr->protocol, ntohs ( iphdr->chksum ) );
  100. DBG ( "src %s, dest %s\n", inet_ntoa ( iphdr->src ), inet_ntoa ( iphdr->dest ) );
  101. }
  102. /**
  103. * Fragment reassembly counter timeout
  104. *
  105. * @v timer Retry timer
  106. * @v over If asserted, the timer is greater than @c MAX_TIMEOUT
  107. */
  108. static void ipv4_frag_expired ( struct retry_timer *timer __unused,
  109. int over ) {
  110. if ( over ) {
  111. DBG ( "Fragment reassembly timeout" );
  112. /* Free the fragment buffer */
  113. }
  114. }
  115. /**
  116. * Free fragment buffer
  117. *
  118. * @v fragbug Fragment buffer
  119. */
  120. static void free_fragbuf ( struct frag_buffer *fragbuf ) {
  121. if ( fragbuf ) {
  122. free_dma ( fragbuf, sizeof ( *fragbuf ) );
  123. }
  124. }
  125. /**
  126. * Fragment reassembler
  127. *
  128. * @v pkb Packet buffer, fragment of the datagram
  129. * @ret frag_pkb Reassembled packet, or NULL
  130. */
  131. static struct pk_buff * ipv4_reassemble ( struct pk_buff * pkb ) {
  132. struct iphdr *iphdr = pkb->data;
  133. struct frag_buffer *fragbuf;
  134. /**
  135. * Check if the fragment belongs to any fragment series
  136. */
  137. list_for_each_entry ( fragbuf, &frag_buffers, list ) {
  138. if ( fragbuf->ident == iphdr->ident &&
  139. fragbuf->src.s_addr == iphdr->src.s_addr ) {
  140. /**
  141. * Check if the packet is the expected fragment
  142. *
  143. * The offset of the new packet must be equal to the
  144. * length of the data accumulated so far (the length of
  145. * the reassembled packet buffer
  146. */
  147. if ( pkb_len ( fragbuf->frag_pkb ) ==
  148. ( iphdr->frags & IP_MASK_OFFSET ) ) {
  149. /**
  150. * Append the contents of the fragment to the
  151. * reassembled packet buffer
  152. */
  153. pkb_pull ( pkb, sizeof ( *iphdr ) );
  154. memcpy ( pkb_put ( fragbuf->frag_pkb,
  155. pkb_len ( pkb ) ),
  156. pkb->data, pkb_len ( pkb ) );
  157. free_pkb ( pkb );
  158. /** Check if the fragment series is over */
  159. if ( !iphdr->frags & IP_MASK_MOREFRAGS ) {
  160. pkb = fragbuf->frag_pkb;
  161. free_fragbuf ( fragbuf );
  162. return pkb;
  163. }
  164. } else {
  165. /* Discard the fragment series */
  166. free_fragbuf ( fragbuf );
  167. free_pkb ( pkb );
  168. }
  169. return NULL;
  170. }
  171. }
  172. /** Check if the fragment is the first in the fragment series */
  173. if ( iphdr->frags & IP_MASK_MOREFRAGS &&
  174. ( ( iphdr->frags & IP_MASK_OFFSET ) == 0 ) ) {
  175. /** Create a new fragment buffer */
  176. fragbuf = ( struct frag_buffer* ) malloc ( sizeof( *fragbuf ) );
  177. fragbuf->ident = iphdr->ident;
  178. fragbuf->src = iphdr->src;
  179. /* Set up the reassembly packet buffer */
  180. fragbuf->frag_pkb = alloc_pkb ( IP_FRAG_PKB_SIZE );
  181. pkb_pull ( pkb, sizeof ( *iphdr ) );
  182. memcpy ( pkb_put ( fragbuf->frag_pkb, pkb_len ( pkb ) ),
  183. pkb->data, pkb_len ( pkb ) );
  184. free_pkb ( pkb );
  185. /* Set the reassembly timer */
  186. fragbuf->frag_timer.timeout = IP_FRAG_TIMEOUT;
  187. fragbuf->frag_timer.expired = ipv4_frag_expired;
  188. start_timer ( &fragbuf->frag_timer );
  189. /* Add the fragment buffer to the list of fragment buffers */
  190. list_add ( &fragbuf->list, &frag_buffers );
  191. }
  192. return NULL;
  193. }
  194. /**
  195. * Complete the transport-layer checksum
  196. *
  197. * @v pkb Packet buffer
  198. * @v tcpip Transport-layer protocol
  199. *
  200. * This function calculates the tcpip
  201. */
  202. static void ipv4_tx_csum ( struct pk_buff *pkb,
  203. struct tcpip_protocol *tcpip ) {
  204. struct iphdr *iphdr = pkb->data;
  205. struct ipv4_pseudo_header pshdr;
  206. uint16_t *csum = ( ( ( void * ) iphdr ) + sizeof ( *iphdr )
  207. + tcpip->csum_offset );
  208. /* Calculate pseudo header */
  209. pshdr.src = iphdr->src;
  210. pshdr.dest = iphdr->dest;
  211. pshdr.zero_padding = 0x00;
  212. pshdr.protocol = iphdr->protocol;
  213. /* This is only valid when IPv4 does not have options */
  214. pshdr.len = htons ( pkb_len ( pkb ) - sizeof ( *iphdr ) );
  215. /* Update the checksum value */
  216. *csum = tcpip_continue_chksum ( *csum, &pshdr, sizeof ( pshdr ) );
  217. }
  218. /**
  219. * Calculate the transport-layer checksum while processing packets
  220. */
  221. static uint16_t ipv4_rx_csum ( struct pk_buff *pkb __unused,
  222. uint8_t trans_proto __unused ) {
  223. /**
  224. * This function needs to be implemented. Until then, it will return
  225. * 0xffffffff every time
  226. */
  227. return 0xffff;
  228. }
  229. /**
  230. * Transmit IP packet
  231. *
  232. * @v pkb Packet buffer
  233. * @v tcpip Transport-layer protocol
  234. * @v st_dest Destination network-layer address
  235. * @ret rc Status
  236. *
  237. * This function expects a transport-layer segment and prepends the IP header
  238. */
  239. static int ipv4_tx ( struct pk_buff *pkb,
  240. struct tcpip_protocol *tcpip_protocol,
  241. struct sockaddr_tcpip *st_dest ) {
  242. struct iphdr *iphdr = pkb_push ( pkb, sizeof ( *iphdr ) );
  243. struct sockaddr_in *sin_dest = ( ( struct sockaddr_in * ) st_dest );
  244. struct ipv4_miniroute *miniroute;
  245. struct net_device *netdev = NULL;
  246. struct in_addr next_hop;
  247. uint8_t ll_dest_buf[MAX_LL_ADDR_LEN];
  248. const uint8_t *ll_dest = ll_dest_buf;
  249. int rc;
  250. /* Fill up the IP header, except source address */
  251. iphdr->verhdrlen = ( IP_VER << 4 ) | ( sizeof ( *iphdr ) / 4 );
  252. iphdr->service = IP_TOS;
  253. iphdr->len = htons ( pkb_len ( pkb ) );
  254. iphdr->ident = htons ( next_ident++ );
  255. iphdr->frags = 0;
  256. iphdr->ttl = IP_TTL;
  257. iphdr->protocol = tcpip_protocol->tcpip_proto;
  258. /* Copy destination address */
  259. iphdr->dest = sin_dest->sin_addr;
  260. /**
  261. * All fields in the IP header filled in except the source network
  262. * address (which requires routing) and the header checksum (which
  263. * requires the source network address). As the pseudo header requires
  264. * the source address as well and the transport-layer checksum is
  265. * updated after routing.
  266. */
  267. /* Use routing table to identify next hop and transmitting netdev */
  268. next_hop = iphdr->dest;
  269. list_for_each_entry ( miniroute, &miniroutes, list ) {
  270. int local, has_gw;
  271. local = ( ( ( iphdr->dest.s_addr ^ miniroute->address.s_addr )
  272. & miniroute->netmask.s_addr ) == 0 );
  273. has_gw = ( miniroute->gateway.s_addr != INADDR_NONE );
  274. if ( local || has_gw ) {
  275. netdev = miniroute->netdev;
  276. iphdr->src = miniroute->address;
  277. if ( ! local )
  278. next_hop = miniroute->gateway;
  279. break;
  280. }
  281. }
  282. /* Abort if no network device identified */
  283. if ( ! netdev ) {
  284. DBG ( "No route to %s\n", inet_ntoa ( iphdr->dest ) );
  285. rc = -EHOSTUNREACH;
  286. goto err;
  287. }
  288. /* Calculate the transport layer checksum */
  289. if ( tcpip_protocol->csum_offset > 0 ) {
  290. ipv4_tx_csum ( pkb, tcpip_protocol );
  291. }
  292. /* Calculate header checksum, in network byte order */
  293. iphdr->chksum = 0;
  294. iphdr->chksum = tcpip_chksum ( iphdr, sizeof ( *iphdr ) );
  295. /* Print IP4 header for debugging */
  296. ipv4_dump ( iphdr );
  297. /* Determine link-layer destination address */
  298. if ( next_hop.s_addr == INADDR_BROADCAST ) {
  299. /* Broadcast address */
  300. ll_dest = netdev->ll_protocol->ll_broadcast;
  301. } else if ( IN_MULTICAST ( next_hop.s_addr ) ) {
  302. /* Special case: IPv4 multicast over Ethernet. This
  303. * code may need to be generalised once we find out
  304. * what happens for other link layers.
  305. */
  306. uint8_t *next_hop_bytes = ( uint8_t * ) &next_hop;
  307. ll_dest_buf[0] = 0x01;
  308. ll_dest_buf[0] = 0x00;
  309. ll_dest_buf[0] = 0x5e;
  310. ll_dest_buf[3] = next_hop_bytes[1] & 0x7f;
  311. ll_dest_buf[4] = next_hop_bytes[2];
  312. ll_dest_buf[5] = next_hop_bytes[3];
  313. } else {
  314. /* Unicast address: resolve via ARP */
  315. if ( ( rc = arp_resolve ( netdev, &ipv4_protocol, &next_hop,
  316. &iphdr->src, ll_dest_buf ) ) != 0 ) {
  317. DBG ( "No ARP entry for %s\n",
  318. inet_ntoa ( iphdr->dest ) );
  319. goto err;
  320. }
  321. }
  322. /* Hand off to link layer */
  323. return net_tx ( pkb, netdev, &ipv4_protocol, ll_dest );
  324. err:
  325. free_pkb ( pkb );
  326. return rc;
  327. }
  328. /**
  329. * Process incoming packets
  330. *
  331. * @v pkb Packet buffer
  332. * @v netdev Network device
  333. * @v ll_source Link-layer destination source
  334. *
  335. * This function expects an IP4 network datagram. It processes the headers
  336. * and sends it to the transport layer.
  337. */
  338. static int ipv4_rx ( struct pk_buff *pkb, struct net_device *netdev __unused,
  339. const void *ll_source __unused ) {
  340. struct iphdr *iphdr = pkb->data;
  341. union {
  342. struct sockaddr_in sin;
  343. struct sockaddr_tcpip st;
  344. } src, dest;
  345. uint16_t chksum;
  346. /* Sanity check */
  347. if ( pkb_len ( pkb ) < sizeof ( *iphdr ) ) {
  348. DBG ( "IP datagram too short (%d bytes)\n", pkb_len ( pkb ) );
  349. goto err;
  350. }
  351. /* Print IP4 header for debugging */
  352. ipv4_dump ( iphdr );
  353. /* Validate version and header length */
  354. if ( iphdr->verhdrlen != 0x45 ) {
  355. DBG ( "Bad version and header length %x\n", iphdr->verhdrlen );
  356. goto err;
  357. }
  358. /* Validate length of IP packet */
  359. if ( ntohs ( iphdr->len ) > pkb_len ( pkb ) ) {
  360. DBG ( "Inconsistent packet length %d\n",
  361. ntohs ( iphdr->len ) );
  362. goto err;
  363. }
  364. /* Verify the checksum */
  365. if ( ( chksum = ipv4_rx_csum ( pkb, iphdr->protocol ) ) != 0xffff ) {
  366. DBG ( "Bad checksum %x\n", chksum );
  367. }
  368. /* Fragment reassembly */
  369. if ( iphdr->frags & IP_MASK_MOREFRAGS ||
  370. ( !iphdr->frags & IP_MASK_MOREFRAGS &&
  371. iphdr->frags & IP_MASK_OFFSET != 0 ) ) {
  372. /* Pass the fragment to the reassembler ipv4_ressable() which
  373. * either returns a fully reassembled packet buffer or NULL.
  374. */
  375. pkb = ipv4_reassemble ( pkb );
  376. if ( !pkb ) {
  377. return 0;
  378. }
  379. }
  380. /* To reduce code size, the following functions are not implemented:
  381. * 1. Check the destination address
  382. * 2. Check the TTL field
  383. * 3. Check the service field
  384. */
  385. /* Construct socket addresses */
  386. memset ( &src, 0, sizeof ( src ) );
  387. src.sin.sin_family = AF_INET;
  388. src.sin.sin_addr = iphdr->src;
  389. memset ( &dest, 0, sizeof ( dest ) );
  390. dest.sin.sin_family = AF_INET;
  391. dest.sin.sin_addr = iphdr->dest;
  392. /* Strip header */
  393. pkb_unput ( pkb, pkb_len ( pkb ) - ntohs ( iphdr->len ) );
  394. pkb_pull ( pkb, sizeof ( *iphdr ) );
  395. /* Send it to the transport layer */
  396. return tcpip_rx ( pkb, iphdr->protocol, &src.st, &dest.st );
  397. err:
  398. free_pkb ( pkb );
  399. return -EINVAL;
  400. }
  401. /**
  402. * Check existence of IPv4 address for ARP
  403. *
  404. * @v netdev Network device
  405. * @v net_addr Network-layer address
  406. * @ret rc Return status code
  407. */
  408. static int ipv4_arp_check ( struct net_device *netdev, const void *net_addr ) {
  409. const struct in_addr *address = net_addr;
  410. struct ipv4_miniroute *miniroute;
  411. list_for_each_entry ( miniroute, &miniroutes, list ) {
  412. if ( ( miniroute->netdev == netdev ) &&
  413. ( miniroute->address.s_addr == address->s_addr ) ) {
  414. /* Found matching address */
  415. return 0;
  416. }
  417. }
  418. return -ENOENT;
  419. }
  /**
   * Convert IPv4 address to dotted-quad notation
   *
   * @v in		IP address
   * @ret string		IP address in dotted-quad notation
   *
   * The bytes of the address are printed in the order in which they are
   * stored.  The result is held in a static buffer that is overwritten by
   * the next call.
   */
  char * inet_ntoa ( struct in_addr in ) {
      static char dotted[16]; /* "xxx.xxx.xxx.xxx" plus terminating NUL */
      const uint8_t *octet = ( const uint8_t * ) &in;

      sprintf ( dotted, "%d.%d.%d.%d",
                octet[0], octet[1], octet[2], octet[3] );
      return dotted;
  }
  432. /**
  433. * Transcribe IP address
  434. *
  435. * @v net_addr IP address
  436. * @ret string IP address in dotted-quad notation
  437. *
  438. */
  439. static const char * ipv4_ntoa ( const void *net_addr ) {
  440. return inet_ntoa ( * ( ( struct in_addr * ) net_addr ) );
  441. }
  442. /** IPv4 protocol */
  443. struct net_protocol ipv4_protocol __net_protocol = {
  444. .name = "IP",
  445. .net_proto = htons ( ETH_P_IP ),
  446. .net_addr_len = sizeof ( struct in_addr ),
  447. .rx = ipv4_rx,
  448. .ntoa = ipv4_ntoa,
  449. };
  450. /** IPv4 TCPIP net protocol */
  451. struct tcpip_net_protocol ipv4_tcpip_protocol __tcpip_net_protocol = {
  452. .name = "IPv4",
  453. .sa_family = AF_INET,
  454. .tx = ipv4_tx,
  455. };
  456. /** IPv4 ARP protocol */
  457. struct arp_net_protocol ipv4_arp_protocol __arp_net_protocol = {
  458. .net_protocol = &ipv4_protocol,
  459. .check = ipv4_arp_check,
  460. };