You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

ipoib.c 21KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803
  1. /*
  2. * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License as
  6. * published by the Free Software Foundation; either version 2 of the
  7. * License, or any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful, but
  10. * WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write to the Free Software
  16. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  17. * 02110-1301, USA.
  18. */
  19. FILE_LICENCE ( GPL2_OR_LATER );
  20. #include <stdint.h>
  21. #include <stdio.h>
  22. #include <unistd.h>
  23. #include <string.h>
  24. #include <byteswap.h>
  25. #include <errno.h>
  26. #include <ipxe/errortab.h>
  27. #include <ipxe/if_arp.h>
  28. #include <ipxe/iobuf.h>
  29. #include <ipxe/netdevice.h>
  30. #include <ipxe/infiniband.h>
  31. #include <ipxe/ib_pathrec.h>
  32. #include <ipxe/ib_mcast.h>
  33. #include <ipxe/ipoib.h>
  34. /** @file
  35. *
  36. * IP over Infiniband
  37. */
  /** Number of entries in the IPoIB send work queue */
  #define IPOIB_NUM_SEND_WQES 2

  /** Number of entries in the IPoIB receive work queue */
  #define IPOIB_NUM_RECV_WQES 4

  /** Number of entries in the IPoIB completion queue */
  #define IPOIB_NUM_CQES 8
  44. /** An IPoIB device */
  45. struct ipoib_device {
  46. /** Network device */
  47. struct net_device *netdev;
  48. /** Underlying Infiniband device */
  49. struct ib_device *ibdev;
  50. /** Completion queue */
  51. struct ib_completion_queue *cq;
  52. /** Queue pair */
  53. struct ib_queue_pair *qp;
  54. /** Broadcast MAC */
  55. struct ipoib_mac broadcast;
  56. /** Joined to IPv4 broadcast multicast group
  57. *
  58. * This flag indicates whether or not we have initiated the
  59. * join to the IPv4 broadcast multicast group.
  60. */
  61. int broadcast_joined;
  62. /** IPv4 broadcast multicast group membership */
  63. struct ib_mc_membership broadcast_membership;
  64. };
  65. /** Broadcast IPoIB address */
  66. static struct ipoib_mac ipoib_broadcast = {
  67. .flags__qpn = htonl ( IB_QPN_BROADCAST ),
  68. .gid.bytes = { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00,
  69. 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff },
  70. };
  71. /** Link status for "broadcast join in progress" */
  72. #define EINPROGRESS_JOINING __einfo_error ( EINFO_EINPROGRESS_JOINING )
  73. #define EINFO_EINPROGRESS_JOINING __einfo_uniqify \
  74. ( EINFO_EINPROGRESS, 0x01, "Joining" )
  75. /** Human-readable message for the link status */
  76. struct errortab ipoib_errors[] __errortab = {
  77. __einfo_errortab ( EINFO_EINPROGRESS_JOINING ),
  78. };
  79. /****************************************************************************
  80. *
  81. * IPoIB peer cache
  82. *
  83. ****************************************************************************
  84. */
  85. /**
  86. * IPoIB peer address
  87. *
  88. * The IPoIB link-layer header is only four bytes long and so does not
  89. * have sufficient room to store IPoIB MAC address(es). We therefore
  90. * maintain a cache of MAC addresses identified by a single-byte key,
  91. * and abuse the spare two bytes within the link-layer header to
  92. * communicate these MAC addresses between the link-layer code and the
  93. * netdevice driver.
  94. */
  95. struct ipoib_peer {
  96. /** Key */
  97. uint8_t key;
  98. /** MAC address */
  99. struct ipoib_mac mac;
  100. };
  101. /** Number of IPoIB peer cache entries
  102. *
  103. * Must be a power of two.
  104. */
  105. #define IPOIB_NUM_CACHED_PEERS 4
  106. /** IPoIB peer address cache */
  107. static struct ipoib_peer ipoib_peer_cache[IPOIB_NUM_CACHED_PEERS];
  108. /** Oldest IPoIB peer cache entry index */
  109. static unsigned int ipoib_peer_cache_idx = 0;
  110. /** IPoIB peer cache entry validity flag */
  111. #define IPOIB_PEER_KEY_VALID 0x80
  112. /**
  113. * Look up cached peer by key
  114. *
  115. * @v key Peer cache key
  116. * @ret peer Peer cache entry, or NULL
  117. */
  118. static struct ipoib_peer * ipoib_lookup_peer_by_key ( unsigned int key ) {
  119. struct ipoib_peer *peer;
  120. unsigned int i;
  121. if ( ! key )
  122. return NULL;
  123. for ( i = 0 ; i < IPOIB_NUM_CACHED_PEERS ; i++ ) {
  124. peer = &ipoib_peer_cache[i];
  125. if ( peer->key == key )
  126. return peer;
  127. }
  128. DBG ( "IPoIB warning: peer cache lost track of key %x while still in "
  129. "use\n", key );
  130. return NULL;
  131. }
  132. /**
  133. * Store GID and QPN in peer cache
  134. *
  135. * @v mac Peer MAC address
  136. * @ret peer Peer cache entry
  137. */
  138. static struct ipoib_peer * ipoib_cache_peer ( const struct ipoib_mac *mac ) {
  139. struct ipoib_peer *peer;
  140. uint8_t key;
  141. unsigned int i;
  142. /* Look for existing cache entry */
  143. for ( i = 0 ; i < IPOIB_NUM_CACHED_PEERS ; i++ ) {
  144. peer = &ipoib_peer_cache[i];
  145. if ( memcmp ( &peer->mac, mac, sizeof ( peer->mac ) ) == 0 )
  146. return peer;
  147. }
  148. /* No entry found: create a new one */
  149. key = ( ipoib_peer_cache_idx++ | IPOIB_PEER_KEY_VALID );
  150. peer = &ipoib_peer_cache[ key % IPOIB_NUM_CACHED_PEERS ];
  151. if ( peer->key )
  152. DBG ( "IPoIB peer %x evicted from cache\n", peer->key );
  153. memset ( peer, 0, sizeof ( *peer ) );
  154. peer->key = key;
  155. memcpy ( &peer->mac, mac, sizeof ( peer->mac ) );
  156. DBG ( "IPoIB peer %x has MAC %s\n",
  157. peer->key, ipoib_ntoa ( &peer->mac ) );
  158. return peer;
  159. }
  160. /****************************************************************************
  161. *
  162. * IPoIB link layer
  163. *
  164. ****************************************************************************
  165. */
  166. /**
  167. * Add IPoIB link-layer header
  168. *
  169. * @v netdev Network device
  170. * @v iobuf I/O buffer
  171. * @v ll_dest Link-layer destination address
  172. * @v ll_source Source link-layer address
  173. * @v net_proto Network-layer protocol, in network-byte order
  174. * @ret rc Return status code
  175. */
  176. static int ipoib_push ( struct net_device *netdev __unused,
  177. struct io_buffer *iobuf, const void *ll_dest,
  178. const void *ll_source __unused, uint16_t net_proto ) {
  179. struct ipoib_hdr *ipoib_hdr =
  180. iob_push ( iobuf, sizeof ( *ipoib_hdr ) );
  181. const struct ipoib_mac *dest_mac = ll_dest;
  182. const struct ipoib_mac *src_mac = ll_source;
  183. struct ipoib_peer *dest;
  184. struct ipoib_peer *src;
  185. /* Add link-layer addresses to cache */
  186. dest = ipoib_cache_peer ( dest_mac );
  187. src = ipoib_cache_peer ( src_mac );
  188. /* Build IPoIB header */
  189. ipoib_hdr->proto = net_proto;
  190. ipoib_hdr->u.peer.dest = dest->key;
  191. ipoib_hdr->u.peer.src = src->key;
  192. return 0;
  193. }
  194. /**
  195. * Remove IPoIB link-layer header
  196. *
  197. * @v netdev Network device
  198. * @v iobuf I/O buffer
  199. * @ret ll_dest Link-layer destination address
  200. * @ret ll_source Source link-layer address
  201. * @ret net_proto Network-layer protocol, in network-byte order
  202. * @ret flags Packet flags
  203. * @ret rc Return status code
  204. */
  205. static int ipoib_pull ( struct net_device *netdev,
  206. struct io_buffer *iobuf, const void **ll_dest,
  207. const void **ll_source, uint16_t *net_proto,
  208. unsigned int *flags ) {
  209. struct ipoib_device *ipoib = netdev->priv;
  210. struct ipoib_hdr *ipoib_hdr = iobuf->data;
  211. struct ipoib_peer *dest;
  212. struct ipoib_peer *source;
  213. /* Sanity check */
  214. if ( iob_len ( iobuf ) < sizeof ( *ipoib_hdr ) ) {
  215. DBG ( "IPoIB packet too short for link-layer header\n" );
  216. DBG_HD ( iobuf->data, iob_len ( iobuf ) );
  217. return -EINVAL;
  218. }
  219. /* Strip off IPoIB header */
  220. iob_pull ( iobuf, sizeof ( *ipoib_hdr ) );
  221. /* Identify source and destination addresses, and clear
  222. * reserved word in IPoIB header
  223. */
  224. dest = ipoib_lookup_peer_by_key ( ipoib_hdr->u.peer.dest );
  225. source = ipoib_lookup_peer_by_key ( ipoib_hdr->u.peer.src );
  226. ipoib_hdr->u.reserved = 0;
  227. /* Fill in required fields */
  228. *ll_dest = ( dest ? &dest->mac : &ipoib->broadcast );
  229. *ll_source = ( source ? &source->mac : &ipoib->broadcast );
  230. *net_proto = ipoib_hdr->proto;
  231. *flags = ( ( *ll_dest == &ipoib->broadcast ) ?
  232. ( LL_MULTICAST | LL_BROADCAST ) : 0 );
  233. return 0;
  234. }
  235. /**
  236. * Initialise IPoIB link-layer address
  237. *
  238. * @v hw_addr Hardware address
  239. * @v ll_addr Link-layer address
  240. */
  241. static void ipoib_init_addr ( const void *hw_addr, void *ll_addr ) {
  242. const union ib_guid *guid = hw_addr;
  243. struct ipoib_mac *mac = ll_addr;
  244. memset ( mac, 0, sizeof ( *mac ) );
  245. memcpy ( &mac->gid.s.guid, guid, sizeof ( mac->gid.s.guid ) );
  246. }
  247. /**
  248. * Transcribe IPoIB link-layer address
  249. *
  250. * @v ll_addr Link-layer address
  251. * @ret string Link-layer address in human-readable format
  252. */
  253. const char * ipoib_ntoa ( const void *ll_addr ) {
  254. static char buf[45];
  255. const struct ipoib_mac *mac = ll_addr;
  256. snprintf ( buf, sizeof ( buf ), "%08x:%08x:%08x:%08x:%08x",
  257. htonl ( mac->flags__qpn ), htonl ( mac->gid.dwords[0] ),
  258. htonl ( mac->gid.dwords[1] ),
  259. htonl ( mac->gid.dwords[2] ),
  260. htonl ( mac->gid.dwords[3] ) );
  261. return buf;
  262. }
  263. /**
  264. * Hash multicast address
  265. *
  266. * @v af Address family
  267. * @v net_addr Network-layer address
  268. * @v ll_addr Link-layer address to fill in
  269. * @ret rc Return status code
  270. */
  271. static int ipoib_mc_hash ( unsigned int af __unused,
  272. const void *net_addr __unused,
  273. void *ll_addr __unused ) {
  274. return -ENOTSUP;
  275. }
  276. /**
  277. * Generate Mellanox Ethernet-compatible compressed link-layer address
  278. *
  279. * @v ll_addr Link-layer address
  280. * @v eth_addr Ethernet-compatible address to fill in
  281. */
  282. static int ipoib_mlx_eth_addr ( const union ib_guid *guid,
  283. uint8_t *eth_addr ) {
  284. eth_addr[0] = ( ( guid->bytes[3] == 2 ) ? 0x00 : 0x02 );
  285. eth_addr[1] = guid->bytes[1];
  286. eth_addr[2] = guid->bytes[2];
  287. eth_addr[3] = guid->bytes[5];
  288. eth_addr[4] = guid->bytes[6];
  289. eth_addr[5] = guid->bytes[7];
  290. return 0;
  291. }
  /**
   * An IPoIB Ethernet-compatible compressed link-layer address generator
   *
   * A generator is selected when GUID bytes 1 and 2 match the two
   * bytes recorded here.
   */
  struct ipoib_eth_addr_handler {
      /** GUID byte 1 */
      uint8_t byte1;

      /** GUID byte 2 */
      uint8_t byte2;

      /**
       * Handler
       *
       * @v guid		Infiniband GUID
       * @v eth_addr	Ethernet-compatible address to fill in
       * @ret rc		Return status code
       */
      int ( * eth_addr ) ( const union ib_guid *guid,
                           uint8_t *eth_addr );
  };
  302. /** IPoIB Ethernet-compatible compressed link-layer address generators */
  303. static struct ipoib_eth_addr_handler ipoib_eth_addr_handlers[] = {
  304. { 0x02, 0xc9, ipoib_mlx_eth_addr },
  305. };
  306. /**
  307. * Generate Ethernet-compatible compressed link-layer address
  308. *
  309. * @v ll_addr Link-layer address
  310. * @v eth_addr Ethernet-compatible address to fill in
  311. */
  312. static int ipoib_eth_addr ( const void *ll_addr, void *eth_addr ) {
  313. const struct ipoib_mac *ipoib_addr = ll_addr;
  314. const union ib_guid *guid = &ipoib_addr->gid.s.guid;
  315. struct ipoib_eth_addr_handler *handler;
  316. unsigned int i;
  317. for ( i = 0 ; i < ( sizeof ( ipoib_eth_addr_handlers ) /
  318. sizeof ( ipoib_eth_addr_handlers[0] ) ) ; i++ ) {
  319. handler = &ipoib_eth_addr_handlers[i];
  320. if ( ( handler->byte1 == guid->bytes[1] ) &&
  321. ( handler->byte2 == guid->bytes[2] ) ) {
  322. return handler->eth_addr ( guid, eth_addr );
  323. }
  324. }
  325. return -ENOTSUP;
  326. }
  327. /** IPoIB protocol */
  328. struct ll_protocol ipoib_protocol __ll_protocol = {
  329. .name = "IPoIB",
  330. .ll_proto = htons ( ARPHRD_INFINIBAND ),
  331. .hw_addr_len = sizeof ( union ib_guid ),
  332. .ll_addr_len = IPOIB_ALEN,
  333. .ll_header_len = IPOIB_HLEN,
  334. .push = ipoib_push,
  335. .pull = ipoib_pull,
  336. .init_addr = ipoib_init_addr,
  337. .ntoa = ipoib_ntoa,
  338. .mc_hash = ipoib_mc_hash,
  339. .eth_addr = ipoib_eth_addr,
  340. };
  341. /**
  342. * Allocate IPoIB device
  343. *
  344. * @v priv_size Size of driver private data
  345. * @ret netdev Network device, or NULL
  346. */
  347. struct net_device * alloc_ipoibdev ( size_t priv_size ) {
  348. struct net_device *netdev;
  349. netdev = alloc_netdev ( priv_size );
  350. if ( netdev ) {
  351. netdev->ll_protocol = &ipoib_protocol;
  352. netdev->ll_broadcast = ( uint8_t * ) &ipoib_broadcast;
  353. netdev->max_pkt_len = IB_MAX_PAYLOAD_SIZE;
  354. }
  355. return netdev;
  356. }
  357. /****************************************************************************
  358. *
  359. * IPoIB network device
  360. *
  361. ****************************************************************************
  362. */
  363. /**
  364. * Transmit packet via IPoIB network device
  365. *
  366. * @v netdev Network device
  367. * @v iobuf I/O buffer
  368. * @ret rc Return status code
  369. */
  370. static int ipoib_transmit ( struct net_device *netdev,
  371. struct io_buffer *iobuf ) {
  372. struct ipoib_device *ipoib = netdev->priv;
  373. struct ib_device *ibdev = ipoib->ibdev;
  374. struct ipoib_hdr *ipoib_hdr;
  375. struct ipoib_peer *peer;
  376. struct ib_address_vector dest;
  377. int rc;
  378. /* Sanity check */
  379. if ( iob_len ( iobuf ) < sizeof ( *ipoib_hdr ) ) {
  380. DBGC ( ipoib, "IPoIB %p buffer too short\n", ipoib );
  381. return -EINVAL;
  382. }
  383. ipoib_hdr = iobuf->data;
  384. /* Attempting transmission while link is down will put the
  385. * queue pair into an error state, so don't try it.
  386. */
  387. if ( ! ib_link_ok ( ibdev ) )
  388. return -ENETUNREACH;
  389. /* Identify destination address */
  390. peer = ipoib_lookup_peer_by_key ( ipoib_hdr->u.peer.dest );
  391. if ( ! peer )
  392. return -ENXIO;
  393. ipoib_hdr->u.reserved = 0;
  394. /* Construct address vector */
  395. memset ( &dest, 0, sizeof ( dest ) );
  396. dest.qpn = ( ntohl ( peer->mac.flags__qpn ) & IB_QPN_MASK );
  397. dest.gid_present = 1;
  398. memcpy ( &dest.gid, &peer->mac.gid, sizeof ( dest.gid ) );
  399. if ( ( rc = ib_resolve_path ( ibdev, &dest ) ) != 0 ) {
  400. /* Path not resolved yet */
  401. return rc;
  402. }
  403. return ib_post_send ( ibdev, ipoib->qp, &dest, iobuf );
  404. }
  405. /**
  406. * Handle IPoIB send completion
  407. *
  408. * @v ibdev Infiniband device
  409. * @v qp Queue pair
  410. * @v iobuf I/O buffer
  411. * @v rc Completion status code
  412. */
  413. static void ipoib_complete_send ( struct ib_device *ibdev __unused,
  414. struct ib_queue_pair *qp,
  415. struct io_buffer *iobuf, int rc ) {
  416. struct ipoib_device *ipoib = ib_qp_get_ownerdata ( qp );
  417. netdev_tx_complete_err ( ipoib->netdev, iobuf, rc );
  418. }
  419. /**
  420. * Handle IPoIB receive completion
  421. *
  422. * @v ibdev Infiniband device
  423. * @v qp Queue pair
  424. * @v dest Destination address vector, or NULL
  425. * @v source Source address vector, or NULL
  426. * @v iobuf I/O buffer
  427. * @v rc Completion status code
  428. */
  429. static void ipoib_complete_recv ( struct ib_device *ibdev __unused,
  430. struct ib_queue_pair *qp,
  431. struct ib_address_vector *dest __unused,
  432. struct ib_address_vector *source,
  433. struct io_buffer *iobuf, int rc ) {
  434. struct ipoib_device *ipoib = ib_qp_get_ownerdata ( qp );
  435. struct net_device *netdev = ipoib->netdev;
  436. struct ipoib_hdr *ipoib_hdr;
  437. struct ipoib_mac ll_src;
  438. struct ipoib_peer *src;
  439. /* Record errors */
  440. if ( rc != 0 ) {
  441. netdev_rx_err ( netdev, iobuf, rc );
  442. return;
  443. }
  444. /* Sanity check */
  445. if ( iob_len ( iobuf ) < sizeof ( struct ipoib_hdr ) ) {
  446. DBGC ( ipoib, "IPoIB %p received packet too short to "
  447. "contain IPoIB header\n", ipoib );
  448. DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) );
  449. netdev_rx_err ( netdev, iobuf, -EIO );
  450. return;
  451. }
  452. ipoib_hdr = iobuf->data;
  453. if ( ! source ) {
  454. DBGC ( ipoib, "IPoIB %p received packet without address "
  455. "vector\n", ipoib );
  456. netdev_rx_err ( netdev, iobuf, -ENOTTY );
  457. return;
  458. }
  459. /* Parse source address */
  460. if ( source->gid_present ) {
  461. ll_src.flags__qpn = htonl ( source->qpn );
  462. memcpy ( &ll_src.gid, &source->gid, sizeof ( ll_src.gid ) );
  463. src = ipoib_cache_peer ( &ll_src );
  464. ipoib_hdr->u.peer.src = src->key;
  465. }
  466. /* Hand off to network layer */
  467. netdev_rx ( netdev, iobuf );
  468. }
  469. /** IPoIB completion operations */
  470. static struct ib_completion_queue_operations ipoib_cq_op = {
  471. .complete_send = ipoib_complete_send,
  472. .complete_recv = ipoib_complete_recv,
  473. };
  474. /** IPoIB queue pair operations */
  475. static struct ib_queue_pair_operations ipoib_qp_op = {
  476. .alloc_iob = alloc_iob,
  477. };
  478. /**
  479. * Poll IPoIB network device
  480. *
  481. * @v netdev Network device
  482. */
  483. static void ipoib_poll ( struct net_device *netdev ) {
  484. struct ipoib_device *ipoib = netdev->priv;
  485. struct ib_device *ibdev = ipoib->ibdev;
  486. ib_poll_eq ( ibdev );
  487. }
  488. /**
  489. * Handle IPv4 broadcast multicast group join completion
  490. *
  491. * @v ibdev Infiniband device
  492. * @v qp Queue pair
  493. * @v membership Multicast group membership
  494. * @v rc Status code
  495. * @v mad Response MAD (or NULL on error)
  496. */
  497. void ipoib_join_complete ( struct ib_device *ibdev __unused,
  498. struct ib_queue_pair *qp __unused,
  499. struct ib_mc_membership *membership, int rc,
  500. union ib_mad *mad __unused ) {
  501. struct ipoib_device *ipoib = container_of ( membership,
  502. struct ipoib_device, broadcast_membership );
  503. /* Record join status as link status */
  504. netdev_link_err ( ipoib->netdev, rc );
  505. }
  506. /**
  507. * Join IPv4 broadcast multicast group
  508. *
  509. * @v ipoib IPoIB device
  510. * @ret rc Return status code
  511. */
  512. static int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) {
  513. int rc;
  514. if ( ( rc = ib_mcast_join ( ipoib->ibdev, ipoib->qp,
  515. &ipoib->broadcast_membership,
  516. &ipoib->broadcast.gid,
  517. ipoib_join_complete ) ) != 0 ) {
  518. DBGC ( ipoib, "IPoIB %p could not join broadcast group: %s\n",
  519. ipoib, strerror ( rc ) );
  520. return rc;
  521. }
  522. ipoib->broadcast_joined = 1;
  523. return 0;
  524. }
  525. /**
  526. * Leave IPv4 broadcast multicast group
  527. *
  528. * @v ipoib IPoIB device
  529. */
  530. static void ipoib_leave_broadcast_group ( struct ipoib_device *ipoib ) {
  531. if ( ipoib->broadcast_joined ) {
  532. ib_mcast_leave ( ipoib->ibdev, ipoib->qp,
  533. &ipoib->broadcast_membership );
  534. ipoib->broadcast_joined = 0;
  535. }
  536. }
  537. /**
  538. * Handle link status change
  539. *
  540. * @v ibdev Infiniband device
  541. */
  542. static void ipoib_link_state_changed ( struct ib_device *ibdev ) {
  543. struct net_device *netdev = ib_get_ownerdata ( ibdev );
  544. struct ipoib_device *ipoib = netdev->priv;
  545. struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
  546. int rc;
  547. /* Leave existing broadcast group */
  548. ipoib_leave_broadcast_group ( ipoib );
  549. /* Update MAC address based on potentially-new GID prefix */
  550. memcpy ( &mac->gid.s.prefix, &ibdev->gid.s.prefix,
  551. sizeof ( mac->gid.s.prefix ) );
  552. /* Update broadcast GID based on potentially-new partition key */
  553. ipoib->broadcast.gid.words[2] =
  554. htons ( ibdev->pkey | IB_PKEY_FULL );
  555. /* Set net device link state to reflect Infiniband link state */
  556. rc = ib_link_rc ( ibdev );
  557. netdev_link_err ( netdev, ( rc ? rc : -EINPROGRESS_JOINING ) );
  558. /* Join new broadcast group */
  559. if ( ib_is_open ( ibdev ) && ib_link_ok ( ibdev ) &&
  560. ( ( rc = ipoib_join_broadcast_group ( ipoib ) ) != 0 ) ) {
  561. DBGC ( ipoib, "IPoIB %p could not rejoin broadcast group: "
  562. "%s\n", ipoib, strerror ( rc ) );
  563. netdev_link_err ( netdev, rc );
  564. return;
  565. }
  566. }
  567. /**
  568. * Open IPoIB network device
  569. *
  570. * @v netdev Network device
  571. * @ret rc Return status code
  572. */
  573. static int ipoib_open ( struct net_device *netdev ) {
  574. struct ipoib_device *ipoib = netdev->priv;
  575. struct ib_device *ibdev = ipoib->ibdev;
  576. struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
  577. int rc;
  578. /* Open IB device */
  579. if ( ( rc = ib_open ( ibdev ) ) != 0 ) {
  580. DBGC ( ipoib, "IPoIB %p could not open device: %s\n",
  581. ipoib, strerror ( rc ) );
  582. goto err_ib_open;
  583. }
  584. /* Allocate completion queue */
  585. ipoib->cq = ib_create_cq ( ibdev, IPOIB_NUM_CQES, &ipoib_cq_op );
  586. if ( ! ipoib->cq ) {
  587. DBGC ( ipoib, "IPoIB %p could not allocate completion queue\n",
  588. ipoib );
  589. rc = -ENOMEM;
  590. goto err_create_cq;
  591. }
  592. /* Allocate queue pair */
  593. ipoib->qp = ib_create_qp ( ibdev, IB_QPT_UD, IPOIB_NUM_SEND_WQES,
  594. ipoib->cq, IPOIB_NUM_RECV_WQES, ipoib->cq,
  595. &ipoib_qp_op );
  596. if ( ! ipoib->qp ) {
  597. DBGC ( ipoib, "IPoIB %p could not allocate queue pair\n",
  598. ipoib );
  599. rc = -ENOMEM;
  600. goto err_create_qp;
  601. }
  602. ib_qp_set_ownerdata ( ipoib->qp, ipoib );
  603. /* Update MAC address with QPN */
  604. mac->flags__qpn = htonl ( ipoib->qp->qpn );
  605. /* Fill receive rings */
  606. ib_refill_recv ( ibdev, ipoib->qp );
  607. /* Fake a link status change to join the broadcast group */
  608. ipoib_link_state_changed ( ibdev );
  609. return 0;
  610. ib_destroy_qp ( ibdev, ipoib->qp );
  611. err_create_qp:
  612. ib_destroy_cq ( ibdev, ipoib->cq );
  613. err_create_cq:
  614. ib_close ( ibdev );
  615. err_ib_open:
  616. return rc;
  617. }
  618. /**
  619. * Close IPoIB network device
  620. *
  621. * @v netdev Network device
  622. */
  623. static void ipoib_close ( struct net_device *netdev ) {
  624. struct ipoib_device *ipoib = netdev->priv;
  625. struct ib_device *ibdev = ipoib->ibdev;
  626. struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
  627. /* Leave broadcast group */
  628. ipoib_leave_broadcast_group ( ipoib );
  629. /* Remove QPN from MAC address */
  630. mac->flags__qpn = 0;
  631. /* Tear down the queues */
  632. ib_destroy_qp ( ibdev, ipoib->qp );
  633. ib_destroy_cq ( ibdev, ipoib->cq );
  634. /* Close IB device */
  635. ib_close ( ibdev );
  636. }
  637. /** IPoIB network device operations */
  638. static struct net_device_operations ipoib_operations = {
  639. .open = ipoib_open,
  640. .close = ipoib_close,
  641. .transmit = ipoib_transmit,
  642. .poll = ipoib_poll,
  643. };
  644. /**
  645. * Probe IPoIB device
  646. *
  647. * @v ibdev Infiniband device
  648. * @ret rc Return status code
  649. */
  650. static int ipoib_probe ( struct ib_device *ibdev ) {
  651. struct net_device *netdev;
  652. struct ipoib_device *ipoib;
  653. int rc;
  654. /* Allocate network device */
  655. netdev = alloc_ipoibdev ( sizeof ( *ipoib ) );
  656. if ( ! netdev )
  657. return -ENOMEM;
  658. netdev_init ( netdev, &ipoib_operations );
  659. ipoib = netdev->priv;
  660. ib_set_ownerdata ( ibdev, netdev );
  661. netdev->dev = ibdev->dev;
  662. memset ( ipoib, 0, sizeof ( *ipoib ) );
  663. ipoib->netdev = netdev;
  664. ipoib->ibdev = ibdev;
  665. /* Extract hardware address */
  666. memcpy ( netdev->hw_addr, &ibdev->gid.s.guid,
  667. sizeof ( ibdev->gid.s.guid ) );
  668. /* Set default broadcast address */
  669. memcpy ( &ipoib->broadcast, &ipoib_broadcast,
  670. sizeof ( ipoib->broadcast ) );
  671. netdev->ll_broadcast = ( ( uint8_t * ) &ipoib->broadcast );
  672. /* Register network device */
  673. if ( ( rc = register_netdev ( netdev ) ) != 0 )
  674. goto err_register_netdev;
  675. return 0;
  676. err_register_netdev:
  677. netdev_nullify ( netdev );
  678. netdev_put ( netdev );
  679. return rc;
  680. }
  /**
   * Remove IPoIB device
   *
   * @v ibdev		Infiniband device
   *
   * Unregisters and releases the network device that was recorded as
   * the Infiniband device's owner data.
   */
  static void ipoib_remove ( struct ib_device *ibdev ) {
      struct net_device *netdev;

      netdev = ib_get_ownerdata ( ibdev );

      unregister_netdev ( netdev );
      netdev_nullify ( netdev );
      netdev_put ( netdev );
  }
  692. /** IPoIB driver */
  693. struct ib_driver ipoib_driver __ib_driver = {
  694. .name = "IPoIB",
  695. .probe = ipoib_probe,
  696. .notify = ipoib_link_state_changed,
  697. .remove = ipoib_remove,
  698. };