You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

ipoib.c 21KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787
  1. /*
  2. * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License as
  6. * published by the Free Software Foundation; either version 2 of the
  7. * License, or any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful, but
  10. * WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write to the Free Software
  16. * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17. */
  18. FILE_LICENCE ( GPL2_OR_LATER );
  19. #include <stdint.h>
  20. #include <stdio.h>
  21. #include <unistd.h>
  22. #include <string.h>
  23. #include <byteswap.h>
  24. #include <errno.h>
  25. #include <ipxe/errortab.h>
  26. #include <ipxe/if_arp.h>
  27. #include <ipxe/iobuf.h>
  28. #include <ipxe/netdevice.h>
  29. #include <ipxe/infiniband.h>
  30. #include <ipxe/ib_pathrec.h>
  31. #include <ipxe/ib_mcast.h>
  32. #include <ipxe/ipoib.h>
  33. /** @file
  34. *
  35. * IP over Infiniband
  36. */
  /** Number of work queue entries requested for the IPoIB send queue */
  #define IPOIB_NUM_SEND_WQES 2

  /** Number of work queue entries requested for the IPoIB receive queue */
  #define IPOIB_NUM_RECV_WQES 4

  /** Number of entries requested for the IPoIB completion queue */
  #define IPOIB_NUM_CQES 8
  43. /** An IPoIB device */
  44. struct ipoib_device {
  45. /** Network device */
  46. struct net_device *netdev;
  47. /** Underlying Infiniband device */
  48. struct ib_device *ibdev;
  49. /** Completion queue */
  50. struct ib_completion_queue *cq;
  51. /** Queue pair */
  52. struct ib_queue_pair *qp;
  53. /** Broadcast MAC */
  54. struct ipoib_mac broadcast;
  55. /** Joined to IPv4 broadcast multicast group
  56. *
  57. * This flag indicates whether or not we have initiated the
  58. * join to the IPv4 broadcast multicast group.
  59. */
  60. int broadcast_joined;
  61. /** IPv4 broadcast multicast group membership */
  62. struct ib_mc_membership broadcast_membership;
  63. };
  64. /** Broadcast IPoIB address */
  65. static struct ipoib_mac ipoib_broadcast = {
  66. .flags__qpn = htonl ( IB_QPN_BROADCAST ),
  67. .gid.bytes = { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00,
  68. 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff },
  69. };
  70. /** Link status for "broadcast join in progress" */
  71. #define EINPROGRESS_JOINING __einfo_error ( EINFO_EINPROGRESS_JOINING )
  72. #define EINFO_EINPROGRESS_JOINING __einfo_uniqify \
  73. ( EINFO_EINPROGRESS, 0x01, "Joining" )
  74. /** Human-readable message for the link status */
  75. struct errortab ipoib_errors[] __errortab = {
  76. __einfo_errortab ( EINFO_EINPROGRESS_JOINING ),
  77. };
  78. /****************************************************************************
  79. *
  80. * IPoIB peer cache
  81. *
  82. ****************************************************************************
  83. */
  84. /**
  85. * IPoIB peer address
  86. *
  87. * The IPoIB link-layer header is only four bytes long and so does not
  88. * have sufficient room to store IPoIB MAC address(es). We therefore
  89. * maintain a cache of MAC addresses identified by a single-byte key,
  90. * and abuse the spare two bytes within the link-layer header to
  91. * communicate these MAC addresses between the link-layer code and the
  92. * netdevice driver.
  93. */
  94. struct ipoib_peer {
  95. /** Key */
  96. uint8_t key;
  97. /** MAC address */
  98. struct ipoib_mac mac;
  99. };
  100. /** Number of IPoIB peer cache entries
  101. *
  102. * Must be a power of two.
  103. */
  104. #define IPOIB_NUM_CACHED_PEERS 4
  105. /** IPoIB peer address cache */
  106. static struct ipoib_peer ipoib_peer_cache[IPOIB_NUM_CACHED_PEERS];
  107. /** Oldest IPoIB peer cache entry index */
  108. static unsigned int ipoib_peer_cache_idx = 1;
  109. /**
  110. * Look up cached peer by key
  111. *
  112. * @v key Peer cache key
  113. * @ret peer Peer cache entry, or NULL
  114. */
  115. static struct ipoib_peer * ipoib_lookup_peer_by_key ( unsigned int key ) {
  116. struct ipoib_peer *peer;
  117. unsigned int i;
  118. for ( i = 0 ; i < IPOIB_NUM_CACHED_PEERS ; i++ ) {
  119. peer = &ipoib_peer_cache[i];
  120. if ( peer->key == key )
  121. return peer;
  122. }
  123. if ( key != 0 ) {
  124. DBG ( "IPoIB warning: peer cache lost track of key %x while "
  125. "still in use\n", key );
  126. }
  127. return NULL;
  128. }
  129. /**
  130. * Store GID and QPN in peer cache
  131. *
  132. * @v mac Peer MAC address
  133. * @ret peer Peer cache entry
  134. */
  135. static struct ipoib_peer * ipoib_cache_peer ( const struct ipoib_mac *mac ) {
  136. struct ipoib_peer *peer;
  137. unsigned int key;
  138. unsigned int i;
  139. /* Look for existing cache entry */
  140. for ( i = 0 ; i < IPOIB_NUM_CACHED_PEERS ; i++ ) {
  141. peer = &ipoib_peer_cache[i];
  142. if ( memcmp ( &peer->mac, mac, sizeof ( peer->mac ) ) == 0 )
  143. return peer;
  144. }
  145. /* No entry found: create a new one */
  146. key = ipoib_peer_cache_idx++;
  147. peer = &ipoib_peer_cache[ key % IPOIB_NUM_CACHED_PEERS ];
  148. if ( peer->key )
  149. DBG ( "IPoIB peer %x evicted from cache\n", peer->key );
  150. memset ( peer, 0, sizeof ( *peer ) );
  151. peer->key = key;
  152. memcpy ( &peer->mac, mac, sizeof ( peer->mac ) );
  153. DBG ( "IPoIB peer %x has MAC %s\n",
  154. peer->key, ipoib_ntoa ( &peer->mac ) );
  155. return peer;
  156. }
  157. /****************************************************************************
  158. *
  159. * IPoIB link layer
  160. *
  161. ****************************************************************************
  162. */
  163. /**
  164. * Add IPoIB link-layer header
  165. *
  166. * @v netdev Network device
  167. * @v iobuf I/O buffer
  168. * @v ll_dest Link-layer destination address
  169. * @v ll_source Source link-layer address
  170. * @v net_proto Network-layer protocol, in network-byte order
  171. * @ret rc Return status code
  172. */
  173. static int ipoib_push ( struct net_device *netdev __unused,
  174. struct io_buffer *iobuf, const void *ll_dest,
  175. const void *ll_source __unused, uint16_t net_proto ) {
  176. struct ipoib_hdr *ipoib_hdr =
  177. iob_push ( iobuf, sizeof ( *ipoib_hdr ) );
  178. const struct ipoib_mac *dest_mac = ll_dest;
  179. const struct ipoib_mac *src_mac = ll_source;
  180. struct ipoib_peer *dest;
  181. struct ipoib_peer *src;
  182. /* Add link-layer addresses to cache */
  183. dest = ipoib_cache_peer ( dest_mac );
  184. src = ipoib_cache_peer ( src_mac );
  185. /* Build IPoIB header */
  186. ipoib_hdr->proto = net_proto;
  187. ipoib_hdr->u.peer.dest = dest->key;
  188. ipoib_hdr->u.peer.src = src->key;
  189. return 0;
  190. }
  191. /**
  192. * Remove IPoIB link-layer header
  193. *
  194. * @v netdev Network device
  195. * @v iobuf I/O buffer
  196. * @ret ll_dest Link-layer destination address
  197. * @ret ll_source Source link-layer address
  198. * @ret net_proto Network-layer protocol, in network-byte order
  199. * @ret rc Return status code
  200. */
  201. static int ipoib_pull ( struct net_device *netdev,
  202. struct io_buffer *iobuf, const void **ll_dest,
  203. const void **ll_source, uint16_t *net_proto ) {
  204. struct ipoib_device *ipoib = netdev->priv;
  205. struct ipoib_hdr *ipoib_hdr = iobuf->data;
  206. struct ipoib_peer *dest;
  207. struct ipoib_peer *source;
  208. /* Sanity check */
  209. if ( iob_len ( iobuf ) < sizeof ( *ipoib_hdr ) ) {
  210. DBG ( "IPoIB packet too short for link-layer header\n" );
  211. DBG_HD ( iobuf->data, iob_len ( iobuf ) );
  212. return -EINVAL;
  213. }
  214. /* Strip off IPoIB header */
  215. iob_pull ( iobuf, sizeof ( *ipoib_hdr ) );
  216. /* Identify source and destination addresses, and clear
  217. * reserved word in IPoIB header
  218. */
  219. dest = ipoib_lookup_peer_by_key ( ipoib_hdr->u.peer.dest );
  220. source = ipoib_lookup_peer_by_key ( ipoib_hdr->u.peer.src );
  221. ipoib_hdr->u.reserved = 0;
  222. /* Fill in required fields */
  223. *ll_dest = ( dest ? &dest->mac : &ipoib->broadcast );
  224. *ll_source = ( source ? &source->mac : &ipoib->broadcast );
  225. *net_proto = ipoib_hdr->proto;
  226. return 0;
  227. }
  228. /**
  229. * Initialise IPoIB link-layer address
  230. *
  231. * @v hw_addr Hardware address
  232. * @v ll_addr Link-layer address
  233. */
  234. static void ipoib_init_addr ( const void *hw_addr, void *ll_addr ) {
  235. const union ib_guid *guid = hw_addr;
  236. struct ipoib_mac *mac = ll_addr;
  237. memset ( mac, 0, sizeof ( *mac ) );
  238. memcpy ( &mac->gid.s.guid, guid, sizeof ( mac->gid.s.guid ) );
  239. }
  240. /**
  241. * Transcribe IPoIB link-layer address
  242. *
  243. * @v ll_addr Link-layer address
  244. * @ret string Link-layer address in human-readable format
  245. */
  246. const char * ipoib_ntoa ( const void *ll_addr ) {
  247. static char buf[45];
  248. const struct ipoib_mac *mac = ll_addr;
  249. snprintf ( buf, sizeof ( buf ), "%08x:%08x:%08x:%08x:%08x",
  250. htonl ( mac->flags__qpn ), htonl ( mac->gid.dwords[0] ),
  251. htonl ( mac->gid.dwords[1] ),
  252. htonl ( mac->gid.dwords[2] ),
  253. htonl ( mac->gid.dwords[3] ) );
  254. return buf;
  255. }
  256. /**
  257. * Hash multicast address
  258. *
  259. * @v af Address family
  260. * @v net_addr Network-layer address
  261. * @v ll_addr Link-layer address to fill in
  262. * @ret rc Return status code
  263. */
  264. static int ipoib_mc_hash ( unsigned int af __unused,
  265. const void *net_addr __unused,
  266. void *ll_addr __unused ) {
  267. return -ENOTSUP;
  268. }
  269. /**
  270. * Generate Mellanox Ethernet-compatible compressed link-layer address
  271. *
  272. * @v ll_addr Link-layer address
  273. * @v eth_addr Ethernet-compatible address to fill in
  274. */
  275. static int ipoib_mlx_eth_addr ( const union ib_guid *guid,
  276. uint8_t *eth_addr ) {
  277. eth_addr[0] = ( ( guid->bytes[3] == 2 ) ? 0x00 : 0x02 );
  278. eth_addr[1] = guid->bytes[1];
  279. eth_addr[2] = guid->bytes[2];
  280. eth_addr[3] = guid->bytes[5];
  281. eth_addr[4] = guid->bytes[6];
  282. eth_addr[5] = guid->bytes[7];
  283. return 0;
  284. }
  /**
   * An IPoIB Ethernet-compatible compressed link-layer address generator
   *
   * A generator is selected when bytes 1 and 2 of the GUID match
   * the values recorded here.
   */
  struct ipoib_eth_addr_handler {
      /** Value that GUID byte 1 must have */
      uint8_t byte1;
      /** Value that GUID byte 2 must have */
      uint8_t byte2;
      /** Function producing the Ethernet-compatible address */
      int ( * eth_addr ) ( const union ib_guid *guid,
                           uint8_t *eth_addr );
  };
  295. /** IPoIB Ethernet-compatible compressed link-layer address generators */
  296. static struct ipoib_eth_addr_handler ipoib_eth_addr_handlers[] = {
  297. { 0x02, 0xc9, ipoib_mlx_eth_addr },
  298. };
  299. /**
  300. * Generate Ethernet-compatible compressed link-layer address
  301. *
  302. * @v ll_addr Link-layer address
  303. * @v eth_addr Ethernet-compatible address to fill in
  304. */
  305. static int ipoib_eth_addr ( const void *ll_addr, void *eth_addr ) {
  306. const struct ipoib_mac *ipoib_addr = ll_addr;
  307. const union ib_guid *guid = &ipoib_addr->gid.s.guid;
  308. struct ipoib_eth_addr_handler *handler;
  309. unsigned int i;
  310. for ( i = 0 ; i < ( sizeof ( ipoib_eth_addr_handlers ) /
  311. sizeof ( ipoib_eth_addr_handlers[0] ) ) ; i++ ) {
  312. handler = &ipoib_eth_addr_handlers[i];
  313. if ( ( handler->byte1 == guid->bytes[1] ) &&
  314. ( handler->byte2 == guid->bytes[2] ) ) {
  315. return handler->eth_addr ( guid, eth_addr );
  316. }
  317. }
  318. return -ENOTSUP;
  319. }
  320. /** IPoIB protocol */
  321. struct ll_protocol ipoib_protocol __ll_protocol = {
  322. .name = "IPoIB",
  323. .ll_proto = htons ( ARPHRD_INFINIBAND ),
  324. .hw_addr_len = sizeof ( union ib_guid ),
  325. .ll_addr_len = IPOIB_ALEN,
  326. .ll_header_len = IPOIB_HLEN,
  327. .push = ipoib_push,
  328. .pull = ipoib_pull,
  329. .init_addr = ipoib_init_addr,
  330. .ntoa = ipoib_ntoa,
  331. .mc_hash = ipoib_mc_hash,
  332. .eth_addr = ipoib_eth_addr,
  333. };
  334. /**
  335. * Allocate IPoIB device
  336. *
  337. * @v priv_size Size of driver private data
  338. * @ret netdev Network device, or NULL
  339. */
  340. struct net_device * alloc_ipoibdev ( size_t priv_size ) {
  341. struct net_device *netdev;
  342. netdev = alloc_netdev ( priv_size );
  343. if ( netdev ) {
  344. netdev->ll_protocol = &ipoib_protocol;
  345. netdev->ll_broadcast = ( uint8_t * ) &ipoib_broadcast;
  346. netdev->max_pkt_len = IB_MAX_PAYLOAD_SIZE;
  347. }
  348. return netdev;
  349. }
  350. /****************************************************************************
  351. *
  352. * IPoIB network device
  353. *
  354. ****************************************************************************
  355. */
  356. /**
  357. * Transmit packet via IPoIB network device
  358. *
  359. * @v netdev Network device
  360. * @v iobuf I/O buffer
  361. * @ret rc Return status code
  362. */
  363. static int ipoib_transmit ( struct net_device *netdev,
  364. struct io_buffer *iobuf ) {
  365. struct ipoib_device *ipoib = netdev->priv;
  366. struct ib_device *ibdev = ipoib->ibdev;
  367. struct ipoib_hdr *ipoib_hdr;
  368. struct ipoib_peer *dest;
  369. struct ib_address_vector av;
  370. int rc;
  371. /* Sanity check */
  372. if ( iob_len ( iobuf ) < sizeof ( *ipoib_hdr ) ) {
  373. DBGC ( ipoib, "IPoIB %p buffer too short\n", ipoib );
  374. return -EINVAL;
  375. }
  376. ipoib_hdr = iobuf->data;
  377. /* Attempting transmission while link is down will put the
  378. * queue pair into an error state, so don't try it.
  379. */
  380. if ( ! ib_link_ok ( ibdev ) )
  381. return -ENETUNREACH;
  382. /* Identify destination address */
  383. dest = ipoib_lookup_peer_by_key ( ipoib_hdr->u.peer.dest );
  384. if ( ! dest )
  385. return -ENXIO;
  386. ipoib_hdr->u.reserved = 0;
  387. /* Construct address vector */
  388. memset ( &av, 0, sizeof ( av ) );
  389. av.qpn = ( ntohl ( dest->mac.flags__qpn ) & IB_QPN_MASK );
  390. av.gid_present = 1;
  391. memcpy ( &av.gid, &dest->mac.gid, sizeof ( av.gid ) );
  392. if ( ( rc = ib_resolve_path ( ibdev, &av ) ) != 0 ) {
  393. /* Path not resolved yet */
  394. return rc;
  395. }
  396. return ib_post_send ( ibdev, ipoib->qp, &av, iobuf );
  397. }
  398. /**
  399. * Handle IPoIB send completion
  400. *
  401. * @v ibdev Infiniband device
  402. * @v qp Queue pair
  403. * @v iobuf I/O buffer
  404. * @v rc Completion status code
  405. */
  406. static void ipoib_complete_send ( struct ib_device *ibdev __unused,
  407. struct ib_queue_pair *qp,
  408. struct io_buffer *iobuf, int rc ) {
  409. struct ipoib_device *ipoib = ib_qp_get_ownerdata ( qp );
  410. netdev_tx_complete_err ( ipoib->netdev, iobuf, rc );
  411. }
  412. /**
  413. * Handle IPoIB receive completion
  414. *
  415. * @v ibdev Infiniband device
  416. * @v qp Queue pair
  417. * @v av Address vector, or NULL
  418. * @v iobuf I/O buffer
  419. * @v rc Completion status code
  420. */
  421. static void ipoib_complete_recv ( struct ib_device *ibdev __unused,
  422. struct ib_queue_pair *qp,
  423. struct ib_address_vector *av,
  424. struct io_buffer *iobuf, int rc ) {
  425. struct ipoib_device *ipoib = ib_qp_get_ownerdata ( qp );
  426. struct net_device *netdev = ipoib->netdev;
  427. struct ipoib_hdr *ipoib_hdr;
  428. struct ipoib_mac ll_src;
  429. struct ipoib_peer *src;
  430. /* Record errors */
  431. if ( rc != 0 ) {
  432. netdev_rx_err ( netdev, iobuf, rc );
  433. return;
  434. }
  435. /* Sanity check */
  436. if ( iob_len ( iobuf ) < sizeof ( struct ipoib_hdr ) ) {
  437. DBGC ( ipoib, "IPoIB %p received packet too short to "
  438. "contain IPoIB header\n", ipoib );
  439. DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) );
  440. netdev_rx_err ( netdev, iobuf, -EIO );
  441. return;
  442. }
  443. ipoib_hdr = iobuf->data;
  444. if ( ! av ) {
  445. DBGC ( ipoib, "IPoIB %p received packet without address "
  446. "vector\n", ipoib );
  447. netdev_rx_err ( netdev, iobuf, -ENOTTY );
  448. return;
  449. }
  450. /* Parse source address */
  451. if ( av->gid_present ) {
  452. ll_src.flags__qpn = htonl ( av->qpn );
  453. memcpy ( &ll_src.gid, &av->gid, sizeof ( ll_src.gid ) );
  454. src = ipoib_cache_peer ( &ll_src );
  455. ipoib_hdr->u.peer.src = src->key;
  456. }
  457. /* Hand off to network layer */
  458. netdev_rx ( netdev, iobuf );
  459. }
  460. /** IPoIB completion operations */
  461. static struct ib_completion_queue_operations ipoib_cq_op = {
  462. .complete_send = ipoib_complete_send,
  463. .complete_recv = ipoib_complete_recv,
  464. };
  465. /**
  466. * Poll IPoIB network device
  467. *
  468. * @v netdev Network device
  469. */
  470. static void ipoib_poll ( struct net_device *netdev ) {
  471. struct ipoib_device *ipoib = netdev->priv;
  472. struct ib_device *ibdev = ipoib->ibdev;
  473. ib_poll_eq ( ibdev );
  474. }
  475. /**
  476. * Handle IPv4 broadcast multicast group join completion
  477. *
  478. * @v ibdev Infiniband device
  479. * @v qp Queue pair
  480. * @v membership Multicast group membership
  481. * @v rc Status code
  482. * @v mad Response MAD (or NULL on error)
  483. */
  484. void ipoib_join_complete ( struct ib_device *ibdev __unused,
  485. struct ib_queue_pair *qp __unused,
  486. struct ib_mc_membership *membership, int rc,
  487. union ib_mad *mad __unused ) {
  488. struct ipoib_device *ipoib = container_of ( membership,
  489. struct ipoib_device, broadcast_membership );
  490. /* Record join status as link status */
  491. netdev_link_err ( ipoib->netdev, rc );
  492. }
  493. /**
  494. * Join IPv4 broadcast multicast group
  495. *
  496. * @v ipoib IPoIB device
  497. * @ret rc Return status code
  498. */
  499. static int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) {
  500. int rc;
  501. if ( ( rc = ib_mcast_join ( ipoib->ibdev, ipoib->qp,
  502. &ipoib->broadcast_membership,
  503. &ipoib->broadcast.gid,
  504. ipoib_join_complete ) ) != 0 ) {
  505. DBGC ( ipoib, "IPoIB %p could not join broadcast group: %s\n",
  506. ipoib, strerror ( rc ) );
  507. return rc;
  508. }
  509. ipoib->broadcast_joined = 1;
  510. return 0;
  511. }
  512. /**
  513. * Leave IPv4 broadcast multicast group
  514. *
  515. * @v ipoib IPoIB device
  516. */
  517. static void ipoib_leave_broadcast_group ( struct ipoib_device *ipoib ) {
  518. if ( ipoib->broadcast_joined ) {
  519. ib_mcast_leave ( ipoib->ibdev, ipoib->qp,
  520. &ipoib->broadcast_membership );
  521. ipoib->broadcast_joined = 0;
  522. }
  523. }
  524. /**
  525. * Handle link status change
  526. *
  527. * @v ibdev Infiniband device
  528. */
  529. static void ipoib_link_state_changed ( struct ib_device *ibdev ) {
  530. struct net_device *netdev = ib_get_ownerdata ( ibdev );
  531. struct ipoib_device *ipoib = netdev->priv;
  532. struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
  533. int rc;
  534. /* Leave existing broadcast group */
  535. ipoib_leave_broadcast_group ( ipoib );
  536. /* Update MAC address based on potentially-new GID prefix */
  537. memcpy ( &mac->gid.s.prefix, &ibdev->gid.s.prefix,
  538. sizeof ( mac->gid.s.prefix ) );
  539. /* Update broadcast GID based on potentially-new partition key */
  540. ipoib->broadcast.gid.words[2] =
  541. htons ( ibdev->pkey | IB_PKEY_FULL );
  542. /* Set net device link state to reflect Infiniband link state */
  543. rc = ib_link_rc ( ibdev );
  544. netdev_link_err ( netdev, ( rc ? rc : -EINPROGRESS_JOINING ) );
  545. /* Join new broadcast group */
  546. if ( ib_is_open ( ibdev ) && ib_link_ok ( ibdev ) &&
  547. ( ( rc = ipoib_join_broadcast_group ( ipoib ) ) != 0 ) ) {
  548. DBGC ( ipoib, "IPoIB %p could not rejoin broadcast group: "
  549. "%s\n", ipoib, strerror ( rc ) );
  550. netdev_link_err ( netdev, rc );
  551. return;
  552. }
  553. }
  554. /**
  555. * Open IPoIB network device
  556. *
  557. * @v netdev Network device
  558. * @ret rc Return status code
  559. */
  560. static int ipoib_open ( struct net_device *netdev ) {
  561. struct ipoib_device *ipoib = netdev->priv;
  562. struct ib_device *ibdev = ipoib->ibdev;
  563. struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
  564. int rc;
  565. /* Open IB device */
  566. if ( ( rc = ib_open ( ibdev ) ) != 0 ) {
  567. DBGC ( ipoib, "IPoIB %p could not open device: %s\n",
  568. ipoib, strerror ( rc ) );
  569. goto err_ib_open;
  570. }
  571. /* Allocate completion queue */
  572. ipoib->cq = ib_create_cq ( ibdev, IPOIB_NUM_CQES, &ipoib_cq_op );
  573. if ( ! ipoib->cq ) {
  574. DBGC ( ipoib, "IPoIB %p could not allocate completion queue\n",
  575. ipoib );
  576. rc = -ENOMEM;
  577. goto err_create_cq;
  578. }
  579. /* Allocate queue pair */
  580. ipoib->qp = ib_create_qp ( ibdev, IB_QPT_UD,
  581. IPOIB_NUM_SEND_WQES, ipoib->cq,
  582. IPOIB_NUM_RECV_WQES, ipoib->cq );
  583. if ( ! ipoib->qp ) {
  584. DBGC ( ipoib, "IPoIB %p could not allocate queue pair\n",
  585. ipoib );
  586. rc = -ENOMEM;
  587. goto err_create_qp;
  588. }
  589. ib_qp_set_ownerdata ( ipoib->qp, ipoib );
  590. /* Update MAC address with QPN */
  591. mac->flags__qpn = htonl ( ipoib->qp->qpn );
  592. /* Fill receive rings */
  593. ib_refill_recv ( ibdev, ipoib->qp );
  594. /* Fake a link status change to join the broadcast group */
  595. ipoib_link_state_changed ( ibdev );
  596. return 0;
  597. ib_destroy_qp ( ibdev, ipoib->qp );
  598. err_create_qp:
  599. ib_destroy_cq ( ibdev, ipoib->cq );
  600. err_create_cq:
  601. ib_close ( ibdev );
  602. err_ib_open:
  603. return rc;
  604. }
  605. /**
  606. * Close IPoIB network device
  607. *
  608. * @v netdev Network device
  609. */
  610. static void ipoib_close ( struct net_device *netdev ) {
  611. struct ipoib_device *ipoib = netdev->priv;
  612. struct ib_device *ibdev = ipoib->ibdev;
  613. struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
  614. /* Leave broadcast group */
  615. ipoib_leave_broadcast_group ( ipoib );
  616. /* Remove QPN from MAC address */
  617. mac->flags__qpn = 0;
  618. /* Tear down the queues */
  619. ib_destroy_qp ( ibdev, ipoib->qp );
  620. ib_destroy_cq ( ibdev, ipoib->cq );
  621. /* Close IB device */
  622. ib_close ( ibdev );
  623. }
  624. /** IPoIB network device operations */
  625. static struct net_device_operations ipoib_operations = {
  626. .open = ipoib_open,
  627. .close = ipoib_close,
  628. .transmit = ipoib_transmit,
  629. .poll = ipoib_poll,
  630. };
  631. /**
  632. * Probe IPoIB device
  633. *
  634. * @v ibdev Infiniband device
  635. * @ret rc Return status code
  636. */
  637. static int ipoib_probe ( struct ib_device *ibdev ) {
  638. struct net_device *netdev;
  639. struct ipoib_device *ipoib;
  640. int rc;
  641. /* Allocate network device */
  642. netdev = alloc_ipoibdev ( sizeof ( *ipoib ) );
  643. if ( ! netdev )
  644. return -ENOMEM;
  645. netdev_init ( netdev, &ipoib_operations );
  646. ipoib = netdev->priv;
  647. ib_set_ownerdata ( ibdev, netdev );
  648. netdev->dev = ibdev->dev;
  649. memset ( ipoib, 0, sizeof ( *ipoib ) );
  650. ipoib->netdev = netdev;
  651. ipoib->ibdev = ibdev;
  652. /* Extract hardware address */
  653. memcpy ( netdev->hw_addr, &ibdev->gid.s.guid,
  654. sizeof ( ibdev->gid.s.guid ) );
  655. /* Set default broadcast address */
  656. memcpy ( &ipoib->broadcast, &ipoib_broadcast,
  657. sizeof ( ipoib->broadcast ) );
  658. netdev->ll_broadcast = ( ( uint8_t * ) &ipoib->broadcast );
  659. /* Register network device */
  660. if ( ( rc = register_netdev ( netdev ) ) != 0 )
  661. goto err_register_netdev;
  662. return 0;
  663. err_register_netdev:
  664. netdev_nullify ( netdev );
  665. netdev_put ( netdev );
  666. return rc;
  667. }
  /**
   * Remove IPoIB device
   *
   * @v ibdev  Infiniband device
   *
   * Unregisters and releases the network device recorded as the
   * Infiniband device's owner data.
   */
  static void ipoib_remove ( struct ib_device *ibdev ) {
      struct net_device *netdev;

      netdev = ib_get_ownerdata ( ibdev );

      unregister_netdev ( netdev );
      netdev_nullify ( netdev );
      netdev_put ( netdev );
  }
  679. /** IPoIB driver */
  680. struct ib_driver ipoib_driver __ib_driver = {
  681. .name = "IPoIB",
  682. .probe = ipoib_probe,
  683. .notify = ipoib_link_state_changed,
  684. .remove = ipoib_remove,
  685. };