您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

ipoib.c 20KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791
  1. /*
  2. * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License as
  6. * published by the Free Software Foundation; either version 2 of the
  7. * License, or any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful, but
  10. * WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write to the Free Software
  16. * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17. */
  18. FILE_LICENCE ( GPL2_OR_LATER );
  19. #include <stdint.h>
  20. #include <stdio.h>
  21. #include <unistd.h>
  22. #include <string.h>
  23. #include <byteswap.h>
  24. #include <errno.h>
  25. #include <gpxe/if_arp.h>
  26. #include <gpxe/iobuf.h>
  27. #include <gpxe/netdevice.h>
  28. #include <gpxe/infiniband.h>
  29. #include <gpxe/ib_qset.h>
  30. #include <gpxe/ib_pathrec.h>
  31. #include <gpxe/ib_mcast.h>
  32. #include <gpxe/ipoib.h>
  33. /** @file
  34. *
  35. * IP over Infiniband
  36. */
  /*
   * Queue sizing for the two IPoIB queue sets.  These values are passed
   * to ib_create_qset() when the device is opened.
   */

  /** Data queue set: number of send work queue entries */
  #define IPOIB_DATA_NUM_SEND_WQES 2

  /** Data queue set: number of receive work queue entries */
  #define IPOIB_DATA_NUM_RECV_WQES 4

  /** Data queue set: number of completion queue entries */
  #define IPOIB_DATA_NUM_CQES 8

  /** Metadata queue set: number of send work queue entries */
  #define IPOIB_META_NUM_SEND_WQES 2

  /** Metadata queue set: number of receive work queue entries */
  #define IPOIB_META_NUM_RECV_WQES 2

  /** Metadata queue set: number of completion queue entries */
  #define IPOIB_META_NUM_CQES 8
  49. /** An IPoIB device */
  50. struct ipoib_device {
  51. /** Network device */
  52. struct net_device *netdev;
  53. /** Underlying Infiniband device */
  54. struct ib_device *ibdev;
  55. /** Data queue set */
  56. struct ib_queue_set data;
  57. /** Data queue set */
  58. struct ib_queue_set meta;
  59. /** Broadcast MAC */
  60. struct ipoib_mac broadcast;
  61. /** Joined to multicast group
  62. *
  63. * This flag indicates whether or not we have initiated the
  64. * join to the IPv4 multicast group.
  65. */
  66. int broadcast_joined;
  67. };
  68. /** Broadcast IPoIB address */
  69. static struct ipoib_mac ipoib_broadcast = {
  70. .qpn = htonl ( IB_QPN_BROADCAST ),
  71. .gid.u.bytes = { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00,
  72. 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff },
  73. };
  74. /****************************************************************************
  75. *
  76. * IPoIB peer cache
  77. *
  78. ****************************************************************************
  79. */
  80. /**
  81. * IPoIB peer address
  82. *
  83. * The IPoIB link-layer header is only four bytes long and so does not
  84. * have sufficient room to store IPoIB MAC address(es). We therefore
  85. * maintain a cache of MAC addresses identified by a single-byte key,
  86. * and abuse the spare two bytes within the link-layer header to
  87. * communicate these MAC addresses between the link-layer code and the
  88. * netdevice driver.
  89. */
  90. struct ipoib_peer {
  91. /** Key */
  92. uint8_t key;
  93. /** MAC address */
  94. struct ipoib_mac mac;
  95. };
  96. /** Number of IPoIB peer cache entries
  97. *
  98. * Must be a power of two.
  99. */
  100. #define IPOIB_NUM_CACHED_PEERS 4
  101. /** IPoIB peer address cache */
  102. static struct ipoib_peer ipoib_peer_cache[IPOIB_NUM_CACHED_PEERS];
  103. /** Oldest IPoIB peer cache entry index */
  104. static unsigned int ipoib_peer_cache_idx = 1;
  105. /**
  106. * Look up cached peer by key
  107. *
  108. * @v key Peer cache key
  109. * @ret peer Peer cache entry, or NULL
  110. */
  111. static struct ipoib_peer * ipoib_lookup_peer_by_key ( unsigned int key ) {
  112. struct ipoib_peer *peer;
  113. unsigned int i;
  114. for ( i = 0 ; i < IPOIB_NUM_CACHED_PEERS ; i++ ) {
  115. peer = &ipoib_peer_cache[i];
  116. if ( peer->key == key )
  117. return peer;
  118. }
  119. if ( key != 0 ) {
  120. DBG ( "IPoIB warning: peer cache lost track of key %x while "
  121. "still in use\n", key );
  122. }
  123. return NULL;
  124. }
  125. /**
  126. * Store GID and QPN in peer cache
  127. *
  128. * @v gid Peer GID
  129. * @v qpn Peer QPN
  130. * @ret peer Peer cache entry
  131. */
  132. static struct ipoib_peer * ipoib_cache_peer ( const struct ipoib_mac *mac ) {
  133. struct ipoib_peer *peer;
  134. unsigned int key;
  135. unsigned int i;
  136. /* Look for existing cache entry */
  137. for ( i = 0 ; i < IPOIB_NUM_CACHED_PEERS ; i++ ) {
  138. peer = &ipoib_peer_cache[i];
  139. if ( memcmp ( &peer->mac, mac, sizeof ( peer->mac ) ) == 0 )
  140. return peer;
  141. }
  142. /* No entry found: create a new one */
  143. key = ipoib_peer_cache_idx++;
  144. peer = &ipoib_peer_cache[ key % IPOIB_NUM_CACHED_PEERS ];
  145. if ( peer->key )
  146. DBG ( "IPoIB peer %x evicted from cache\n", peer->key );
  147. memset ( peer, 0, sizeof ( *peer ) );
  148. peer->key = key;
  149. memcpy ( &peer->mac, mac, sizeof ( peer->mac ) );
  150. DBG ( "IPoIB peer %x has MAC %s\n",
  151. peer->key, ipoib_ntoa ( &peer->mac ) );
  152. return peer;
  153. }
  154. /****************************************************************************
  155. *
  156. * IPoIB link layer
  157. *
  158. ****************************************************************************
  159. */
  160. /**
  161. * Add IPoIB link-layer header
  162. *
  163. * @v netdev Network device
  164. * @v iobuf I/O buffer
  165. * @v ll_dest Link-layer destination address
  166. * @v ll_source Source link-layer address
  167. * @v net_proto Network-layer protocol, in network-byte order
  168. * @ret rc Return status code
  169. */
  170. static int ipoib_push ( struct net_device *netdev __unused,
  171. struct io_buffer *iobuf, const void *ll_dest,
  172. const void *ll_source __unused, uint16_t net_proto ) {
  173. struct ipoib_hdr *ipoib_hdr =
  174. iob_push ( iobuf, sizeof ( *ipoib_hdr ) );
  175. const struct ipoib_mac *dest_mac = ll_dest;
  176. const struct ipoib_mac *src_mac = ll_source;
  177. struct ipoib_peer *dest;
  178. struct ipoib_peer *src;
  179. /* Add link-layer addresses to cache */
  180. dest = ipoib_cache_peer ( dest_mac );
  181. src = ipoib_cache_peer ( src_mac );
  182. /* Build IPoIB header */
  183. ipoib_hdr->proto = net_proto;
  184. ipoib_hdr->u.peer.dest = dest->key;
  185. ipoib_hdr->u.peer.src = src->key;
  186. return 0;
  187. }
  188. /**
  189. * Remove IPoIB link-layer header
  190. *
  191. * @v netdev Network device
  192. * @v iobuf I/O buffer
  193. * @ret ll_dest Link-layer destination address
  194. * @ret ll_source Source link-layer address
  195. * @ret net_proto Network-layer protocol, in network-byte order
  196. * @ret rc Return status code
  197. */
  198. static int ipoib_pull ( struct net_device *netdev,
  199. struct io_buffer *iobuf, const void **ll_dest,
  200. const void **ll_source, uint16_t *net_proto ) {
  201. struct ipoib_device *ipoib = netdev->priv;
  202. struct ipoib_hdr *ipoib_hdr = iobuf->data;
  203. struct ipoib_peer *dest;
  204. struct ipoib_peer *source;
  205. /* Sanity check */
  206. if ( iob_len ( iobuf ) < sizeof ( *ipoib_hdr ) ) {
  207. DBG ( "IPoIB packet too short for link-layer header\n" );
  208. DBG_HD ( iobuf->data, iob_len ( iobuf ) );
  209. return -EINVAL;
  210. }
  211. /* Strip off IPoIB header */
  212. iob_pull ( iobuf, sizeof ( *ipoib_hdr ) );
  213. /* Identify source and destination addresses, and clear
  214. * reserved word in IPoIB header
  215. */
  216. dest = ipoib_lookup_peer_by_key ( ipoib_hdr->u.peer.dest );
  217. source = ipoib_lookup_peer_by_key ( ipoib_hdr->u.peer.src );
  218. ipoib_hdr->u.reserved = 0;
  219. /* Fill in required fields */
  220. *ll_dest = ( dest ? &dest->mac : &ipoib->broadcast );
  221. *ll_source = ( source ? &source->mac : &ipoib->broadcast );
  222. *net_proto = ipoib_hdr->proto;
  223. return 0;
  224. }
  225. /**
  226. * Transcribe IPoIB address
  227. *
  228. * @v ll_addr Link-layer address
  229. * @ret string Link-layer address in human-readable format
  230. */
  231. const char * ipoib_ntoa ( const void *ll_addr ) {
  232. static char buf[45];
  233. const struct ipoib_mac *mac = ll_addr;
  234. snprintf ( buf, sizeof ( buf ), "%08x:%08x:%08x:%08x:%08x",
  235. htonl ( mac->qpn ), htonl ( mac->gid.u.dwords[0] ),
  236. htonl ( mac->gid.u.dwords[1] ),
  237. htonl ( mac->gid.u.dwords[2] ),
  238. htonl ( mac->gid.u.dwords[3] ) );
  239. return buf;
  240. }
  241. /**
  242. * Hash multicast address
  243. *
  244. * @v af Address family
  245. * @v net_addr Network-layer address
  246. * @v ll_addr Link-layer address to fill in
  247. * @ret rc Return status code
  248. */
  249. static int ipoib_mc_hash ( unsigned int af __unused,
  250. const void *net_addr __unused,
  251. void *ll_addr __unused ) {
  252. return -ENOTSUP;
  253. }
  254. /** IPoIB protocol */
  255. struct ll_protocol ipoib_protocol __ll_protocol = {
  256. .name = "IPoIB",
  257. .ll_proto = htons ( ARPHRD_INFINIBAND ),
  258. .ll_addr_len = IPOIB_ALEN,
  259. .ll_header_len = IPOIB_HLEN,
  260. .push = ipoib_push,
  261. .pull = ipoib_pull,
  262. .ntoa = ipoib_ntoa,
  263. .mc_hash = ipoib_mc_hash,
  264. };
  265. /**
  266. * Allocate IPoIB device
  267. *
  268. * @v priv_size Size of driver private data
  269. * @ret netdev Network device, or NULL
  270. */
  271. struct net_device * alloc_ipoibdev ( size_t priv_size ) {
  272. struct net_device *netdev;
  273. netdev = alloc_netdev ( priv_size );
  274. if ( netdev ) {
  275. netdev->ll_protocol = &ipoib_protocol;
  276. netdev->ll_broadcast = ( uint8_t * ) &ipoib_broadcast;
  277. netdev->max_pkt_len = IB_MAX_PAYLOAD_SIZE;
  278. }
  279. return netdev;
  280. }
  281. /****************************************************************************
  282. *
  283. * IPoIB network device
  284. *
  285. ****************************************************************************
  286. */
  287. /**
  288. * Transmit packet via IPoIB network device
  289. *
  290. * @v netdev Network device
  291. * @v iobuf I/O buffer
  292. * @ret rc Return status code
  293. */
  294. static int ipoib_transmit ( struct net_device *netdev,
  295. struct io_buffer *iobuf ) {
  296. struct ipoib_device *ipoib = netdev->priv;
  297. struct ib_device *ibdev = ipoib->ibdev;
  298. struct ipoib_hdr *ipoib_hdr;
  299. struct ipoib_peer *dest;
  300. struct ib_address_vector av;
  301. int rc;
  302. /* Sanity check */
  303. if ( iob_len ( iobuf ) < sizeof ( *ipoib_hdr ) ) {
  304. DBGC ( ipoib, "IPoIB %p buffer too short\n", ipoib );
  305. return -EINVAL;
  306. }
  307. ipoib_hdr = iobuf->data;
  308. /* Attempting transmission while link is down will put the
  309. * queue pair into an error state, so don't try it.
  310. */
  311. if ( ! ib_link_ok ( ibdev ) )
  312. return -ENETUNREACH;
  313. /* Identify destination address */
  314. dest = ipoib_lookup_peer_by_key ( ipoib_hdr->u.peer.dest );
  315. if ( ! dest )
  316. return -ENXIO;
  317. ipoib_hdr->u.reserved = 0;
  318. /* Construct address vector */
  319. memset ( &av, 0, sizeof ( av ) );
  320. av.qpn = ntohl ( dest->mac.qpn );
  321. av.gid_present = 1;
  322. memcpy ( &av.gid, &dest->mac.gid, sizeof ( av.gid ) );
  323. if ( ( rc = ib_resolve_path ( ibdev, &av ) ) != 0 ) {
  324. /* Path not resolved yet */
  325. return rc;
  326. }
  327. return ib_post_send ( ibdev, ipoib->data.qp, &av, iobuf );
  328. }
  329. /**
  330. * Handle IPoIB data send completion
  331. *
  332. * @v ibdev Infiniband device
  333. * @v qp Queue pair
  334. * @v iobuf I/O buffer
  335. * @v rc Completion status code
  336. */
  337. static void ipoib_data_complete_send ( struct ib_device *ibdev __unused,
  338. struct ib_queue_pair *qp,
  339. struct io_buffer *iobuf, int rc ) {
  340. struct ipoib_device *ipoib = ib_qp_get_ownerdata ( qp );
  341. netdev_tx_complete_err ( ipoib->netdev, iobuf, rc );
  342. }
  343. /**
  344. * Handle IPoIB data receive completion
  345. *
  346. * @v ibdev Infiniband device
  347. * @v qp Queue pair
  348. * @v av Address vector, or NULL
  349. * @v iobuf I/O buffer
  350. * @v rc Completion status code
  351. */
  352. static void ipoib_data_complete_recv ( struct ib_device *ibdev __unused,
  353. struct ib_queue_pair *qp,
  354. struct ib_address_vector *av,
  355. struct io_buffer *iobuf, int rc ) {
  356. struct ipoib_device *ipoib = ib_qp_get_ownerdata ( qp );
  357. struct net_device *netdev = ipoib->netdev;
  358. struct ipoib_hdr *ipoib_hdr;
  359. struct ipoib_mac ll_src;
  360. struct ipoib_peer *src;
  361. if ( rc != 0 ) {
  362. netdev_rx_err ( netdev, iobuf, rc );
  363. return;
  364. }
  365. /* Sanity check */
  366. if ( iob_len ( iobuf ) < sizeof ( struct ipoib_hdr ) ) {
  367. DBGC ( ipoib, "IPoIB %p received data packet too short to "
  368. "contain IPoIB header\n", ipoib );
  369. DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) );
  370. netdev_rx_err ( netdev, iobuf, -EIO );
  371. return;
  372. }
  373. ipoib_hdr = iobuf->data;
  374. /* Parse source address */
  375. if ( av->gid_present ) {
  376. ll_src.qpn = htonl ( av->qpn );
  377. memcpy ( &ll_src.gid, &av->gid, sizeof ( ll_src.gid ) );
  378. src = ipoib_cache_peer ( &ll_src );
  379. ipoib_hdr->u.peer.src = src->key;
  380. }
  381. /* Hand off to network layer */
  382. netdev_rx ( netdev, iobuf );
  383. }
  384. /** IPoIB data completion operations */
  385. static struct ib_completion_queue_operations ipoib_data_cq_op = {
  386. .complete_send = ipoib_data_complete_send,
  387. .complete_recv = ipoib_data_complete_recv,
  388. };
  389. /**
  390. * Handle IPoIB metadata send completion
  391. *
  392. * @v ibdev Infiniband device
  393. * @v qp Queue pair
  394. * @v iobuf I/O buffer
  395. * @v rc Completion status code
  396. */
  397. static void ipoib_meta_complete_send ( struct ib_device *ibdev __unused,
  398. struct ib_queue_pair *qp,
  399. struct io_buffer *iobuf, int rc ) {
  400. struct ipoib_device *ipoib = ib_qp_get_ownerdata ( qp );
  401. if ( rc != 0 ) {
  402. DBGC ( ipoib, "IPoIB %p metadata TX completion error: %s\n",
  403. ipoib, strerror ( rc ) );
  404. }
  405. free_iob ( iobuf );
  406. }
  407. /**
  408. * Handle IPoIB metadata receive completion
  409. *
  410. * @v ibdev Infiniband device
  411. * @v qp Queue pair
  412. * @v av Address vector, or NULL
  413. * @v iobuf I/O buffer
  414. * @v rc Completion status code
  415. */
  416. static void
  417. ipoib_meta_complete_recv ( struct ib_device *ibdev __unused,
  418. struct ib_queue_pair *qp,
  419. struct ib_address_vector *av __unused,
  420. struct io_buffer *iobuf, int rc ) {
  421. struct ipoib_device *ipoib = ib_qp_get_ownerdata ( qp );
  422. struct ib_mad_sa *sa;
  423. if ( rc != 0 ) {
  424. DBGC ( ipoib, "IPoIB %p metadata RX completion error: %s\n",
  425. ipoib, strerror ( rc ) );
  426. goto done;
  427. }
  428. if ( iob_len ( iobuf ) < sizeof ( *sa ) ) {
  429. DBGC ( ipoib, "IPoIB %p received metadata packet too short "
  430. "to contain reply\n", ipoib );
  431. DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) );
  432. goto done;
  433. }
  434. sa = iobuf->data;
  435. if ( sa->mad_hdr.status != 0 ) {
  436. DBGC ( ipoib, "IPoIB %p metadata RX err status %04x\n",
  437. ipoib, ntohs ( sa->mad_hdr.status ) );
  438. goto done;
  439. }
  440. switch ( sa->mad_hdr.tid[0] ) {
  441. default:
  442. DBGC ( ipoib, "IPoIB %p unwanted response:\n",
  443. ipoib );
  444. DBGC_HD ( ipoib, sa, sizeof ( *sa ) );
  445. break;
  446. }
  447. done:
  448. free_iob ( iobuf );
  449. }
  450. /** IPoIB metadata completion operations */
  451. static struct ib_completion_queue_operations ipoib_meta_cq_op = {
  452. .complete_send = ipoib_meta_complete_send,
  453. .complete_recv = ipoib_meta_complete_recv,
  454. };
  455. /**
  456. * Poll IPoIB network device
  457. *
  458. * @v netdev Network device
  459. */
  460. static void ipoib_poll ( struct net_device *netdev ) {
  461. struct ipoib_device *ipoib = netdev->priv;
  462. struct ib_device *ibdev = ipoib->ibdev;
  463. ib_poll_eq ( ibdev );
  464. }
  465. /**
  466. * Enable/disable interrupts on IPoIB network device
  467. *
  468. * @v netdev Network device
  469. * @v enable Interrupts should be enabled
  470. */
  471. static void ipoib_irq ( struct net_device *netdev __unused,
  472. int enable __unused ) {
  473. /* No implementation */
  474. }
  475. /**
  476. * Join IPv4 broadcast multicast group
  477. *
  478. * @v ipoib IPoIB device
  479. * @ret rc Return status code
  480. */
  481. static int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) {
  482. int rc;
  483. if ( ( rc = ib_mcast_join ( ipoib->ibdev, ipoib->data.qp,
  484. &ipoib->broadcast.gid ) ) != 0 ) {
  485. DBGC ( ipoib, "IPoIB %p could not join broadcast group: %s\n",
  486. ipoib, strerror ( rc ) );
  487. return rc;
  488. }
  489. ipoib->broadcast_joined = 1;
  490. return 0;
  491. }
  492. /**
  493. * Leave IPv4 broadcast multicast group
  494. *
  495. * @v ipoib IPoIB device
  496. */
  497. static void ipoib_leave_broadcast_group ( struct ipoib_device *ipoib ) {
  498. /* Detach data queue from broadcast multicast GID */
  499. if ( ipoib->broadcast_joined ) {
  500. ib_mcast_leave ( ipoib->ibdev, ipoib->data.qp,
  501. &ipoib->broadcast.gid );
  502. ipoib->broadcast_joined = 0;
  503. }
  504. }
  505. /**
  506. * Open IPoIB network device
  507. *
  508. * @v netdev Network device
  509. * @ret rc Return status code
  510. */
  511. static int ipoib_open ( struct net_device *netdev ) {
  512. struct ipoib_device *ipoib = netdev->priv;
  513. struct ib_device *ibdev = ipoib->ibdev;
  514. struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
  515. int rc;
  516. /* Open IB device */
  517. if ( ( rc = ib_open ( ibdev ) ) != 0 ) {
  518. DBGC ( ipoib, "IPoIB %p could not open device: %s\n",
  519. ipoib, strerror ( rc ) );
  520. goto err_ib_open;
  521. }
  522. /* Allocate metadata queue set */
  523. if ( ( rc = ib_create_qset ( ibdev, &ipoib->meta,
  524. IPOIB_META_NUM_CQES, &ipoib_meta_cq_op,
  525. IPOIB_META_NUM_SEND_WQES,
  526. IPOIB_META_NUM_RECV_WQES,
  527. IB_QKEY_GMA ) ) != 0 ) {
  528. DBGC ( ipoib, "IPoIB %p could not allocate metadata QP: %s\n",
  529. ipoib, strerror ( rc ) );
  530. goto err_create_meta_qset;
  531. }
  532. ib_qp_set_ownerdata ( ipoib->meta.qp, ipoib );
  533. /* Allocate data queue set */
  534. if ( ( rc = ib_create_qset ( ibdev, &ipoib->data,
  535. IPOIB_DATA_NUM_CQES, &ipoib_data_cq_op,
  536. IPOIB_DATA_NUM_SEND_WQES,
  537. IPOIB_DATA_NUM_RECV_WQES,
  538. IB_QKEY_GMA ) ) != 0 ) {
  539. DBGC ( ipoib, "IPoIB %p could not allocate data QP: %s\n",
  540. ipoib, strerror ( rc ) );
  541. goto err_create_data_qset;
  542. }
  543. ib_qp_set_ownerdata ( ipoib->data.qp, ipoib );
  544. /* Update MAC address with data QPN */
  545. mac->qpn = htonl ( ipoib->data.qp->qpn );
  546. /* Fill receive rings */
  547. ib_refill_recv ( ibdev, ipoib->meta.qp );
  548. ib_refill_recv ( ibdev, ipoib->data.qp );
  549. /* Join broadcast group */
  550. if ( ( rc = ipoib_join_broadcast_group ( ipoib ) ) != 0 ) {
  551. DBGC ( ipoib, "IPoIB %p could not join broadcast group: %s\n",
  552. ipoib, strerror ( rc ) );
  553. goto err_join_broadcast;
  554. }
  555. return 0;
  556. err_join_broadcast:
  557. ib_destroy_qset ( ibdev, &ipoib->data );
  558. err_create_data_qset:
  559. ib_destroy_qset ( ibdev, &ipoib->meta );
  560. err_create_meta_qset:
  561. ib_close ( ibdev );
  562. err_ib_open:
  563. return rc;
  564. }
  565. /**
  566. * Close IPoIB network device
  567. *
  568. * @v netdev Network device
  569. */
  570. static void ipoib_close ( struct net_device *netdev ) {
  571. struct ipoib_device *ipoib = netdev->priv;
  572. struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
  573. /* Leave broadcast group */
  574. ipoib_leave_broadcast_group ( ipoib );
  575. /* Remove data QPN from MAC address */
  576. mac->qpn = 0;
  577. /* Tear down the queues */
  578. ib_destroy_qset ( ipoib->ibdev, &ipoib->data );
  579. ib_destroy_qset ( ipoib->ibdev, &ipoib->meta );
  580. /* Close IB device */
  581. ib_close ( ipoib->ibdev );
  582. }
  583. /** IPoIB network device operations */
  584. static struct net_device_operations ipoib_operations = {
  585. .open = ipoib_open,
  586. .close = ipoib_close,
  587. .transmit = ipoib_transmit,
  588. .poll = ipoib_poll,
  589. .irq = ipoib_irq,
  590. };
  591. /**
  592. * Update IPoIB dynamic Infiniband parameters
  593. *
  594. * @v ipoib IPoIB device
  595. *
  596. * The Infiniband port GID and partition key will change at runtime,
  597. * when the link is established (or lost). The MAC address is based
  598. * on the port GID, and the broadcast GID is based on the partition
  599. * key. This function recalculates these IPoIB device parameters.
  600. */
  601. static void ipoib_set_ib_params ( struct ipoib_device *ipoib ) {
  602. struct ib_device *ibdev = ipoib->ibdev;
  603. struct net_device *netdev = ipoib->netdev;
  604. struct ipoib_mac *mac;
  605. /* Calculate GID portion of MAC address based on port GID */
  606. mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
  607. memcpy ( &mac->gid, &ibdev->gid, sizeof ( mac->gid ) );
  608. /* Calculate broadcast GID based on partition key */
  609. memcpy ( &ipoib->broadcast, &ipoib_broadcast,
  610. sizeof ( ipoib->broadcast ) );
  611. ipoib->broadcast.gid.u.words[2] = htons ( ibdev->pkey );
  612. /* Set net device link state to reflect Infiniband link state */
  613. if ( ib_link_ok ( ibdev ) ) {
  614. netdev_link_up ( netdev );
  615. } else {
  616. netdev_link_down ( netdev );
  617. }
  618. }
  619. /**
  620. * Handle link status change
  621. *
  622. * @v ibdev Infiniband device
  623. */
  624. void ipoib_link_state_changed ( struct ib_device *ibdev ) {
  625. struct net_device *netdev = ib_get_ownerdata ( ibdev );
  626. struct ipoib_device *ipoib = netdev->priv;
  627. int rc;
  628. /* Leave existing broadcast group */
  629. ipoib_leave_broadcast_group ( ipoib );
  630. /* Update MAC address and broadcast GID based on new port GID
  631. * and partition key.
  632. */
  633. ipoib_set_ib_params ( ipoib );
  634. /* Join new broadcast group */
  635. if ( ( rc = ipoib_join_broadcast_group ( ipoib ) ) != 0 ) {
  636. DBGC ( ipoib, "IPoIB %p could not rejoin broadcast group: "
  637. "%s\n", ipoib, strerror ( rc ) );
  638. return;
  639. }
  640. }
  641. /**
  642. * Probe IPoIB device
  643. *
  644. * @v ibdev Infiniband device
  645. * @ret rc Return status code
  646. */
  647. int ipoib_probe ( struct ib_device *ibdev ) {
  648. struct net_device *netdev;
  649. struct ipoib_device *ipoib;
  650. int rc;
  651. /* Allocate network device */
  652. netdev = alloc_ipoibdev ( sizeof ( *ipoib ) );
  653. if ( ! netdev )
  654. return -ENOMEM;
  655. netdev_init ( netdev, &ipoib_operations );
  656. ipoib = netdev->priv;
  657. ib_set_ownerdata ( ibdev, netdev );
  658. netdev->dev = ibdev->dev;
  659. netdev->ll_broadcast = ( ( uint8_t * ) &ipoib->broadcast );
  660. memset ( ipoib, 0, sizeof ( *ipoib ) );
  661. ipoib->netdev = netdev;
  662. ipoib->ibdev = ibdev;
  663. /* Calculate as much of the broadcast GID and the MAC address
  664. * as we can. We won't know either of these in full until we
  665. * have link-up.
  666. */
  667. ipoib_set_ib_params ( ipoib );
  668. /* Register network device */
  669. if ( ( rc = register_netdev ( netdev ) ) != 0 )
  670. goto err_register_netdev;
  671. return 0;
  672. err_register_netdev:
  673. netdev_nullify ( netdev );
  674. netdev_put ( netdev );
  675. return rc;
  676. }
  /**
   * Remove IPoIB device
   *
   * @v ibdev		Infiniband device
   *
   * Unregisters and releases the network device recorded as the owner
   * data of the Infiniband device.
   */
  void ipoib_remove ( struct ib_device *ibdev ) {
      struct net_device *owner = ib_get_ownerdata ( ibdev );

      unregister_netdev ( owner );
      netdev_nullify ( owner );
      netdev_put ( owner );
  }