You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

eoib.c 23KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893
  1. /*
  2. * Copyright (C) 2016 Michael Brown <mbrown@fensystems.co.uk>.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License as
  6. * published by the Free Software Foundation; either version 2 of the
  7. * License, or any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful, but
  10. * WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write to the Free Software
  16. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  17. * 02110-1301, USA.
  18. *
  19. * You can also choose to distribute this program under the terms of
  20. * the Unmodified Binary Distribution Licence (as given in the file
  21. * COPYING.UBDL), provided that you have satisfied its requirements.
  22. */
  23. FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
  24. #include <stdio.h>
  25. #include <string.h>
  26. #include <errno.h>
  27. #include <ipxe/errortab.h>
  28. #include <ipxe/malloc.h>
  29. #include <ipxe/iobuf.h>
  30. #include <ipxe/if_ether.h>
  31. #include <ipxe/netdevice.h>
  32. #include <ipxe/ethernet.h>
  33. #include <ipxe/infiniband.h>
  34. #include <ipxe/ib_mcast.h>
  35. #include <ipxe/ib_pathrec.h>
  36. #include <ipxe/eoib.h>
  37. /** @file
  38. *
  39. * Ethernet over Infiniband
  40. *
  41. */
  42. /** Number of EoIB send work queue entries */
  43. #define EOIB_NUM_SEND_WQES 8
  44. /** Number of EoIB receive work queue entries */
  45. #define EOIB_NUM_RECV_WQES 4
  46. /** Number of EoIB completion queue entries */
  47. #define EOIB_NUM_CQES 16
  48. /** Link status for "broadcast join in progress" */
  49. #define EINPROGRESS_JOINING __einfo_error ( EINFO_EINPROGRESS_JOINING )
  50. #define EINFO_EINPROGRESS_JOINING __einfo_uniqify \
  51. ( EINFO_EINPROGRESS, 0x01, "Joining" )
  52. /** Human-readable message for the link status */
  53. struct errortab eoib_errors[] __errortab = {
  54. __einfo_errortab ( EINFO_EINPROGRESS_JOINING ),
  55. };
  56. /** List of EoIB devices */
  57. static LIST_HEAD ( eoib_devices );
  58. static struct net_device_operations eoib_operations;
  59. /****************************************************************************
  60. *
  61. * EoIB peer cache
  62. *
  63. ****************************************************************************
  64. */
  65. /** An EoIB peer cache entry */
  66. struct eoib_peer {
  67. /** List of EoIB peer cache entries */
  68. struct list_head list;
  69. /** Ethernet MAC */
  70. uint8_t mac[ETH_ALEN];
  71. /** Infiniband address vector */
  72. struct ib_address_vector av;
  73. };
  74. /**
  75. * Find EoIB peer cache entry
  76. *
  77. * @v eoib EoIB device
  78. * @v mac Ethernet MAC
  79. * @ret peer EoIB peer, or NULL if not found
  80. */
  81. static struct eoib_peer * eoib_find_peer ( struct eoib_device *eoib,
  82. const uint8_t *mac ) {
  83. struct eoib_peer *peer;
  84. /* Find peer cache entry */
  85. list_for_each_entry ( peer, &eoib->peers, list ) {
  86. if ( memcmp ( mac, peer->mac, sizeof ( peer->mac ) ) == 0 ) {
  87. /* Move peer to start of list */
  88. list_del ( &peer->list );
  89. list_add ( &peer->list, &eoib->peers );
  90. return peer;
  91. }
  92. }
  93. return NULL;
  94. }
  95. /**
  96. * Create EoIB peer cache entry
  97. *
  98. * @v eoib EoIB device
  99. * @v mac Ethernet MAC
  100. * @ret peer EoIB peer, or NULL on error
  101. */
  102. static struct eoib_peer * eoib_create_peer ( struct eoib_device *eoib,
  103. const uint8_t *mac ) {
  104. struct eoib_peer *peer;
  105. /* Allocate and initialise peer cache entry */
  106. peer = zalloc ( sizeof ( *peer ) );
  107. if ( peer ) {
  108. memcpy ( peer->mac, mac, sizeof ( peer->mac ) );
  109. list_add ( &peer->list, &eoib->peers );
  110. }
  111. return peer;
  112. }
  113. /**
  114. * Flush EoIB peer cache
  115. *
  116. * @v eoib EoIB device
  117. */
  118. static void eoib_flush_peers ( struct eoib_device *eoib ) {
  119. struct eoib_peer *peer;
  120. struct eoib_peer *tmp;
  121. list_for_each_entry_safe ( peer, tmp, &eoib->peers, list ) {
  122. list_del ( &peer->list );
  123. free ( peer );
  124. }
  125. }
  126. /**
  127. * Discard some entries from the peer cache
  128. *
  129. * @ret discarded Number of cached items discarded
  130. */
  131. static unsigned int eoib_discard ( void ) {
  132. struct net_device *netdev;
  133. struct eoib_device *eoib;
  134. struct eoib_peer *peer;
  135. unsigned int discarded = 0;
  136. /* Try to discard one cache entry for each EoIB device */
  137. for_each_netdev ( netdev ) {
  138. /* Skip non-EoIB devices */
  139. if ( netdev->op != &eoib_operations )
  140. continue;
  141. eoib = netdev->priv;
  142. /* Discard least recently used cache entry (if any) */
  143. list_for_each_entry_reverse ( peer, &eoib->peers, list ) {
  144. list_del ( &peer->list );
  145. free ( peer );
  146. discarded++;
  147. break;
  148. }
  149. }
  150. return discarded;
  151. }
  152. /** EoIB cache discarder */
  153. struct cache_discarder eoib_discarder __cache_discarder ( CACHE_EXPENSIVE ) = {
  154. .discard = eoib_discard,
  155. };
  156. /**
  157. * Find destination address vector
  158. *
  159. * @v eoib EoIB device
  160. * @v mac Ethernet MAC
  161. * @ret av Address vector, or NULL to send as broadcast
  162. */
  163. static struct ib_address_vector * eoib_tx_av ( struct eoib_device *eoib,
  164. const uint8_t *mac ) {
  165. struct ib_device *ibdev = eoib->ibdev;
  166. struct eoib_peer *peer;
  167. int rc;
  168. /* If this is a broadcast or multicast MAC address, then send
  169. * this packet as a broadcast.
  170. */
  171. if ( is_multicast_ether_addr ( mac ) ) {
  172. DBGCP ( eoib, "EoIB %s %s TX multicast\n",
  173. eoib->name, eth_ntoa ( mac ) );
  174. return NULL;
  175. }
  176. /* If we have no peer cache entry, then create one and send
  177. * this packet as a broadcast.
  178. */
  179. peer = eoib_find_peer ( eoib, mac );
  180. if ( ! peer ) {
  181. DBGC ( eoib, "EoIB %s %s TX unknown\n",
  182. eoib->name, eth_ntoa ( mac ) );
  183. eoib_create_peer ( eoib, mac );
  184. return NULL;
  185. }
  186. /* If we have not yet recorded a received GID and QPN for this
  187. * peer cache entry, then send this packet as a broadcast.
  188. */
  189. if ( ! peer->av.gid_present ) {
  190. DBGCP ( eoib, "EoIB %s %s TX not yet recorded\n",
  191. eoib->name, eth_ntoa ( mac ) );
  192. return NULL;
  193. }
  194. /* If we have not yet resolved a path to this peer, then send
  195. * this packet as a broadcast.
  196. */
  197. if ( ( rc = ib_resolve_path ( ibdev, &peer->av ) ) != 0 ) {
  198. DBGCP ( eoib, "EoIB %s %s TX not yet resolved\n",
  199. eoib->name, eth_ntoa ( mac ) );
  200. return NULL;
  201. }
  202. /* Force use of GRH even for local destinations */
  203. peer->av.gid_present = 1;
  204. /* We have a fully resolved peer: send this packet as a
  205. * unicast.
  206. */
  207. DBGCP ( eoib, "EoIB %s %s TX " IB_GID_FMT " QPN %#lx\n", eoib->name,
  208. eth_ntoa ( mac ), IB_GID_ARGS ( &peer->av.gid ), peer->av.qpn );
  209. return &peer->av;
  210. }
  211. /**
  212. * Record source address vector
  213. *
  214. * @v eoib EoIB device
  215. * @v mac Ethernet MAC
  216. * @v lid Infiniband LID
  217. */
  218. static void eoib_rx_av ( struct eoib_device *eoib, const uint8_t *mac,
  219. const struct ib_address_vector *av ) {
  220. const union ib_gid *gid = &av->gid;
  221. unsigned long qpn = av->qpn;
  222. struct eoib_peer *peer;
  223. /* Sanity checks */
  224. if ( ! av->gid_present ) {
  225. DBGC ( eoib, "EoIB %s %s RX with no GID\n",
  226. eoib->name, eth_ntoa ( mac ) );
  227. return;
  228. }
  229. /* Find peer cache entry (if any) */
  230. peer = eoib_find_peer ( eoib, mac );
  231. if ( ! peer ) {
  232. DBGCP ( eoib, "EoIB %s %s RX " IB_GID_FMT " (ignored)\n",
  233. eoib->name, eth_ntoa ( mac ), IB_GID_ARGS ( gid ) );
  234. return;
  235. }
  236. /* Some dubious EoIB implementations utilise an Ethernet-to-
  237. * EoIB gateway that will send packets from the wrong QPN.
  238. */
  239. if ( eoib_has_gateway ( eoib ) &&
  240. ( memcmp ( gid, &eoib->gateway.gid, sizeof ( *gid ) ) == 0 ) ) {
  241. qpn = eoib->gateway.qpn;
  242. }
  243. /* Do nothing if peer cache entry is complete and correct */
  244. if ( ( peer->av.lid == av->lid ) && ( peer->av.qpn == qpn ) ) {
  245. DBGCP ( eoib, "EoIB %s %s RX unchanged\n",
  246. eoib->name, eth_ntoa ( mac ) );
  247. return;
  248. }
  249. /* Update peer cache entry */
  250. peer->av.qpn = qpn;
  251. peer->av.qkey = eoib->broadcast.qkey;
  252. peer->av.gid_present = 1;
  253. memcpy ( &peer->av.gid, gid, sizeof ( peer->av.gid ) );
  254. DBGC ( eoib, "EoIB %s %s RX " IB_GID_FMT " QPN %#lx\n", eoib->name,
  255. eth_ntoa ( mac ), IB_GID_ARGS ( &peer->av.gid ), peer->av.qpn );
  256. }
  257. /****************************************************************************
  258. *
  259. * EoIB network device
  260. *
  261. ****************************************************************************
  262. */
  263. /**
  264. * Transmit packet via EoIB network device
  265. *
  266. * @v netdev Network device
  267. * @v iobuf I/O buffer
  268. * @ret rc Return status code
  269. */
  270. static int eoib_transmit ( struct net_device *netdev,
  271. struct io_buffer *iobuf ) {
  272. struct eoib_device *eoib = netdev->priv;
  273. struct eoib_header *eoib_hdr;
  274. struct ethhdr *ethhdr;
  275. struct ib_address_vector *av;
  276. size_t zlen;
  277. /* Sanity checks */
  278. assert ( iob_len ( iobuf ) >= sizeof ( *ethhdr ) );
  279. assert ( iob_headroom ( iobuf ) >= sizeof ( *eoib_hdr ) );
  280. /* Look up destination address vector */
  281. ethhdr = iobuf->data;
  282. av = eoib_tx_av ( eoib, ethhdr->h_dest );
  283. /* Prepend EoIB header */
  284. eoib_hdr = iob_push ( iobuf, sizeof ( *eoib_hdr ) );
  285. eoib_hdr->magic = htons ( EOIB_MAGIC );
  286. eoib_hdr->reserved = 0;
  287. /* Pad buffer to minimum Ethernet frame size */
  288. zlen = ( sizeof ( *eoib_hdr ) + ETH_ZLEN );
  289. assert ( zlen <= IOB_ZLEN );
  290. if ( iob_len ( iobuf ) < zlen )
  291. iob_pad ( iobuf, zlen );
  292. /* If we have no unicast address then send as a broadcast,
  293. * with a duplicate sent to the gateway if applicable.
  294. */
  295. if ( ! av ) {
  296. av = &eoib->broadcast;
  297. if ( eoib_has_gateway ( eoib ) )
  298. eoib->duplicate ( eoib, iobuf );
  299. }
  300. /* Post send work queue entry */
  301. return ib_post_send ( eoib->ibdev, eoib->qp, av, iobuf );
  302. }
  303. /**
  304. * Handle EoIB send completion
  305. *
  306. * @v ibdev Infiniband device
  307. * @v qp Queue pair
  308. * @v iobuf I/O buffer
  309. * @v rc Completion status code
  310. */
  311. static void eoib_complete_send ( struct ib_device *ibdev __unused,
  312. struct ib_queue_pair *qp,
  313. struct io_buffer *iobuf, int rc ) {
  314. struct eoib_device *eoib = ib_qp_get_ownerdata ( qp );
  315. netdev_tx_complete_err ( eoib->netdev, iobuf, rc );
  316. }
  317. /**
  318. * Handle EoIB receive completion
  319. *
  320. * @v ibdev Infiniband device
  321. * @v qp Queue pair
  322. * @v dest Destination address vector, or NULL
  323. * @v source Source address vector, or NULL
  324. * @v iobuf I/O buffer
  325. * @v rc Completion status code
  326. */
  327. static void eoib_complete_recv ( struct ib_device *ibdev __unused,
  328. struct ib_queue_pair *qp,
  329. struct ib_address_vector *dest __unused,
  330. struct ib_address_vector *source,
  331. struct io_buffer *iobuf, int rc ) {
  332. struct eoib_device *eoib = ib_qp_get_ownerdata ( qp );
  333. struct net_device *netdev = eoib->netdev;
  334. struct eoib_header *eoib_hdr;
  335. struct ethhdr *ethhdr;
  336. /* Record errors */
  337. if ( rc != 0 ) {
  338. netdev_rx_err ( netdev, iobuf, rc );
  339. return;
  340. }
  341. /* Sanity check */
  342. if ( iob_len ( iobuf ) < ( sizeof ( *eoib_hdr ) + sizeof ( *ethhdr ) )){
  343. DBGC ( eoib, "EoIB %s received packet too short to "
  344. "contain EoIB and Ethernet headers\n", eoib->name );
  345. DBGC_HD ( eoib, iobuf->data, iob_len ( iobuf ) );
  346. netdev_rx_err ( netdev, iobuf, -EIO );
  347. return;
  348. }
  349. if ( ! source ) {
  350. DBGC ( eoib, "EoIB %s received packet without address "
  351. "vector\n", eoib->name );
  352. netdev_rx_err ( netdev, iobuf, -ENOTTY );
  353. return;
  354. }
  355. /* Strip EoIB header */
  356. iob_pull ( iobuf, sizeof ( *eoib_hdr ) );
  357. /* Update neighbour cache entry, if any */
  358. ethhdr = iobuf->data;
  359. eoib_rx_av ( eoib, ethhdr->h_source, source );
  360. /* Hand off to network layer */
  361. netdev_rx ( netdev, iobuf );
  362. }
  363. /** EoIB completion operations */
  364. static struct ib_completion_queue_operations eoib_cq_op = {
  365. .complete_send = eoib_complete_send,
  366. .complete_recv = eoib_complete_recv,
  367. };
  368. /** EoIB queue pair operations */
  369. static struct ib_queue_pair_operations eoib_qp_op = {
  370. .alloc_iob = alloc_iob,
  371. };
  372. /**
  373. * Poll EoIB network device
  374. *
  375. * @v netdev Network device
  376. */
  377. static void eoib_poll ( struct net_device *netdev ) {
  378. struct eoib_device *eoib = netdev->priv;
  379. struct ib_device *ibdev = eoib->ibdev;
  380. /* Poll Infiniband device */
  381. ib_poll_eq ( ibdev );
  382. /* Poll the retry timers (required for EoIB multicast join) */
  383. retry_poll();
  384. }
  385. /**
  386. * Handle EoIB broadcast multicast group join completion
  387. *
  388. * @v membership Multicast group membership
  389. * @v rc Status code
  390. */
  391. static void eoib_join_complete ( struct ib_mc_membership *membership, int rc ) {
  392. struct eoib_device *eoib =
  393. container_of ( membership, struct eoib_device, membership );
  394. /* Record join status as link status */
  395. netdev_link_err ( eoib->netdev, rc );
  396. }
  397. /**
  398. * Join EoIB broadcast multicast group
  399. *
  400. * @v eoib EoIB device
  401. * @ret rc Return status code
  402. */
  403. static int eoib_join_broadcast_group ( struct eoib_device *eoib ) {
  404. int rc;
  405. /* Join multicast group */
  406. if ( ( rc = ib_mcast_join ( eoib->ibdev, eoib->qp,
  407. &eoib->membership, &eoib->broadcast,
  408. eoib->mask, eoib_join_complete ) ) != 0 ) {
  409. DBGC ( eoib, "EoIB %s could not join broadcast group: %s\n",
  410. eoib->name, strerror ( rc ) );
  411. return rc;
  412. }
  413. return 0;
  414. }
  415. /**
  416. * Leave EoIB broadcast multicast group
  417. *
  418. * @v eoib EoIB device
  419. */
  420. static void eoib_leave_broadcast_group ( struct eoib_device *eoib ) {
  421. /* Leave multicast group */
  422. ib_mcast_leave ( eoib->ibdev, eoib->qp, &eoib->membership );
  423. }
  424. /**
  425. * Handle link status change
  426. *
  427. * @v eoib EoIB device
  428. */
  429. static void eoib_link_state_changed ( struct eoib_device *eoib ) {
  430. struct net_device *netdev = eoib->netdev;
  431. struct ib_device *ibdev = eoib->ibdev;
  432. int rc;
  433. /* Leave existing broadcast group */
  434. if ( eoib->qp )
  435. eoib_leave_broadcast_group ( eoib );
  436. /* Update broadcast GID based on potentially-new partition key */
  437. eoib->broadcast.gid.words[2] = htons ( ibdev->pkey | IB_PKEY_FULL );
  438. /* Set net device link state to reflect Infiniband link state */
  439. rc = ib_link_rc ( ibdev );
  440. netdev_link_err ( netdev, ( rc ? rc : -EINPROGRESS_JOINING ) );
  441. /* Join new broadcast group */
  442. if ( ib_is_open ( ibdev ) && ib_link_ok ( ibdev ) && eoib->qp &&
  443. ( ( rc = eoib_join_broadcast_group ( eoib ) ) != 0 ) ) {
  444. DBGC ( eoib, "EoIB %s could not rejoin broadcast group: "
  445. "%s\n", eoib->name, strerror ( rc ) );
  446. netdev_link_err ( netdev, rc );
  447. return;
  448. }
  449. }
  450. /**
  451. * Open EoIB network device
  452. *
  453. * @v netdev Network device
  454. * @ret rc Return status code
  455. */
  456. static int eoib_open ( struct net_device *netdev ) {
  457. struct eoib_device *eoib = netdev->priv;
  458. struct ib_device *ibdev = eoib->ibdev;
  459. int rc;
  460. /* Open IB device */
  461. if ( ( rc = ib_open ( ibdev ) ) != 0 ) {
  462. DBGC ( eoib, "EoIB %s could not open %s: %s\n",
  463. eoib->name, ibdev->name, strerror ( rc ) );
  464. goto err_ib_open;
  465. }
  466. /* Allocate completion queue */
  467. if ( ( rc = ib_create_cq ( ibdev, EOIB_NUM_CQES, &eoib_cq_op,
  468. &eoib->cq ) ) != 0 ) {
  469. DBGC ( eoib, "EoIB %s could not create completion queue: %s\n",
  470. eoib->name, strerror ( rc ) );
  471. goto err_create_cq;
  472. }
  473. /* Allocate queue pair */
  474. if ( ( rc = ib_create_qp ( ibdev, IB_QPT_UD, EOIB_NUM_SEND_WQES,
  475. eoib->cq, EOIB_NUM_RECV_WQES, eoib->cq,
  476. &eoib_qp_op, netdev->name, &eoib->qp ) )!=0){
  477. DBGC ( eoib, "EoIB %s could not create queue pair: %s\n",
  478. eoib->name, strerror ( rc ) );
  479. goto err_create_qp;
  480. }
  481. ib_qp_set_ownerdata ( eoib->qp, eoib );
  482. /* Fill receive rings */
  483. ib_refill_recv ( ibdev, eoib->qp );
  484. /* Fake a link status change to join the broadcast group */
  485. eoib_link_state_changed ( eoib );
  486. return 0;
  487. ib_destroy_qp ( ibdev, eoib->qp );
  488. eoib->qp = NULL;
  489. err_create_qp:
  490. ib_destroy_cq ( ibdev, eoib->cq );
  491. eoib->cq = NULL;
  492. err_create_cq:
  493. ib_close ( ibdev );
  494. err_ib_open:
  495. return rc;
  496. }
  497. /**
  498. * Close EoIB network device
  499. *
  500. * @v netdev Network device
  501. */
  502. static void eoib_close ( struct net_device *netdev ) {
  503. struct eoib_device *eoib = netdev->priv;
  504. struct ib_device *ibdev = eoib->ibdev;
  505. /* Flush peer cache */
  506. eoib_flush_peers ( eoib );
  507. /* Leave broadcast group */
  508. eoib_leave_broadcast_group ( eoib );
  509. /* Tear down the queues */
  510. ib_destroy_qp ( ibdev, eoib->qp );
  511. eoib->qp = NULL;
  512. ib_destroy_cq ( ibdev, eoib->cq );
  513. eoib->cq = NULL;
  514. /* Close IB device */
  515. ib_close ( ibdev );
  516. }
  517. /** EoIB network device operations */
  518. static struct net_device_operations eoib_operations = {
  519. .open = eoib_open,
  520. .close = eoib_close,
  521. .transmit = eoib_transmit,
  522. .poll = eoib_poll,
  523. };
  524. /**
  525. * Create EoIB device
  526. *
  527. * @v ibdev Infiniband device
  528. * @v hw_addr Ethernet MAC
  529. * @v broadcast Broadcast address vector
  530. * @v name Interface name (or NULL to use default)
  531. * @ret rc Return status code
  532. */
  533. int eoib_create ( struct ib_device *ibdev, const uint8_t *hw_addr,
  534. struct ib_address_vector *broadcast, const char *name ) {
  535. struct net_device *netdev;
  536. struct eoib_device *eoib;
  537. int rc;
  538. /* Allocate network device */
  539. netdev = alloc_etherdev ( sizeof ( *eoib ) );
  540. if ( ! netdev ) {
  541. rc = -ENOMEM;
  542. goto err_alloc;
  543. }
  544. netdev_init ( netdev, &eoib_operations );
  545. eoib = netdev->priv;
  546. netdev->dev = ibdev->dev;
  547. eoib->netdev = netdev;
  548. eoib->ibdev = ibdev_get ( ibdev );
  549. memcpy ( &eoib->broadcast, broadcast, sizeof ( eoib->broadcast ) );
  550. INIT_LIST_HEAD ( &eoib->peers );
  551. /* Set MAC address */
  552. memcpy ( netdev->hw_addr, hw_addr, ETH_ALEN );
  553. /* Set interface name, if applicable */
  554. if ( name )
  555. snprintf ( netdev->name, sizeof ( netdev->name ), "%s", name );
  556. eoib->name = netdev->name;
  557. /* Add to list of EoIB devices */
  558. list_add_tail ( &eoib->list, &eoib_devices );
  559. /* Register network device */
  560. if ( ( rc = register_netdev ( netdev ) ) != 0 )
  561. goto err_register;
  562. DBGC ( eoib, "EoIB %s created for %s MAC %s\n",
  563. eoib->name, ibdev->name, eth_ntoa ( hw_addr ) );
  564. DBGC ( eoib, "EoIB %s broadcast GID " IB_GID_FMT "\n",
  565. eoib->name, IB_GID_ARGS ( &broadcast->gid ) );
  566. return 0;
  567. unregister_netdev ( netdev );
  568. err_register:
  569. list_del ( &eoib->list );
  570. ibdev_put ( ibdev );
  571. netdev_nullify ( netdev );
  572. netdev_put ( netdev );
  573. err_alloc:
  574. return rc;
  575. }
  576. /**
  577. * Find EoIB device
  578. *
  579. * @v ibdev Infiniband device
  580. * @v hw_addr Original Ethernet MAC
  581. * @ret eoib EoIB device
  582. */
  583. struct eoib_device * eoib_find ( struct ib_device *ibdev,
  584. const uint8_t *hw_addr ) {
  585. struct eoib_device *eoib;
  586. list_for_each_entry ( eoib, &eoib_devices, list ) {
  587. if ( ( eoib->ibdev == ibdev ) &&
  588. ( memcmp ( eoib->netdev->hw_addr, hw_addr,
  589. ETH_ALEN ) == 0 ) )
  590. return eoib;
  591. }
  592. return NULL;
  593. }
  594. /**
  595. * Remove EoIB device
  596. *
  597. * @v eoib EoIB device
  598. */
  599. void eoib_destroy ( struct eoib_device *eoib ) {
  600. struct net_device *netdev = eoib->netdev;
  601. /* Unregister network device */
  602. unregister_netdev ( netdev );
  603. /* Remove from list of network devices */
  604. list_del ( &eoib->list );
  605. /* Drop reference to Infiniband device */
  606. ibdev_put ( eoib->ibdev );
  607. /* Free network device */
  608. DBGC ( eoib, "EoIB %s destroyed\n", eoib->name );
  609. netdev_nullify ( netdev );
  610. netdev_put ( netdev );
  611. }
  612. /**
  613. * Probe EoIB device
  614. *
  615. * @v ibdev Infiniband device
  616. * @ret rc Return status code
  617. */
  618. static int eoib_probe ( struct ib_device *ibdev __unused ) {
  619. /* EoIB devices are not created automatically */
  620. return 0;
  621. }
  622. /**
  623. * Handle device or link status change
  624. *
  625. * @v ibdev Infiniband device
  626. */
  627. static void eoib_notify ( struct ib_device *ibdev ) {
  628. struct eoib_device *eoib;
  629. /* Handle link status change for any attached EoIB devices */
  630. list_for_each_entry ( eoib, &eoib_devices, list ) {
  631. if ( eoib->ibdev != ibdev )
  632. continue;
  633. eoib_link_state_changed ( eoib );
  634. }
  635. }
  636. /**
  637. * Remove EoIB device
  638. *
  639. * @v ibdev Infiniband device
  640. */
  641. static void eoib_remove ( struct ib_device *ibdev ) {
  642. struct eoib_device *eoib;
  643. struct eoib_device *tmp;
  644. /* Remove any attached EoIB devices */
  645. list_for_each_entry_safe ( eoib, tmp, &eoib_devices, list ) {
  646. if ( eoib->ibdev != ibdev )
  647. continue;
  648. eoib_destroy ( eoib );
  649. }
  650. }
  651. /** EoIB driver */
  652. struct ib_driver eoib_driver __ib_driver = {
  653. .name = "EoIB",
  654. .probe = eoib_probe,
  655. .notify = eoib_notify,
  656. .remove = eoib_remove,
  657. };
  658. /****************************************************************************
  659. *
  660. * EoIB heartbeat packets
  661. *
  662. ****************************************************************************
  663. */
  664. /**
  665. * Silently ignore incoming EoIB heartbeat packets
  666. *
  667. * @v iobuf I/O buffer
  668. * @v netdev Network device
  669. * @v ll_source Link-layer source address
  670. * @v flags Packet flags
  671. * @ret rc Return status code
  672. */
  673. static int eoib_heartbeat_rx ( struct io_buffer *iobuf,
  674. struct net_device *netdev __unused,
  675. const void *ll_dest __unused,
  676. const void *ll_source __unused,
  677. unsigned int flags __unused ) {
  678. free_iob ( iobuf );
  679. return 0;
  680. }
  681. /**
  682. * Transcribe EoIB heartbeat address
  683. *
  684. * @v net_addr EoIB heartbeat address
  685. * @ret string "<EoIB>"
  686. *
  687. * This operation is meaningless for the EoIB heartbeat protocol.
  688. */
  689. static const char * eoib_heartbeat_ntoa ( const void *net_addr __unused ) {
  690. return "<EoIB>";
  691. }
  692. /** EoIB heartbeat network protocol */
  693. struct net_protocol eoib_heartbeat_protocol __net_protocol = {
  694. .name = "EoIB",
  695. .net_proto = htons ( EOIB_MAGIC ),
  696. .rx = eoib_heartbeat_rx,
  697. .ntoa = eoib_heartbeat_ntoa,
  698. };
  699. /****************************************************************************
  700. *
  701. * EoIB gateway
  702. *
  703. ****************************************************************************
  704. *
  705. * Some dubious EoIB implementations require all broadcast traffic to
  706. * be sent twice: once to the actual broadcast group, and once as a
  707. * unicast to the EoIB-to-Ethernet gateway. This somewhat curious
  708. * design arises since the EoIB-to-Ethernet gateway hardware lacks the
  709. * ability to attach a queue pair to a multicast GID (or LID), and so
  710. * cannot receive traffic sent to the broadcast group.
  711. *
  712. */
  713. /**
  714. * Transmit duplicate packet to the EoIB gateway
  715. *
  716. * @v eoib EoIB device
  717. * @v original Original I/O buffer
  718. */
  719. static void eoib_duplicate ( struct eoib_device *eoib,
  720. struct io_buffer *original ) {
  721. struct net_device *netdev = eoib->netdev;
  722. struct ib_device *ibdev = eoib->ibdev;
  723. struct ib_address_vector *av = &eoib->gateway;
  724. size_t len = iob_len ( original );
  725. struct io_buffer *copy;
  726. int rc;
  727. /* Create copy of I/O buffer */
  728. copy = alloc_iob ( len );
  729. if ( ! copy ) {
  730. rc = -ENOMEM;
  731. goto err_alloc;
  732. }
  733. memcpy ( iob_put ( copy, len ), original->data, len );
  734. /* Append to network device's transmit queue */
  735. list_add_tail ( &copy->list, &original->list );
  736. /* Resolve path to gateway */
  737. if ( ( rc = ib_resolve_path ( ibdev, av ) ) != 0 ) {
  738. DBGC ( eoib, "EoIB %s no path to gateway: %s\n",
  739. eoib->name, strerror ( rc ) );
  740. goto err_path;
  741. }
  742. /* Force use of GRH even for local destinations */
  743. av->gid_present = 1;
  744. /* Post send work queue entry */
  745. if ( ( rc = ib_post_send ( eoib->ibdev, eoib->qp, av, copy ) ) != 0 )
  746. goto err_post_send;
  747. return;
  748. err_post_send:
  749. err_path:
  750. list_del ( &copy->list );
  751. err_alloc:
  752. netdev_tx_err ( netdev, copy, rc );
  753. }
  754. /**
  755. * Set EoIB gateway
  756. *
  757. * @v eoib EoIB device
  758. * @v av Address vector, or NULL to clear gateway
  759. */
  760. void eoib_set_gateway ( struct eoib_device *eoib,
  761. struct ib_address_vector *av ) {
  762. if ( av ) {
  763. DBGC ( eoib, "EoIB %s using gateway " IB_GID_FMT "\n",
  764. eoib->name, IB_GID_ARGS ( &av->gid ) );
  765. memcpy ( &eoib->gateway, av, sizeof ( eoib->gateway ) );
  766. eoib->duplicate = eoib_duplicate;
  767. } else {
  768. DBGC ( eoib, "EoIB %s not using gateway\n", eoib->name );
  769. eoib->duplicate = NULL;
  770. }
  771. }