You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

infiniband.c 21KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872
  1. /*
  2. * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License as
  6. * published by the Free Software Foundation; either version 2 of the
  7. * License, or any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful, but
  10. * WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write to the Free Software
  16. * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17. */
  18. FILE_LICENCE ( GPL2_OR_LATER );
  19. #include <stdint.h>
  20. #include <stdlib.h>
  21. #include <stdio.h>
  22. #include <string.h>
  23. #include <unistd.h>
  24. #include <byteswap.h>
  25. #include <errno.h>
  26. #include <assert.h>
  27. #include <gpxe/list.h>
  28. #include <gpxe/if_arp.h>
  29. #include <gpxe/netdevice.h>
  30. #include <gpxe/iobuf.h>
  31. #include <gpxe/ipoib.h>
  32. #include <gpxe/process.h>
  33. #include <gpxe/infiniband.h>
  34. #include <gpxe/ib_gma.h>
  35. /** @file
  36. *
  37. * Infiniband protocol
  38. *
  39. */
  40. /** List of Infiniband devices */
  41. struct list_head ib_devices = LIST_HEAD_INIT ( ib_devices );
  42. /***************************************************************************
  43. *
  44. * Completion queues
  45. *
  46. ***************************************************************************
  47. */
  48. /**
  49. * Create completion queue
  50. *
  51. * @v ibdev Infiniband device
  52. * @v num_cqes Number of completion queue entries
  53. * @v op Completion queue operations
  54. * @ret cq New completion queue
  55. */
  56. struct ib_completion_queue *
  57. ib_create_cq ( struct ib_device *ibdev, unsigned int num_cqes,
  58. struct ib_completion_queue_operations *op ) {
  59. struct ib_completion_queue *cq;
  60. int rc;
  61. DBGC ( ibdev, "IBDEV %p creating completion queue\n", ibdev );
  62. /* Allocate and initialise data structure */
  63. cq = zalloc ( sizeof ( *cq ) );
  64. if ( ! cq )
  65. goto err_alloc_cq;
  66. cq->ibdev = ibdev;
  67. list_add ( &cq->list, &ibdev->cqs );
  68. cq->num_cqes = num_cqes;
  69. INIT_LIST_HEAD ( &cq->work_queues );
  70. cq->op = op;
  71. /* Perform device-specific initialisation and get CQN */
  72. if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) {
  73. DBGC ( ibdev, "IBDEV %p could not initialise completion "
  74. "queue: %s\n", ibdev, strerror ( rc ) );
  75. goto err_dev_create_cq;
  76. }
  77. DBGC ( ibdev, "IBDEV %p created %d-entry completion queue %p (%p) "
  78. "with CQN %#lx\n", ibdev, num_cqes, cq,
  79. ib_cq_get_drvdata ( cq ), cq->cqn );
  80. return cq;
  81. ibdev->op->destroy_cq ( ibdev, cq );
  82. err_dev_create_cq:
  83. list_del ( &cq->list );
  84. free ( cq );
  85. err_alloc_cq:
  86. return NULL;
  87. }
  88. /**
  89. * Destroy completion queue
  90. *
  91. * @v ibdev Infiniband device
  92. * @v cq Completion queue
  93. */
  94. void ib_destroy_cq ( struct ib_device *ibdev,
  95. struct ib_completion_queue *cq ) {
  96. DBGC ( ibdev, "IBDEV %p destroying completion queue %#lx\n",
  97. ibdev, cq->cqn );
  98. assert ( list_empty ( &cq->work_queues ) );
  99. ibdev->op->destroy_cq ( ibdev, cq );
  100. list_del ( &cq->list );
  101. free ( cq );
  102. }
  103. /**
  104. * Poll completion queue
  105. *
  106. * @v ibdev Infiniband device
  107. * @v cq Completion queue
  108. */
  109. void ib_poll_cq ( struct ib_device *ibdev,
  110. struct ib_completion_queue *cq ) {
  111. struct ib_work_queue *wq;
  112. /* Poll completion queue */
  113. ibdev->op->poll_cq ( ibdev, cq );
  114. /* Refill receive work queues */
  115. list_for_each_entry ( wq, &cq->work_queues, list ) {
  116. if ( ! wq->is_send )
  117. ib_refill_recv ( ibdev, wq->qp );
  118. }
  119. }
  120. /***************************************************************************
  121. *
  122. * Work queues
  123. *
  124. ***************************************************************************
  125. */
  126. /**
  127. * Create queue pair
  128. *
  129. * @v ibdev Infiniband device
  130. * @v type Queue pair type
  131. * @v num_send_wqes Number of send work queue entries
  132. * @v send_cq Send completion queue
  133. * @v num_recv_wqes Number of receive work queue entries
  134. * @v recv_cq Receive completion queue
  135. * @ret qp Queue pair
  136. *
  137. * The queue pair will be left in the INIT state; you must call
  138. * ib_modify_qp() before it is ready to use for sending and receiving.
  139. */
  140. struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev,
  141. enum ib_queue_pair_type type,
  142. unsigned int num_send_wqes,
  143. struct ib_completion_queue *send_cq,
  144. unsigned int num_recv_wqes,
  145. struct ib_completion_queue *recv_cq ) {
  146. struct ib_queue_pair *qp;
  147. size_t total_size;
  148. int rc;
  149. DBGC ( ibdev, "IBDEV %p creating queue pair\n", ibdev );
  150. /* Allocate and initialise data structure */
  151. total_size = ( sizeof ( *qp ) +
  152. ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) +
  153. ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) );
  154. qp = zalloc ( total_size );
  155. if ( ! qp )
  156. goto err_alloc_qp;
  157. qp->ibdev = ibdev;
  158. list_add ( &qp->list, &ibdev->qps );
  159. qp->type = type;
  160. qp->send.qp = qp;
  161. qp->send.is_send = 1;
  162. qp->send.cq = send_cq;
  163. list_add ( &qp->send.list, &send_cq->work_queues );
  164. qp->send.psn = ( random() & 0xffffffUL );
  165. qp->send.num_wqes = num_send_wqes;
  166. qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) );
  167. qp->recv.qp = qp;
  168. qp->recv.cq = recv_cq;
  169. list_add ( &qp->recv.list, &recv_cq->work_queues );
  170. qp->recv.psn = ( random() & 0xffffffUL );
  171. qp->recv.num_wqes = num_recv_wqes;
  172. qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) +
  173. ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ));
  174. INIT_LIST_HEAD ( &qp->mgids );
  175. /* Perform device-specific initialisation and get QPN */
  176. if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) {
  177. DBGC ( ibdev, "IBDEV %p could not initialise queue pair: "
  178. "%s\n", ibdev, strerror ( rc ) );
  179. goto err_dev_create_qp;
  180. }
  181. DBGC ( ibdev, "IBDEV %p created queue pair %p (%p) with QPN %#lx\n",
  182. ibdev, qp, ib_qp_get_drvdata ( qp ), qp->qpn );
  183. DBGC ( ibdev, "IBDEV %p QPN %#lx has %d send entries at [%p,%p)\n",
  184. ibdev, qp->qpn, num_send_wqes, qp->send.iobufs,
  185. qp->recv.iobufs );
  186. DBGC ( ibdev, "IBDEV %p QPN %#lx has %d receive entries at [%p,%p)\n",
  187. ibdev, qp->qpn, num_recv_wqes, qp->recv.iobufs,
  188. ( ( ( void * ) qp ) + total_size ) );
  189. /* Calculate externally-visible QPN */
  190. switch ( type ) {
  191. case IB_QPT_SMI:
  192. qp->ext_qpn = IB_QPN_SMI;
  193. break;
  194. case IB_QPT_GSI:
  195. qp->ext_qpn = IB_QPN_GSI;
  196. break;
  197. default:
  198. qp->ext_qpn = qp->qpn;
  199. break;
  200. }
  201. if ( qp->ext_qpn != qp->qpn ) {
  202. DBGC ( ibdev, "IBDEV %p QPN %#lx has external QPN %#lx\n",
  203. ibdev, qp->qpn, qp->ext_qpn );
  204. }
  205. return qp;
  206. ibdev->op->destroy_qp ( ibdev, qp );
  207. err_dev_create_qp:
  208. list_del ( &qp->send.list );
  209. list_del ( &qp->recv.list );
  210. list_del ( &qp->list );
  211. free ( qp );
  212. err_alloc_qp:
  213. return NULL;
  214. }
  215. /**
  216. * Modify queue pair
  217. *
  218. * @v ibdev Infiniband device
  219. * @v qp Queue pair
  220. * @v av New address vector, if applicable
  221. * @ret rc Return status code
  222. */
  223. int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  224. int rc;
  225. DBGC ( ibdev, "IBDEV %p modifying QPN %#lx\n", ibdev, qp->qpn );
  226. if ( ( rc = ibdev->op->modify_qp ( ibdev, qp ) ) != 0 ) {
  227. DBGC ( ibdev, "IBDEV %p could not modify QPN %#lx: %s\n",
  228. ibdev, qp->qpn, strerror ( rc ) );
  229. return rc;
  230. }
  231. return 0;
  232. }
  233. /**
  234. * Destroy queue pair
  235. *
  236. * @v ibdev Infiniband device
  237. * @v qp Queue pair
  238. */
  239. void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  240. struct io_buffer *iobuf;
  241. unsigned int i;
  242. DBGC ( ibdev, "IBDEV %p destroying QPN %#lx\n",
  243. ibdev, qp->qpn );
  244. assert ( list_empty ( &qp->mgids ) );
  245. /* Perform device-specific destruction */
  246. ibdev->op->destroy_qp ( ibdev, qp );
  247. /* Complete any remaining I/O buffers with errors */
  248. for ( i = 0 ; i < qp->send.num_wqes ; i++ ) {
  249. if ( ( iobuf = qp->send.iobufs[i] ) != NULL )
  250. ib_complete_send ( ibdev, qp, iobuf, -ECANCELED );
  251. }
  252. for ( i = 0 ; i < qp->recv.num_wqes ; i++ ) {
  253. if ( ( iobuf = qp->recv.iobufs[i] ) != NULL ) {
  254. ib_complete_recv ( ibdev, qp, NULL, iobuf,
  255. -ECANCELED );
  256. }
  257. }
  258. /* Remove work queues from completion queue */
  259. list_del ( &qp->send.list );
  260. list_del ( &qp->recv.list );
  261. /* Free QP */
  262. list_del ( &qp->list );
  263. free ( qp );
  264. }
  265. /**
  266. * Find queue pair by QPN
  267. *
  268. * @v ibdev Infiniband device
  269. * @v qpn Queue pair number
  270. * @ret qp Queue pair, or NULL
  271. */
  272. struct ib_queue_pair * ib_find_qp_qpn ( struct ib_device *ibdev,
  273. unsigned long qpn ) {
  274. struct ib_queue_pair *qp;
  275. list_for_each_entry ( qp, &ibdev->qps, list ) {
  276. if ( ( qpn == qp->qpn ) || ( qpn == qp->ext_qpn ) )
  277. return qp;
  278. }
  279. return NULL;
  280. }
  281. /**
  282. * Find queue pair by multicast GID
  283. *
  284. * @v ibdev Infiniband device
  285. * @v gid Multicast GID
  286. * @ret qp Queue pair, or NULL
  287. */
  288. struct ib_queue_pair * ib_find_qp_mgid ( struct ib_device *ibdev,
  289. struct ib_gid *gid ) {
  290. struct ib_queue_pair *qp;
  291. struct ib_multicast_gid *mgid;
  292. list_for_each_entry ( qp, &ibdev->qps, list ) {
  293. list_for_each_entry ( mgid, &qp->mgids, list ) {
  294. if ( memcmp ( &mgid->gid, gid,
  295. sizeof ( mgid->gid ) ) == 0 ) {
  296. return qp;
  297. }
  298. }
  299. }
  300. return NULL;
  301. }
  302. /**
  303. * Find work queue belonging to completion queue
  304. *
  305. * @v cq Completion queue
  306. * @v qpn Queue pair number
  307. * @v is_send Find send work queue (rather than receive)
  308. * @ret wq Work queue, or NULL if not found
  309. */
  310. struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
  311. unsigned long qpn, int is_send ) {
  312. struct ib_work_queue *wq;
  313. list_for_each_entry ( wq, &cq->work_queues, list ) {
  314. if ( ( wq->qp->qpn == qpn ) && ( wq->is_send == is_send ) )
  315. return wq;
  316. }
  317. return NULL;
  318. }
  319. /**
  320. * Post send work queue entry
  321. *
  322. * @v ibdev Infiniband device
  323. * @v qp Queue pair
  324. * @v av Address vector
  325. * @v iobuf I/O buffer
  326. * @ret rc Return status code
  327. */
  328. int ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  329. struct ib_address_vector *av,
  330. struct io_buffer *iobuf ) {
  331. struct ib_address_vector av_copy;
  332. int rc;
  333. /* Check queue fill level */
  334. if ( qp->send.fill >= qp->send.num_wqes ) {
  335. DBGC ( ibdev, "IBDEV %p QPN %#lx send queue full\n",
  336. ibdev, qp->qpn );
  337. return -ENOBUFS;
  338. }
  339. /* Use default address vector if none specified */
  340. if ( ! av )
  341. av = &qp->av;
  342. /* Make modifiable copy of address vector */
  343. memcpy ( &av_copy, av, sizeof ( av_copy ) );
  344. av = &av_copy;
  345. /* Fill in optional parameters in address vector */
  346. if ( ! av->qkey )
  347. av->qkey = qp->qkey;
  348. if ( ! av->rate )
  349. av->rate = IB_RATE_2_5;
  350. /* Post to hardware */
  351. if ( ( rc = ibdev->op->post_send ( ibdev, qp, av, iobuf ) ) != 0 ) {
  352. DBGC ( ibdev, "IBDEV %p QPN %#lx could not post send WQE: "
  353. "%s\n", ibdev, qp->qpn, strerror ( rc ) );
  354. return rc;
  355. }
  356. qp->send.fill++;
  357. return 0;
  358. }
  359. /**
  360. * Post receive work queue entry
  361. *
  362. * @v ibdev Infiniband device
  363. * @v qp Queue pair
  364. * @v iobuf I/O buffer
  365. * @ret rc Return status code
  366. */
  367. int ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  368. struct io_buffer *iobuf ) {
  369. int rc;
  370. /* Check packet length */
  371. if ( iob_tailroom ( iobuf ) < IB_MAX_PAYLOAD_SIZE ) {
  372. DBGC ( ibdev, "IBDEV %p QPN %#lx wrong RX buffer size (%zd)\n",
  373. ibdev, qp->qpn, iob_tailroom ( iobuf ) );
  374. return -EINVAL;
  375. }
  376. /* Check queue fill level */
  377. if ( qp->recv.fill >= qp->recv.num_wqes ) {
  378. DBGC ( ibdev, "IBDEV %p QPN %#lx receive queue full\n",
  379. ibdev, qp->qpn );
  380. return -ENOBUFS;
  381. }
  382. /* Post to hardware */
  383. if ( ( rc = ibdev->op->post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
  384. DBGC ( ibdev, "IBDEV %p QPN %#lx could not post receive WQE: "
  385. "%s\n", ibdev, qp->qpn, strerror ( rc ) );
  386. return rc;
  387. }
  388. qp->recv.fill++;
  389. return 0;
  390. }
  391. /**
  392. * Complete send work queue entry
  393. *
  394. * @v ibdev Infiniband device
  395. * @v qp Queue pair
  396. * @v iobuf I/O buffer
  397. * @v rc Completion status code
  398. */
  399. void ib_complete_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  400. struct io_buffer *iobuf, int rc ) {
  401. if ( qp->send.cq->op->complete_send ) {
  402. qp->send.cq->op->complete_send ( ibdev, qp, iobuf, rc );
  403. } else {
  404. free_iob ( iobuf );
  405. }
  406. qp->send.fill--;
  407. }
  408. /**
  409. * Complete receive work queue entry
  410. *
  411. * @v ibdev Infiniband device
  412. * @v qp Queue pair
  413. * @v av Address vector
  414. * @v iobuf I/O buffer
  415. * @v rc Completion status code
  416. */
  417. void ib_complete_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  418. struct ib_address_vector *av,
  419. struct io_buffer *iobuf, int rc ) {
  420. if ( qp->recv.cq->op->complete_recv ) {
  421. qp->recv.cq->op->complete_recv ( ibdev, qp, av, iobuf, rc );
  422. } else {
  423. free_iob ( iobuf );
  424. }
  425. qp->recv.fill--;
  426. }
  427. /**
  428. * Refill receive work queue
  429. *
  430. * @v ibdev Infiniband device
  431. * @v qp Queue pair
  432. */
  433. void ib_refill_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  434. struct io_buffer *iobuf;
  435. int rc;
  436. /* Keep filling while unfilled entries remain */
  437. while ( qp->recv.fill < qp->recv.num_wqes ) {
  438. /* Allocate I/O buffer */
  439. iobuf = alloc_iob ( IB_MAX_PAYLOAD_SIZE );
  440. if ( ! iobuf ) {
  441. /* Non-fatal; we will refill on next attempt */
  442. return;
  443. }
  444. /* Post I/O buffer */
  445. if ( ( rc = ib_post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
  446. DBGC ( ibdev, "IBDEV %p could not refill: %s\n",
  447. ibdev, strerror ( rc ) );
  448. free_iob ( iobuf );
  449. /* Give up */
  450. return;
  451. }
  452. }
  453. }
  454. /***************************************************************************
  455. *
  456. * Link control
  457. *
  458. ***************************************************************************
  459. */
  460. /**
  461. * Open port
  462. *
  463. * @v ibdev Infiniband device
  464. * @ret rc Return status code
  465. */
  466. int ib_open ( struct ib_device *ibdev ) {
  467. int rc;
  468. /* Increment device open request counter */
  469. if ( ibdev->open_count++ > 0 ) {
  470. /* Device was already open; do nothing */
  471. return 0;
  472. }
  473. /* Create subnet management agent */
  474. ibdev->sma = ib_create_gma ( ibdev, IB_QPT_SMI );
  475. if ( ! ibdev->sma ) {
  476. DBGC ( ibdev, "IBDEV %p could not create SMA\n", ibdev );
  477. rc = -ENOMEM;
  478. goto err_create_sma;
  479. }
  480. /* Create general management agent */
  481. ibdev->gma = ib_create_gma ( ibdev, IB_QPT_GSI );
  482. if ( ! ibdev->gma ) {
  483. DBGC ( ibdev, "IBDEV %p could not create GMA\n", ibdev );
  484. rc = -ENOMEM;
  485. goto err_create_gma;
  486. }
  487. /* Open device */
  488. if ( ( rc = ibdev->op->open ( ibdev ) ) != 0 ) {
  489. DBGC ( ibdev, "IBDEV %p could not open: %s\n",
  490. ibdev, strerror ( rc ) );
  491. goto err_open;
  492. }
  493. assert ( ibdev->open_count == 1 );
  494. return 0;
  495. ibdev->op->close ( ibdev );
  496. err_open:
  497. ib_destroy_gma ( ibdev->gma );
  498. err_create_gma:
  499. ib_destroy_gma ( ibdev->sma );
  500. err_create_sma:
  501. assert ( ibdev->open_count == 1 );
  502. ibdev->open_count = 0;
  503. return rc;
  504. }
  505. /**
  506. * Close port
  507. *
  508. * @v ibdev Infiniband device
  509. */
  510. void ib_close ( struct ib_device *ibdev ) {
  511. /* Decrement device open request counter */
  512. ibdev->open_count--;
  513. /* Close device if this was the last remaining requested opening */
  514. if ( ibdev->open_count == 0 ) {
  515. ib_destroy_gma ( ibdev->gma );
  516. ib_destroy_gma ( ibdev->sma );
  517. ibdev->op->close ( ibdev );
  518. }
  519. }
  520. /***************************************************************************
  521. *
  522. * Multicast
  523. *
  524. ***************************************************************************
  525. */
  526. /**
  527. * Attach to multicast group
  528. *
  529. * @v ibdev Infiniband device
  530. * @v qp Queue pair
  531. * @v gid Multicast GID
  532. * @ret rc Return status code
  533. *
  534. * Note that this function handles only the local device's attachment
  535. * to the multicast GID; it does not issue the relevant MADs to join
  536. * the multicast group on the subnet.
  537. */
  538. int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  539. struct ib_gid *gid ) {
  540. struct ib_multicast_gid *mgid;
  541. int rc;
  542. /* Add to software multicast GID list */
  543. mgid = zalloc ( sizeof ( *mgid ) );
  544. if ( ! mgid ) {
  545. rc = -ENOMEM;
  546. goto err_alloc_mgid;
  547. }
  548. memcpy ( &mgid->gid, gid, sizeof ( mgid->gid ) );
  549. list_add ( &mgid->list, &qp->mgids );
  550. /* Add to hardware multicast GID list */
  551. if ( ( rc = ibdev->op->mcast_attach ( ibdev, qp, gid ) ) != 0 )
  552. goto err_dev_mcast_attach;
  553. return 0;
  554. err_dev_mcast_attach:
  555. list_del ( &mgid->list );
  556. free ( mgid );
  557. err_alloc_mgid:
  558. return rc;
  559. }
  560. /**
  561. * Detach from multicast group
  562. *
  563. * @v ibdev Infiniband device
  564. * @v qp Queue pair
  565. * @v gid Multicast GID
  566. */
  567. void ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  568. struct ib_gid *gid ) {
  569. struct ib_multicast_gid *mgid;
  570. /* Remove from hardware multicast GID list */
  571. ibdev->op->mcast_detach ( ibdev, qp, gid );
  572. /* Remove from software multicast GID list */
  573. list_for_each_entry ( mgid, &qp->mgids, list ) {
  574. if ( memcmp ( &mgid->gid, gid, sizeof ( mgid->gid ) ) == 0 ) {
  575. list_del ( &mgid->list );
  576. free ( mgid );
  577. break;
  578. }
  579. }
  580. }
  581. /***************************************************************************
  582. *
  583. * Miscellaneous
  584. *
  585. ***************************************************************************
  586. */
  587. /**
  588. * Get Infiniband HCA information
  589. *
  590. * @v ibdev Infiniband device
  591. * @ret hca_guid HCA GUID
  592. * @ret num_ports Number of ports
  593. */
  594. int ib_get_hca_info ( struct ib_device *ibdev,
  595. struct ib_gid_half *hca_guid ) {
  596. struct ib_device *tmp;
  597. int num_ports = 0;
  598. /* Search for IB devices with the same physical device to
  599. * identify port count and a suitable Node GUID.
  600. */
  601. for_each_ibdev ( tmp ) {
  602. if ( tmp->dev != ibdev->dev )
  603. continue;
  604. if ( num_ports == 0 ) {
  605. memcpy ( hca_guid, &tmp->gid.u.half[1],
  606. sizeof ( *hca_guid ) );
  607. }
  608. num_ports++;
  609. }
  610. return num_ports;
  611. }
  612. /**
  613. * Set port information
  614. *
  615. * @v ibdev Infiniband device
  616. * @v mad Set port information MAD
  617. */
  618. int ib_set_port_info ( struct ib_device *ibdev, union ib_mad *mad ) {
  619. int rc;
  620. /* Adapters with embedded SMAs do not need to support this method */
  621. if ( ! ibdev->op->set_port_info ) {
  622. DBGC ( ibdev, "IBDEV %p does not support setting port "
  623. "information\n", ibdev );
  624. return -ENOTSUP;
  625. }
  626. if ( ( rc = ibdev->op->set_port_info ( ibdev, mad ) ) != 0 ) {
  627. DBGC ( ibdev, "IBDEV %p could not set port information: %s\n",
  628. ibdev, strerror ( rc ) );
  629. return rc;
  630. }
  631. return 0;
  632. };
  633. /**
  634. * Set partition key table
  635. *
  636. * @v ibdev Infiniband device
  637. * @v mad Set partition key table MAD
  638. */
  639. int ib_set_pkey_table ( struct ib_device *ibdev, union ib_mad *mad ) {
  640. int rc;
  641. /* Adapters with embedded SMAs do not need to support this method */
  642. if ( ! ibdev->op->set_pkey_table ) {
  643. DBGC ( ibdev, "IBDEV %p does not support setting partition "
  644. "key table\n", ibdev );
  645. return -ENOTSUP;
  646. }
  647. if ( ( rc = ibdev->op->set_pkey_table ( ibdev, mad ) ) != 0 ) {
  648. DBGC ( ibdev, "IBDEV %p could not set partition key table: "
  649. "%s\n", ibdev, strerror ( rc ) );
  650. return rc;
  651. }
  652. return 0;
  653. };
  654. /***************************************************************************
  655. *
  656. * Event queues
  657. *
  658. ***************************************************************************
  659. */
  /**
   * Handle Infiniband link state change
   *
   * Currently this only passes the notification on to IPoIB.
   *
   * @v ibdev		Infiniband device
   */
  void ib_link_state_changed(struct ib_device *ibdev)
  {
      /* Notify IPoIB of link state change */
      ipoib_link_state_changed(ibdev);
  }
  669. /**
  670. * Poll event queue
  671. *
  672. * @v ibdev Infiniband device
  673. */
  674. void ib_poll_eq ( struct ib_device *ibdev ) {
  675. struct ib_completion_queue *cq;
  676. /* Poll device's event queue */
  677. ibdev->op->poll_eq ( ibdev );
  678. /* Poll all completion queues */
  679. list_for_each_entry ( cq, &ibdev->cqs, list )
  680. ib_poll_cq ( ibdev, cq );
  681. }
  682. /**
  683. * Single-step the Infiniband event queue
  684. *
  685. * @v process Infiniband event queue process
  686. */
  687. static void ib_step ( struct process *process __unused ) {
  688. struct ib_device *ibdev;
  689. for_each_ibdev ( ibdev )
  690. ib_poll_eq ( ibdev );
  691. }
  692. /** Infiniband event queue process */
  693. struct process ib_process __permanent_process = {
  694. .step = ib_step,
  695. };
  696. /***************************************************************************
  697. *
  698. * Infiniband device creation/destruction
  699. *
  700. ***************************************************************************
  701. */
  702. /**
  703. * Allocate Infiniband device
  704. *
  705. * @v priv_size Size of driver private data area
  706. * @ret ibdev Infiniband device, or NULL
  707. */
  708. struct ib_device * alloc_ibdev ( size_t priv_size ) {
  709. struct ib_device *ibdev;
  710. void *drv_priv;
  711. size_t total_len;
  712. total_len = ( sizeof ( *ibdev ) + priv_size );
  713. ibdev = zalloc ( total_len );
  714. if ( ibdev ) {
  715. drv_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) );
  716. ib_set_drvdata ( ibdev, drv_priv );
  717. INIT_LIST_HEAD ( &ibdev->cqs );
  718. INIT_LIST_HEAD ( &ibdev->qps );
  719. ibdev->lid = IB_LID_NONE;
  720. ibdev->pkey = IB_PKEY_NONE;
  721. }
  722. return ibdev;
  723. }
  724. /**
  725. * Register Infiniband device
  726. *
  727. * @v ibdev Infiniband device
  728. * @ret rc Return status code
  729. */
  730. int register_ibdev ( struct ib_device *ibdev ) {
  731. int rc;
  732. /* Add to device list */
  733. ibdev_get ( ibdev );
  734. list_add_tail ( &ibdev->list, &ib_devices );
  735. /* Add IPoIB device */
  736. if ( ( rc = ipoib_probe ( ibdev ) ) != 0 ) {
  737. DBGC ( ibdev, "IBDEV %p could not add IPoIB device: %s\n",
  738. ibdev, strerror ( rc ) );
  739. goto err_ipoib_probe;
  740. }
  741. DBGC ( ibdev, "IBDEV %p registered (phys %s)\n", ibdev,
  742. ibdev->dev->name );
  743. return 0;
  744. err_ipoib_probe:
  745. list_del ( &ibdev->list );
  746. ibdev_put ( ibdev );
  747. return rc;
  748. }
  749. /**
  750. * Unregister Infiniband device
  751. *
  752. * @v ibdev Infiniband device
  753. */
  754. void unregister_ibdev ( struct ib_device *ibdev ) {
  755. /* Close device */
  756. ipoib_remove ( ibdev );
  757. /* Remove from device list */
  758. list_del ( &ibdev->list );
  759. ibdev_put ( ibdev );
  760. DBGC ( ibdev, "IBDEV %p unregistered\n", ibdev );
  761. }