You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

infiniband.c 21KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883
  1. /*
  2. * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License as
  6. * published by the Free Software Foundation; either version 2 of the
  7. * License, or any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful, but
  10. * WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write to the Free Software
  16. * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17. */
  18. FILE_LICENCE ( GPL2_OR_LATER );
  19. #include <stdint.h>
  20. #include <stdlib.h>
  21. #include <stdio.h>
  22. #include <string.h>
  23. #include <unistd.h>
  24. #include <byteswap.h>
  25. #include <errno.h>
  26. #include <assert.h>
  27. #include <gpxe/list.h>
  28. #include <gpxe/if_arp.h>
  29. #include <gpxe/netdevice.h>
  30. #include <gpxe/iobuf.h>
  31. #include <gpxe/ipoib.h>
  32. #include <gpxe/process.h>
  33. #include <gpxe/infiniband.h>
  34. #include <gpxe/ib_mi.h>
  35. #include <gpxe/ib_sma.h>
  36. /** @file
  37. *
  38. * Infiniband protocol
  39. *
  40. */
  41. /** List of Infiniband devices */
  42. struct list_head ib_devices = LIST_HEAD_INIT ( ib_devices );
  43. /***************************************************************************
  44. *
  45. * Completion queues
  46. *
  47. ***************************************************************************
  48. */
  49. /**
  50. * Create completion queue
  51. *
  52. * @v ibdev Infiniband device
  53. * @v num_cqes Number of completion queue entries
  54. * @v op Completion queue operations
  55. * @ret cq New completion queue
  56. */
  57. struct ib_completion_queue *
  58. ib_create_cq ( struct ib_device *ibdev, unsigned int num_cqes,
  59. struct ib_completion_queue_operations *op ) {
  60. struct ib_completion_queue *cq;
  61. int rc;
  62. DBGC ( ibdev, "IBDEV %p creating completion queue\n", ibdev );
  63. /* Allocate and initialise data structure */
  64. cq = zalloc ( sizeof ( *cq ) );
  65. if ( ! cq )
  66. goto err_alloc_cq;
  67. cq->ibdev = ibdev;
  68. list_add ( &cq->list, &ibdev->cqs );
  69. cq->num_cqes = num_cqes;
  70. INIT_LIST_HEAD ( &cq->work_queues );
  71. cq->op = op;
  72. /* Perform device-specific initialisation and get CQN */
  73. if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) {
  74. DBGC ( ibdev, "IBDEV %p could not initialise completion "
  75. "queue: %s\n", ibdev, strerror ( rc ) );
  76. goto err_dev_create_cq;
  77. }
  78. DBGC ( ibdev, "IBDEV %p created %d-entry completion queue %p (%p) "
  79. "with CQN %#lx\n", ibdev, num_cqes, cq,
  80. ib_cq_get_drvdata ( cq ), cq->cqn );
  81. return cq;
  82. ibdev->op->destroy_cq ( ibdev, cq );
  83. err_dev_create_cq:
  84. list_del ( &cq->list );
  85. free ( cq );
  86. err_alloc_cq:
  87. return NULL;
  88. }
  89. /**
  90. * Destroy completion queue
  91. *
  92. * @v ibdev Infiniband device
  93. * @v cq Completion queue
  94. */
  95. void ib_destroy_cq ( struct ib_device *ibdev,
  96. struct ib_completion_queue *cq ) {
  97. DBGC ( ibdev, "IBDEV %p destroying completion queue %#lx\n",
  98. ibdev, cq->cqn );
  99. assert ( list_empty ( &cq->work_queues ) );
  100. ibdev->op->destroy_cq ( ibdev, cq );
  101. list_del ( &cq->list );
  102. free ( cq );
  103. }
  104. /**
  105. * Poll completion queue
  106. *
  107. * @v ibdev Infiniband device
  108. * @v cq Completion queue
  109. */
  110. void ib_poll_cq ( struct ib_device *ibdev,
  111. struct ib_completion_queue *cq ) {
  112. struct ib_work_queue *wq;
  113. /* Poll completion queue */
  114. ibdev->op->poll_cq ( ibdev, cq );
  115. /* Refill receive work queues */
  116. list_for_each_entry ( wq, &cq->work_queues, list ) {
  117. if ( ! wq->is_send )
  118. ib_refill_recv ( ibdev, wq->qp );
  119. }
  120. }
  121. /***************************************************************************
  122. *
  123. * Work queues
  124. *
  125. ***************************************************************************
  126. */
  127. /**
  128. * Create queue pair
  129. *
  130. * @v ibdev Infiniband device
  131. * @v type Queue pair type
  132. * @v num_send_wqes Number of send work queue entries
  133. * @v send_cq Send completion queue
  134. * @v num_recv_wqes Number of receive work queue entries
  135. * @v recv_cq Receive completion queue
  136. * @ret qp Queue pair
  137. *
  138. * The queue pair will be left in the INIT state; you must call
  139. * ib_modify_qp() before it is ready to use for sending and receiving.
  140. */
  141. struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev,
  142. enum ib_queue_pair_type type,
  143. unsigned int num_send_wqes,
  144. struct ib_completion_queue *send_cq,
  145. unsigned int num_recv_wqes,
  146. struct ib_completion_queue *recv_cq ) {
  147. struct ib_queue_pair *qp;
  148. size_t total_size;
  149. int rc;
  150. DBGC ( ibdev, "IBDEV %p creating queue pair\n", ibdev );
  151. /* Allocate and initialise data structure */
  152. total_size = ( sizeof ( *qp ) +
  153. ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) +
  154. ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) );
  155. qp = zalloc ( total_size );
  156. if ( ! qp )
  157. goto err_alloc_qp;
  158. qp->ibdev = ibdev;
  159. list_add ( &qp->list, &ibdev->qps );
  160. qp->type = type;
  161. qp->send.qp = qp;
  162. qp->send.is_send = 1;
  163. qp->send.cq = send_cq;
  164. list_add ( &qp->send.list, &send_cq->work_queues );
  165. qp->send.psn = ( random() & 0xffffffUL );
  166. qp->send.num_wqes = num_send_wqes;
  167. qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) );
  168. qp->recv.qp = qp;
  169. qp->recv.cq = recv_cq;
  170. list_add ( &qp->recv.list, &recv_cq->work_queues );
  171. qp->recv.psn = ( random() & 0xffffffUL );
  172. qp->recv.num_wqes = num_recv_wqes;
  173. qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) +
  174. ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ));
  175. INIT_LIST_HEAD ( &qp->mgids );
  176. /* Perform device-specific initialisation and get QPN */
  177. if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) {
  178. DBGC ( ibdev, "IBDEV %p could not initialise queue pair: "
  179. "%s\n", ibdev, strerror ( rc ) );
  180. goto err_dev_create_qp;
  181. }
  182. DBGC ( ibdev, "IBDEV %p created queue pair %p (%p) with QPN %#lx\n",
  183. ibdev, qp, ib_qp_get_drvdata ( qp ), qp->qpn );
  184. DBGC ( ibdev, "IBDEV %p QPN %#lx has %d send entries at [%p,%p)\n",
  185. ibdev, qp->qpn, num_send_wqes, qp->send.iobufs,
  186. qp->recv.iobufs );
  187. DBGC ( ibdev, "IBDEV %p QPN %#lx has %d receive entries at [%p,%p)\n",
  188. ibdev, qp->qpn, num_recv_wqes, qp->recv.iobufs,
  189. ( ( ( void * ) qp ) + total_size ) );
  190. /* Calculate externally-visible QPN */
  191. switch ( type ) {
  192. case IB_QPT_SMI:
  193. qp->ext_qpn = IB_QPN_SMI;
  194. break;
  195. case IB_QPT_GSI:
  196. qp->ext_qpn = IB_QPN_GSI;
  197. break;
  198. default:
  199. qp->ext_qpn = qp->qpn;
  200. break;
  201. }
  202. if ( qp->ext_qpn != qp->qpn ) {
  203. DBGC ( ibdev, "IBDEV %p QPN %#lx has external QPN %#lx\n",
  204. ibdev, qp->qpn, qp->ext_qpn );
  205. }
  206. return qp;
  207. ibdev->op->destroy_qp ( ibdev, qp );
  208. err_dev_create_qp:
  209. list_del ( &qp->send.list );
  210. list_del ( &qp->recv.list );
  211. list_del ( &qp->list );
  212. free ( qp );
  213. err_alloc_qp:
  214. return NULL;
  215. }
  216. /**
  217. * Modify queue pair
  218. *
  219. * @v ibdev Infiniband device
  220. * @v qp Queue pair
  221. * @v av New address vector, if applicable
  222. * @ret rc Return status code
  223. */
  224. int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  225. int rc;
  226. DBGC ( ibdev, "IBDEV %p modifying QPN %#lx\n", ibdev, qp->qpn );
  227. if ( ( rc = ibdev->op->modify_qp ( ibdev, qp ) ) != 0 ) {
  228. DBGC ( ibdev, "IBDEV %p could not modify QPN %#lx: %s\n",
  229. ibdev, qp->qpn, strerror ( rc ) );
  230. return rc;
  231. }
  232. return 0;
  233. }
  234. /**
  235. * Destroy queue pair
  236. *
  237. * @v ibdev Infiniband device
  238. * @v qp Queue pair
  239. */
  240. void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  241. struct io_buffer *iobuf;
  242. unsigned int i;
  243. DBGC ( ibdev, "IBDEV %p destroying QPN %#lx\n",
  244. ibdev, qp->qpn );
  245. assert ( list_empty ( &qp->mgids ) );
  246. /* Perform device-specific destruction */
  247. ibdev->op->destroy_qp ( ibdev, qp );
  248. /* Complete any remaining I/O buffers with errors */
  249. for ( i = 0 ; i < qp->send.num_wqes ; i++ ) {
  250. if ( ( iobuf = qp->send.iobufs[i] ) != NULL )
  251. ib_complete_send ( ibdev, qp, iobuf, -ECANCELED );
  252. }
  253. for ( i = 0 ; i < qp->recv.num_wqes ; i++ ) {
  254. if ( ( iobuf = qp->recv.iobufs[i] ) != NULL ) {
  255. ib_complete_recv ( ibdev, qp, NULL, iobuf,
  256. -ECANCELED );
  257. }
  258. }
  259. /* Remove work queues from completion queue */
  260. list_del ( &qp->send.list );
  261. list_del ( &qp->recv.list );
  262. /* Free QP */
  263. list_del ( &qp->list );
  264. free ( qp );
  265. }
  266. /**
  267. * Find queue pair by QPN
  268. *
  269. * @v ibdev Infiniband device
  270. * @v qpn Queue pair number
  271. * @ret qp Queue pair, or NULL
  272. */
  273. struct ib_queue_pair * ib_find_qp_qpn ( struct ib_device *ibdev,
  274. unsigned long qpn ) {
  275. struct ib_queue_pair *qp;
  276. list_for_each_entry ( qp, &ibdev->qps, list ) {
  277. if ( ( qpn == qp->qpn ) || ( qpn == qp->ext_qpn ) )
  278. return qp;
  279. }
  280. return NULL;
  281. }
  282. /**
  283. * Find queue pair by multicast GID
  284. *
  285. * @v ibdev Infiniband device
  286. * @v gid Multicast GID
  287. * @ret qp Queue pair, or NULL
  288. */
  289. struct ib_queue_pair * ib_find_qp_mgid ( struct ib_device *ibdev,
  290. struct ib_gid *gid ) {
  291. struct ib_queue_pair *qp;
  292. struct ib_multicast_gid *mgid;
  293. list_for_each_entry ( qp, &ibdev->qps, list ) {
  294. list_for_each_entry ( mgid, &qp->mgids, list ) {
  295. if ( memcmp ( &mgid->gid, gid,
  296. sizeof ( mgid->gid ) ) == 0 ) {
  297. return qp;
  298. }
  299. }
  300. }
  301. return NULL;
  302. }
  303. /**
  304. * Find work queue belonging to completion queue
  305. *
  306. * @v cq Completion queue
  307. * @v qpn Queue pair number
  308. * @v is_send Find send work queue (rather than receive)
  309. * @ret wq Work queue, or NULL if not found
  310. */
  311. struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
  312. unsigned long qpn, int is_send ) {
  313. struct ib_work_queue *wq;
  314. list_for_each_entry ( wq, &cq->work_queues, list ) {
  315. if ( ( wq->qp->qpn == qpn ) && ( wq->is_send == is_send ) )
  316. return wq;
  317. }
  318. return NULL;
  319. }
  320. /**
  321. * Post send work queue entry
  322. *
  323. * @v ibdev Infiniband device
  324. * @v qp Queue pair
  325. * @v av Address vector
  326. * @v iobuf I/O buffer
  327. * @ret rc Return status code
  328. */
  329. int ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  330. struct ib_address_vector *av,
  331. struct io_buffer *iobuf ) {
  332. struct ib_address_vector av_copy;
  333. int rc;
  334. /* Check queue fill level */
  335. if ( qp->send.fill >= qp->send.num_wqes ) {
  336. DBGC ( ibdev, "IBDEV %p QPN %#lx send queue full\n",
  337. ibdev, qp->qpn );
  338. return -ENOBUFS;
  339. }
  340. /* Use default address vector if none specified */
  341. if ( ! av )
  342. av = &qp->av;
  343. /* Make modifiable copy of address vector */
  344. memcpy ( &av_copy, av, sizeof ( av_copy ) );
  345. av = &av_copy;
  346. /* Fill in optional parameters in address vector */
  347. if ( ! av->qkey )
  348. av->qkey = qp->qkey;
  349. if ( ! av->rate )
  350. av->rate = IB_RATE_2_5;
  351. /* Post to hardware */
  352. if ( ( rc = ibdev->op->post_send ( ibdev, qp, av, iobuf ) ) != 0 ) {
  353. DBGC ( ibdev, "IBDEV %p QPN %#lx could not post send WQE: "
  354. "%s\n", ibdev, qp->qpn, strerror ( rc ) );
  355. return rc;
  356. }
  357. qp->send.fill++;
  358. return 0;
  359. }
  360. /**
  361. * Post receive work queue entry
  362. *
  363. * @v ibdev Infiniband device
  364. * @v qp Queue pair
  365. * @v iobuf I/O buffer
  366. * @ret rc Return status code
  367. */
  368. int ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  369. struct io_buffer *iobuf ) {
  370. int rc;
  371. /* Check packet length */
  372. if ( iob_tailroom ( iobuf ) < IB_MAX_PAYLOAD_SIZE ) {
  373. DBGC ( ibdev, "IBDEV %p QPN %#lx wrong RX buffer size (%zd)\n",
  374. ibdev, qp->qpn, iob_tailroom ( iobuf ) );
  375. return -EINVAL;
  376. }
  377. /* Check queue fill level */
  378. if ( qp->recv.fill >= qp->recv.num_wqes ) {
  379. DBGC ( ibdev, "IBDEV %p QPN %#lx receive queue full\n",
  380. ibdev, qp->qpn );
  381. return -ENOBUFS;
  382. }
  383. /* Post to hardware */
  384. if ( ( rc = ibdev->op->post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
  385. DBGC ( ibdev, "IBDEV %p QPN %#lx could not post receive WQE: "
  386. "%s\n", ibdev, qp->qpn, strerror ( rc ) );
  387. return rc;
  388. }
  389. qp->recv.fill++;
  390. return 0;
  391. }
  392. /**
  393. * Complete send work queue entry
  394. *
  395. * @v ibdev Infiniband device
  396. * @v qp Queue pair
  397. * @v iobuf I/O buffer
  398. * @v rc Completion status code
  399. */
  400. void ib_complete_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  401. struct io_buffer *iobuf, int rc ) {
  402. if ( qp->send.cq->op->complete_send ) {
  403. qp->send.cq->op->complete_send ( ibdev, qp, iobuf, rc );
  404. } else {
  405. free_iob ( iobuf );
  406. }
  407. qp->send.fill--;
  408. }
  409. /**
  410. * Complete receive work queue entry
  411. *
  412. * @v ibdev Infiniband device
  413. * @v qp Queue pair
  414. * @v av Address vector
  415. * @v iobuf I/O buffer
  416. * @v rc Completion status code
  417. */
  418. void ib_complete_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  419. struct ib_address_vector *av,
  420. struct io_buffer *iobuf, int rc ) {
  421. if ( qp->recv.cq->op->complete_recv ) {
  422. qp->recv.cq->op->complete_recv ( ibdev, qp, av, iobuf, rc );
  423. } else {
  424. free_iob ( iobuf );
  425. }
  426. qp->recv.fill--;
  427. }
  428. /**
  429. * Refill receive work queue
  430. *
  431. * @v ibdev Infiniband device
  432. * @v qp Queue pair
  433. */
  434. void ib_refill_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  435. struct io_buffer *iobuf;
  436. int rc;
  437. /* Keep filling while unfilled entries remain */
  438. while ( qp->recv.fill < qp->recv.num_wqes ) {
  439. /* Allocate I/O buffer */
  440. iobuf = alloc_iob ( IB_MAX_PAYLOAD_SIZE );
  441. if ( ! iobuf ) {
  442. /* Non-fatal; we will refill on next attempt */
  443. return;
  444. }
  445. /* Post I/O buffer */
  446. if ( ( rc = ib_post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
  447. DBGC ( ibdev, "IBDEV %p could not refill: %s\n",
  448. ibdev, strerror ( rc ) );
  449. free_iob ( iobuf );
  450. /* Give up */
  451. return;
  452. }
  453. }
  454. }
  455. /***************************************************************************
  456. *
  457. * Link control
  458. *
  459. ***************************************************************************
  460. */
  461. /**
  462. * Open port
  463. *
  464. * @v ibdev Infiniband device
  465. * @ret rc Return status code
  466. */
  467. int ib_open ( struct ib_device *ibdev ) {
  468. int rc;
  469. /* Increment device open request counter */
  470. if ( ibdev->open_count++ > 0 ) {
  471. /* Device was already open; do nothing */
  472. return 0;
  473. }
  474. /* Create subnet management interface */
  475. ibdev->smi = ib_create_mi ( ibdev, IB_QPT_SMI );
  476. if ( ! ibdev->smi ) {
  477. DBGC ( ibdev, "IBDEV %p could not create SMI\n", ibdev );
  478. rc = -ENOMEM;
  479. goto err_create_smi;
  480. }
  481. /* Create subnet management agent */
  482. if ( ( rc = ib_create_sma ( ibdev, ibdev->smi ) ) != 0 ) {
  483. DBGC ( ibdev, "IBDEV %p could not create SMA: %s\n",
  484. ibdev, strerror ( rc ) );
  485. goto err_create_sma;
  486. }
  487. /* Create general services interface */
  488. ibdev->gsi = ib_create_mi ( ibdev, IB_QPT_GSI );
  489. if ( ! ibdev->gsi ) {
  490. DBGC ( ibdev, "IBDEV %p could not create GSI\n", ibdev );
  491. rc = -ENOMEM;
  492. goto err_create_gsi;
  493. }
  494. /* Open device */
  495. if ( ( rc = ibdev->op->open ( ibdev ) ) != 0 ) {
  496. DBGC ( ibdev, "IBDEV %p could not open: %s\n",
  497. ibdev, strerror ( rc ) );
  498. goto err_open;
  499. }
  500. assert ( ibdev->open_count == 1 );
  501. return 0;
  502. ibdev->op->close ( ibdev );
  503. err_open:
  504. ib_destroy_mi ( ibdev, ibdev->gsi );
  505. err_create_gsi:
  506. ib_destroy_sma ( ibdev, ibdev->smi );
  507. err_create_sma:
  508. ib_destroy_mi ( ibdev, ibdev->smi );
  509. err_create_smi:
  510. assert ( ibdev->open_count == 1 );
  511. ibdev->open_count = 0;
  512. return rc;
  513. }
  514. /**
  515. * Close port
  516. *
  517. * @v ibdev Infiniband device
  518. */
  519. void ib_close ( struct ib_device *ibdev ) {
  520. /* Decrement device open request counter */
  521. ibdev->open_count--;
  522. /* Close device if this was the last remaining requested opening */
  523. if ( ibdev->open_count == 0 ) {
  524. ib_destroy_mi ( ibdev, ibdev->gsi );
  525. ib_destroy_sma ( ibdev, ibdev->smi );
  526. ib_destroy_mi ( ibdev, ibdev->smi );
  527. ibdev->op->close ( ibdev );
  528. }
  529. }
  530. /***************************************************************************
  531. *
  532. * Multicast
  533. *
  534. ***************************************************************************
  535. */
  536. /**
  537. * Attach to multicast group
  538. *
  539. * @v ibdev Infiniband device
  540. * @v qp Queue pair
  541. * @v gid Multicast GID
  542. * @ret rc Return status code
  543. *
  544. * Note that this function handles only the local device's attachment
  545. * to the multicast GID; it does not issue the relevant MADs to join
  546. * the multicast group on the subnet.
  547. */
  548. int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  549. struct ib_gid *gid ) {
  550. struct ib_multicast_gid *mgid;
  551. int rc;
  552. /* Add to software multicast GID list */
  553. mgid = zalloc ( sizeof ( *mgid ) );
  554. if ( ! mgid ) {
  555. rc = -ENOMEM;
  556. goto err_alloc_mgid;
  557. }
  558. memcpy ( &mgid->gid, gid, sizeof ( mgid->gid ) );
  559. list_add ( &mgid->list, &qp->mgids );
  560. /* Add to hardware multicast GID list */
  561. if ( ( rc = ibdev->op->mcast_attach ( ibdev, qp, gid ) ) != 0 )
  562. goto err_dev_mcast_attach;
  563. return 0;
  564. err_dev_mcast_attach:
  565. list_del ( &mgid->list );
  566. free ( mgid );
  567. err_alloc_mgid:
  568. return rc;
  569. }
  570. /**
  571. * Detach from multicast group
  572. *
  573. * @v ibdev Infiniband device
  574. * @v qp Queue pair
  575. * @v gid Multicast GID
  576. */
  577. void ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  578. struct ib_gid *gid ) {
  579. struct ib_multicast_gid *mgid;
  580. /* Remove from hardware multicast GID list */
  581. ibdev->op->mcast_detach ( ibdev, qp, gid );
  582. /* Remove from software multicast GID list */
  583. list_for_each_entry ( mgid, &qp->mgids, list ) {
  584. if ( memcmp ( &mgid->gid, gid, sizeof ( mgid->gid ) ) == 0 ) {
  585. list_del ( &mgid->list );
  586. free ( mgid );
  587. break;
  588. }
  589. }
  590. }
  591. /***************************************************************************
  592. *
  593. * Miscellaneous
  594. *
  595. ***************************************************************************
  596. */
  597. /**
  598. * Get Infiniband HCA information
  599. *
  600. * @v ibdev Infiniband device
  601. * @ret hca_guid HCA GUID
  602. * @ret num_ports Number of ports
  603. */
  604. int ib_get_hca_info ( struct ib_device *ibdev,
  605. struct ib_gid_half *hca_guid ) {
  606. struct ib_device *tmp;
  607. int num_ports = 0;
  608. /* Search for IB devices with the same physical device to
  609. * identify port count and a suitable Node GUID.
  610. */
  611. for_each_ibdev ( tmp ) {
  612. if ( tmp->dev != ibdev->dev )
  613. continue;
  614. if ( num_ports == 0 ) {
  615. memcpy ( hca_guid, &tmp->gid.u.half[1],
  616. sizeof ( *hca_guid ) );
  617. }
  618. num_ports++;
  619. }
  620. return num_ports;
  621. }
  622. /**
  623. * Set port information
  624. *
  625. * @v ibdev Infiniband device
  626. * @v mad Set port information MAD
  627. */
  628. int ib_set_port_info ( struct ib_device *ibdev, union ib_mad *mad ) {
  629. int rc;
  630. /* Adapters with embedded SMAs do not need to support this method */
  631. if ( ! ibdev->op->set_port_info ) {
  632. DBGC ( ibdev, "IBDEV %p does not support setting port "
  633. "information\n", ibdev );
  634. return -ENOTSUP;
  635. }
  636. if ( ( rc = ibdev->op->set_port_info ( ibdev, mad ) ) != 0 ) {
  637. DBGC ( ibdev, "IBDEV %p could not set port information: %s\n",
  638. ibdev, strerror ( rc ) );
  639. return rc;
  640. }
  641. return 0;
  642. };
  643. /**
  644. * Set partition key table
  645. *
  646. * @v ibdev Infiniband device
  647. * @v mad Set partition key table MAD
  648. */
  649. int ib_set_pkey_table ( struct ib_device *ibdev, union ib_mad *mad ) {
  650. int rc;
  651. /* Adapters with embedded SMAs do not need to support this method */
  652. if ( ! ibdev->op->set_pkey_table ) {
  653. DBGC ( ibdev, "IBDEV %p does not support setting partition "
  654. "key table\n", ibdev );
  655. return -ENOTSUP;
  656. }
  657. if ( ( rc = ibdev->op->set_pkey_table ( ibdev, mad ) ) != 0 ) {
  658. DBGC ( ibdev, "IBDEV %p could not set partition key table: "
  659. "%s\n", ibdev, strerror ( rc ) );
  660. return rc;
  661. }
  662. return 0;
  663. };
  664. /***************************************************************************
  665. *
  666. * Event queues
  667. *
  668. ***************************************************************************
  669. */
  670. /**
  671. * Handle Infiniband link state change
  672. *
  673. * @v ibdev Infiniband device
  674. */
  675. void ib_link_state_changed ( struct ib_device *ibdev ) {
  676. /* Notify IPoIB of link state change */
  677. ipoib_link_state_changed ( ibdev );
  678. }
  679. /**
  680. * Poll event queue
  681. *
  682. * @v ibdev Infiniband device
  683. */
  684. void ib_poll_eq ( struct ib_device *ibdev ) {
  685. struct ib_completion_queue *cq;
  686. /* Poll device's event queue */
  687. ibdev->op->poll_eq ( ibdev );
  688. /* Poll all completion queues */
  689. list_for_each_entry ( cq, &ibdev->cqs, list )
  690. ib_poll_cq ( ibdev, cq );
  691. }
  692. /**
  693. * Single-step the Infiniband event queue
  694. *
  695. * @v process Infiniband event queue process
  696. */
  697. static void ib_step ( struct process *process __unused ) {
  698. struct ib_device *ibdev;
  699. for_each_ibdev ( ibdev )
  700. ib_poll_eq ( ibdev );
  701. }
  702. /** Infiniband event queue process */
  703. struct process ib_process __permanent_process = {
  704. .step = ib_step,
  705. };
  706. /***************************************************************************
  707. *
  708. * Infiniband device creation/destruction
  709. *
  710. ***************************************************************************
  711. */
  712. /**
  713. * Allocate Infiniband device
  714. *
  715. * @v priv_size Size of driver private data area
  716. * @ret ibdev Infiniband device, or NULL
  717. */
  718. struct ib_device * alloc_ibdev ( size_t priv_size ) {
  719. struct ib_device *ibdev;
  720. void *drv_priv;
  721. size_t total_len;
  722. total_len = ( sizeof ( *ibdev ) + priv_size );
  723. ibdev = zalloc ( total_len );
  724. if ( ibdev ) {
  725. drv_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) );
  726. ib_set_drvdata ( ibdev, drv_priv );
  727. INIT_LIST_HEAD ( &ibdev->cqs );
  728. INIT_LIST_HEAD ( &ibdev->qps );
  729. ibdev->lid = IB_LID_NONE;
  730. ibdev->pkey = IB_PKEY_NONE;
  731. }
  732. return ibdev;
  733. }
  734. /**
  735. * Register Infiniband device
  736. *
  737. * @v ibdev Infiniband device
  738. * @ret rc Return status code
  739. */
  740. int register_ibdev ( struct ib_device *ibdev ) {
  741. int rc;
  742. /* Add to device list */
  743. ibdev_get ( ibdev );
  744. list_add_tail ( &ibdev->list, &ib_devices );
  745. /* Add IPoIB device */
  746. if ( ( rc = ipoib_probe ( ibdev ) ) != 0 ) {
  747. DBGC ( ibdev, "IBDEV %p could not add IPoIB device: %s\n",
  748. ibdev, strerror ( rc ) );
  749. goto err_ipoib_probe;
  750. }
  751. DBGC ( ibdev, "IBDEV %p registered (phys %s)\n", ibdev,
  752. ibdev->dev->name );
  753. return 0;
  754. err_ipoib_probe:
  755. list_del ( &ibdev->list );
  756. ibdev_put ( ibdev );
  757. return rc;
  758. }
  759. /**
  760. * Unregister Infiniband device
  761. *
  762. * @v ibdev Infiniband device
  763. */
  764. void unregister_ibdev ( struct ib_device *ibdev ) {
  765. /* Close device */
  766. ipoib_remove ( ibdev );
  767. /* Remove from device list */
  768. list_del ( &ibdev->list );
  769. ibdev_put ( ibdev );
  770. DBGC ( ibdev, "IBDEV %p unregistered\n", ibdev );
  771. }