You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

infiniband.c 24KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997
  1. /*
  2. * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License as
  6. * published by the Free Software Foundation; either version 2 of the
  7. * License, or any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful, but
  10. * WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write to the Free Software
  16. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  17. * 02110-1301, USA.
  18. */
  19. FILE_LICENCE ( GPL2_OR_LATER );
  20. #include <stdint.h>
  21. #include <stdlib.h>
  22. #include <stdio.h>
  23. #include <string.h>
  24. #include <unistd.h>
  25. #include <byteswap.h>
  26. #include <errno.h>
  27. #include <assert.h>
  28. #include <ipxe/list.h>
  29. #include <ipxe/errortab.h>
  30. #include <ipxe/if_arp.h>
  31. #include <ipxe/netdevice.h>
  32. #include <ipxe/iobuf.h>
  33. #include <ipxe/process.h>
  34. #include <ipxe/infiniband.h>
  35. #include <ipxe/ib_mi.h>
  36. #include <ipxe/ib_sma.h>
  37. /** @file
  38. *
  39. * Infiniband protocol
  40. *
  41. */
  42. /** List of Infiniband devices */
  43. struct list_head ib_devices = LIST_HEAD_INIT ( ib_devices );
  44. /** List of open Infiniband devices, in reverse order of opening */
  45. static struct list_head open_ib_devices = LIST_HEAD_INIT ( open_ib_devices );
  46. /* Disambiguate the various possible EINPROGRESSes */
  47. #define EINPROGRESS_INIT __einfo_error ( EINFO_EINPROGRESS_INIT )
  48. #define EINFO_EINPROGRESS_INIT __einfo_uniqify \
  49. ( EINFO_EINPROGRESS, 0x01, "Initialising" )
  50. #define EINPROGRESS_ARMED __einfo_error ( EINFO_EINPROGRESS_ARMED )
  51. #define EINFO_EINPROGRESS_ARMED __einfo_uniqify \
  52. ( EINFO_EINPROGRESS, 0x02, "Armed" )
  53. /** Human-readable message for the link statuses */
  54. struct errortab infiniband_errors[] __errortab = {
  55. __einfo_errortab ( EINFO_EINPROGRESS_INIT ),
  56. __einfo_errortab ( EINFO_EINPROGRESS_ARMED ),
  57. };
  58. /***************************************************************************
  59. *
  60. * Completion queues
  61. *
  62. ***************************************************************************
  63. */
  64. /**
  65. * Create completion queue
  66. *
  67. * @v ibdev Infiniband device
  68. * @v num_cqes Number of completion queue entries
  69. * @v op Completion queue operations
  70. * @ret cq New completion queue
  71. */
  72. struct ib_completion_queue *
  73. ib_create_cq ( struct ib_device *ibdev, unsigned int num_cqes,
  74. struct ib_completion_queue_operations *op ) {
  75. struct ib_completion_queue *cq;
  76. int rc;
  77. DBGC ( ibdev, "IBDEV %p creating completion queue\n", ibdev );
  78. /* Allocate and initialise data structure */
  79. cq = zalloc ( sizeof ( *cq ) );
  80. if ( ! cq )
  81. goto err_alloc_cq;
  82. cq->ibdev = ibdev;
  83. list_add ( &cq->list, &ibdev->cqs );
  84. cq->num_cqes = num_cqes;
  85. INIT_LIST_HEAD ( &cq->work_queues );
  86. cq->op = op;
  87. /* Perform device-specific initialisation and get CQN */
  88. if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) {
  89. DBGC ( ibdev, "IBDEV %p could not initialise completion "
  90. "queue: %s\n", ibdev, strerror ( rc ) );
  91. goto err_dev_create_cq;
  92. }
  93. DBGC ( ibdev, "IBDEV %p created %d-entry completion queue %p (%p) "
  94. "with CQN %#lx\n", ibdev, num_cqes, cq,
  95. ib_cq_get_drvdata ( cq ), cq->cqn );
  96. return cq;
  97. ibdev->op->destroy_cq ( ibdev, cq );
  98. err_dev_create_cq:
  99. list_del ( &cq->list );
  100. free ( cq );
  101. err_alloc_cq:
  102. return NULL;
  103. }
  104. /**
  105. * Destroy completion queue
  106. *
  107. * @v ibdev Infiniband device
  108. * @v cq Completion queue
  109. */
  110. void ib_destroy_cq ( struct ib_device *ibdev,
  111. struct ib_completion_queue *cq ) {
  112. DBGC ( ibdev, "IBDEV %p destroying completion queue %#lx\n",
  113. ibdev, cq->cqn );
  114. assert ( list_empty ( &cq->work_queues ) );
  115. ibdev->op->destroy_cq ( ibdev, cq );
  116. list_del ( &cq->list );
  117. free ( cq );
  118. }
  119. /**
  120. * Poll completion queue
  121. *
  122. * @v ibdev Infiniband device
  123. * @v cq Completion queue
  124. */
  125. void ib_poll_cq ( struct ib_device *ibdev,
  126. struct ib_completion_queue *cq ) {
  127. struct ib_work_queue *wq;
  128. /* Poll completion queue */
  129. ibdev->op->poll_cq ( ibdev, cq );
  130. /* Refill receive work queues */
  131. list_for_each_entry ( wq, &cq->work_queues, list ) {
  132. if ( ! wq->is_send )
  133. ib_refill_recv ( ibdev, wq->qp );
  134. }
  135. }
  136. /***************************************************************************
  137. *
  138. * Work queues
  139. *
  140. ***************************************************************************
  141. */
  142. /**
  143. * Create queue pair
  144. *
  145. * @v ibdev Infiniband device
  146. * @v type Queue pair type
  147. * @v num_send_wqes Number of send work queue entries
  148. * @v send_cq Send completion queue
  149. * @v num_recv_wqes Number of receive work queue entries
  150. * @v recv_cq Receive completion queue
  151. * @v op Queue pair operations
  152. * @ret qp Queue pair
  153. *
  154. * The queue pair will be left in the INIT state; you must call
  155. * ib_modify_qp() before it is ready to use for sending and receiving.
  156. */
  157. struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev,
  158. enum ib_queue_pair_type type,
  159. unsigned int num_send_wqes,
  160. struct ib_completion_queue *send_cq,
  161. unsigned int num_recv_wqes,
  162. struct ib_completion_queue *recv_cq,
  163. struct ib_queue_pair_operations *op ) {
  164. struct ib_queue_pair *qp;
  165. size_t total_size;
  166. int rc;
  167. DBGC ( ibdev, "IBDEV %p creating queue pair\n", ibdev );
  168. /* Allocate and initialise data structure */
  169. total_size = ( sizeof ( *qp ) +
  170. ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) +
  171. ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) );
  172. qp = zalloc ( total_size );
  173. if ( ! qp )
  174. goto err_alloc_qp;
  175. qp->ibdev = ibdev;
  176. list_add ( &qp->list, &ibdev->qps );
  177. qp->type = type;
  178. qp->send.qp = qp;
  179. qp->send.is_send = 1;
  180. qp->send.cq = send_cq;
  181. list_add ( &qp->send.list, &send_cq->work_queues );
  182. qp->send.psn = ( random() & 0xffffffUL );
  183. qp->send.num_wqes = num_send_wqes;
  184. qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) );
  185. qp->recv.qp = qp;
  186. qp->recv.cq = recv_cq;
  187. list_add ( &qp->recv.list, &recv_cq->work_queues );
  188. qp->recv.psn = ( random() & 0xffffffUL );
  189. qp->recv.num_wqes = num_recv_wqes;
  190. qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) +
  191. ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ));
  192. INIT_LIST_HEAD ( &qp->mgids );
  193. qp->op = op;
  194. /* Perform device-specific initialisation and get QPN */
  195. if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) {
  196. DBGC ( ibdev, "IBDEV %p could not initialise queue pair: "
  197. "%s\n", ibdev, strerror ( rc ) );
  198. goto err_dev_create_qp;
  199. }
  200. DBGC ( ibdev, "IBDEV %p created queue pair %p (%p) with QPN %#lx\n",
  201. ibdev, qp, ib_qp_get_drvdata ( qp ), qp->qpn );
  202. DBGC ( ibdev, "IBDEV %p QPN %#lx has %d send entries at [%p,%p)\n",
  203. ibdev, qp->qpn, num_send_wqes, qp->send.iobufs,
  204. qp->recv.iobufs );
  205. DBGC ( ibdev, "IBDEV %p QPN %#lx has %d receive entries at [%p,%p)\n",
  206. ibdev, qp->qpn, num_recv_wqes, qp->recv.iobufs,
  207. ( ( ( void * ) qp ) + total_size ) );
  208. /* Calculate externally-visible QPN */
  209. switch ( type ) {
  210. case IB_QPT_SMI:
  211. qp->ext_qpn = IB_QPN_SMI;
  212. break;
  213. case IB_QPT_GSI:
  214. qp->ext_qpn = IB_QPN_GSI;
  215. break;
  216. default:
  217. qp->ext_qpn = qp->qpn;
  218. break;
  219. }
  220. if ( qp->ext_qpn != qp->qpn ) {
  221. DBGC ( ibdev, "IBDEV %p QPN %#lx has external QPN %#lx\n",
  222. ibdev, qp->qpn, qp->ext_qpn );
  223. }
  224. return qp;
  225. ibdev->op->destroy_qp ( ibdev, qp );
  226. err_dev_create_qp:
  227. list_del ( &qp->send.list );
  228. list_del ( &qp->recv.list );
  229. list_del ( &qp->list );
  230. free ( qp );
  231. err_alloc_qp:
  232. return NULL;
  233. }
  234. /**
  235. * Modify queue pair
  236. *
  237. * @v ibdev Infiniband device
  238. * @v qp Queue pair
  239. * @v av New address vector, if applicable
  240. * @ret rc Return status code
  241. */
  242. int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  243. int rc;
  244. DBGC ( ibdev, "IBDEV %p modifying QPN %#lx\n", ibdev, qp->qpn );
  245. if ( ( rc = ibdev->op->modify_qp ( ibdev, qp ) ) != 0 ) {
  246. DBGC ( ibdev, "IBDEV %p could not modify QPN %#lx: %s\n",
  247. ibdev, qp->qpn, strerror ( rc ) );
  248. return rc;
  249. }
  250. return 0;
  251. }
  252. /**
  253. * Destroy queue pair
  254. *
  255. * @v ibdev Infiniband device
  256. * @v qp Queue pair
  257. */
  258. void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  259. struct io_buffer *iobuf;
  260. unsigned int i;
  261. DBGC ( ibdev, "IBDEV %p destroying QPN %#lx\n",
  262. ibdev, qp->qpn );
  263. assert ( list_empty ( &qp->mgids ) );
  264. /* Perform device-specific destruction */
  265. ibdev->op->destroy_qp ( ibdev, qp );
  266. /* Complete any remaining I/O buffers with errors */
  267. for ( i = 0 ; i < qp->send.num_wqes ; i++ ) {
  268. if ( ( iobuf = qp->send.iobufs[i] ) != NULL )
  269. ib_complete_send ( ibdev, qp, iobuf, -ECANCELED );
  270. }
  271. for ( i = 0 ; i < qp->recv.num_wqes ; i++ ) {
  272. if ( ( iobuf = qp->recv.iobufs[i] ) != NULL ) {
  273. ib_complete_recv ( ibdev, qp, NULL, iobuf,
  274. -ECANCELED );
  275. }
  276. }
  277. /* Remove work queues from completion queue */
  278. list_del ( &qp->send.list );
  279. list_del ( &qp->recv.list );
  280. /* Free QP */
  281. list_del ( &qp->list );
  282. free ( qp );
  283. }
  284. /**
  285. * Find queue pair by QPN
  286. *
  287. * @v ibdev Infiniband device
  288. * @v qpn Queue pair number
  289. * @ret qp Queue pair, or NULL
  290. */
  291. struct ib_queue_pair * ib_find_qp_qpn ( struct ib_device *ibdev,
  292. unsigned long qpn ) {
  293. struct ib_queue_pair *qp;
  294. list_for_each_entry ( qp, &ibdev->qps, list ) {
  295. if ( ( qpn == qp->qpn ) || ( qpn == qp->ext_qpn ) )
  296. return qp;
  297. }
  298. return NULL;
  299. }
  300. /**
  301. * Find queue pair by multicast GID
  302. *
  303. * @v ibdev Infiniband device
  304. * @v gid Multicast GID
  305. * @ret qp Queue pair, or NULL
  306. */
  307. struct ib_queue_pair * ib_find_qp_mgid ( struct ib_device *ibdev,
  308. union ib_gid *gid ) {
  309. struct ib_queue_pair *qp;
  310. struct ib_multicast_gid *mgid;
  311. list_for_each_entry ( qp, &ibdev->qps, list ) {
  312. list_for_each_entry ( mgid, &qp->mgids, list ) {
  313. if ( memcmp ( &mgid->gid, gid,
  314. sizeof ( mgid->gid ) ) == 0 ) {
  315. return qp;
  316. }
  317. }
  318. }
  319. return NULL;
  320. }
  321. /**
  322. * Find work queue belonging to completion queue
  323. *
  324. * @v cq Completion queue
  325. * @v qpn Queue pair number
  326. * @v is_send Find send work queue (rather than receive)
  327. * @ret wq Work queue, or NULL if not found
  328. */
  329. struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
  330. unsigned long qpn, int is_send ) {
  331. struct ib_work_queue *wq;
  332. list_for_each_entry ( wq, &cq->work_queues, list ) {
  333. if ( ( wq->qp->qpn == qpn ) && ( wq->is_send == is_send ) )
  334. return wq;
  335. }
  336. return NULL;
  337. }
  338. /**
  339. * Post send work queue entry
  340. *
  341. * @v ibdev Infiniband device
  342. * @v qp Queue pair
  343. * @v av Address vector
  344. * @v iobuf I/O buffer
  345. * @ret rc Return status code
  346. */
  347. int ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  348. struct ib_address_vector *av,
  349. struct io_buffer *iobuf ) {
  350. struct ib_address_vector av_copy;
  351. int rc;
  352. /* Check queue fill level */
  353. if ( qp->send.fill >= qp->send.num_wqes ) {
  354. DBGC ( ibdev, "IBDEV %p QPN %#lx send queue full\n",
  355. ibdev, qp->qpn );
  356. return -ENOBUFS;
  357. }
  358. /* Use default address vector if none specified */
  359. if ( ! av )
  360. av = &qp->av;
  361. /* Make modifiable copy of address vector */
  362. memcpy ( &av_copy, av, sizeof ( av_copy ) );
  363. av = &av_copy;
  364. /* Fill in optional parameters in address vector */
  365. if ( ! av->qkey )
  366. av->qkey = qp->qkey;
  367. if ( ! av->rate )
  368. av->rate = IB_RATE_2_5;
  369. /* Post to hardware */
  370. if ( ( rc = ibdev->op->post_send ( ibdev, qp, av, iobuf ) ) != 0 ) {
  371. DBGC ( ibdev, "IBDEV %p QPN %#lx could not post send WQE: "
  372. "%s\n", ibdev, qp->qpn, strerror ( rc ) );
  373. return rc;
  374. }
  375. qp->send.fill++;
  376. return 0;
  377. }
  378. /**
  379. * Post receive work queue entry
  380. *
  381. * @v ibdev Infiniband device
  382. * @v qp Queue pair
  383. * @v iobuf I/O buffer
  384. * @ret rc Return status code
  385. */
  386. int ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  387. struct io_buffer *iobuf ) {
  388. int rc;
  389. /* Check packet length */
  390. if ( iob_tailroom ( iobuf ) < IB_MAX_PAYLOAD_SIZE ) {
  391. DBGC ( ibdev, "IBDEV %p QPN %#lx wrong RX buffer size (%zd)\n",
  392. ibdev, qp->qpn, iob_tailroom ( iobuf ) );
  393. return -EINVAL;
  394. }
  395. /* Check queue fill level */
  396. if ( qp->recv.fill >= qp->recv.num_wqes ) {
  397. DBGC ( ibdev, "IBDEV %p QPN %#lx receive queue full\n",
  398. ibdev, qp->qpn );
  399. return -ENOBUFS;
  400. }
  401. /* Post to hardware */
  402. if ( ( rc = ibdev->op->post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
  403. DBGC ( ibdev, "IBDEV %p QPN %#lx could not post receive WQE: "
  404. "%s\n", ibdev, qp->qpn, strerror ( rc ) );
  405. return rc;
  406. }
  407. qp->recv.fill++;
  408. return 0;
  409. }
  410. /**
  411. * Complete send work queue entry
  412. *
  413. * @v ibdev Infiniband device
  414. * @v qp Queue pair
  415. * @v iobuf I/O buffer
  416. * @v rc Completion status code
  417. */
  418. void ib_complete_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  419. struct io_buffer *iobuf, int rc ) {
  420. if ( qp->send.cq->op->complete_send ) {
  421. qp->send.cq->op->complete_send ( ibdev, qp, iobuf, rc );
  422. } else {
  423. free_iob ( iobuf );
  424. }
  425. qp->send.fill--;
  426. }
  427. /**
  428. * Complete receive work queue entry
  429. *
  430. * @v ibdev Infiniband device
  431. * @v qp Queue pair
  432. * @v av Address vector, or NULL
  433. * @v iobuf I/O buffer
  434. * @v rc Completion status code
  435. */
  436. void ib_complete_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  437. struct ib_address_vector *av,
  438. struct io_buffer *iobuf, int rc ) {
  439. if ( qp->recv.cq->op->complete_recv ) {
  440. qp->recv.cq->op->complete_recv ( ibdev, qp, av, iobuf, rc );
  441. } else {
  442. free_iob ( iobuf );
  443. }
  444. qp->recv.fill--;
  445. }
  446. /**
  447. * Refill receive work queue
  448. *
  449. * @v ibdev Infiniband device
  450. * @v qp Queue pair
  451. */
  452. void ib_refill_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  453. struct io_buffer *iobuf;
  454. int rc;
  455. /* Keep filling while unfilled entries remain */
  456. while ( qp->recv.fill < qp->recv.num_wqes ) {
  457. /* Allocate I/O buffer */
  458. iobuf = qp->op->alloc_iob ( IB_MAX_PAYLOAD_SIZE );
  459. if ( ! iobuf ) {
  460. /* Non-fatal; we will refill on next attempt */
  461. return;
  462. }
  463. /* Post I/O buffer */
  464. if ( ( rc = ib_post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
  465. DBGC ( ibdev, "IBDEV %p could not refill: %s\n",
  466. ibdev, strerror ( rc ) );
  467. free_iob ( iobuf );
  468. /* Give up */
  469. return;
  470. }
  471. }
  472. }
  473. /***************************************************************************
  474. *
  475. * Link control
  476. *
  477. ***************************************************************************
  478. */
  479. /**
  480. * Get link state
  481. *
  482. * @v ibdev Infiniband device
  483. * @ret rc Link status code
  484. */
  485. int ib_link_rc ( struct ib_device *ibdev ) {
  486. switch ( ibdev->port_state ) {
  487. case IB_PORT_STATE_DOWN: return -ENOTCONN;
  488. case IB_PORT_STATE_INIT: return -EINPROGRESS_INIT;
  489. case IB_PORT_STATE_ARMED: return -EINPROGRESS_ARMED;
  490. case IB_PORT_STATE_ACTIVE: return 0;
  491. default: return -EINVAL;
  492. }
  493. }
  494. /**
  495. * Textual representation of Infiniband link state
  496. *
  497. * @v ibdev Infiniband device
  498. * @ret link_text Link state text
  499. */
  500. static const char * ib_link_state_text ( struct ib_device *ibdev ) {
  501. switch ( ibdev->port_state ) {
  502. case IB_PORT_STATE_DOWN: return "DOWN";
  503. case IB_PORT_STATE_INIT: return "INIT";
  504. case IB_PORT_STATE_ARMED: return "ARMED";
  505. case IB_PORT_STATE_ACTIVE: return "ACTIVE";
  506. default: return "UNKNOWN";
  507. }
  508. }
  509. /**
  510. * Notify drivers of Infiniband device or link state change
  511. *
  512. * @v ibdev Infiniband device
  513. */
  514. static void ib_notify ( struct ib_device *ibdev ) {
  515. struct ib_driver *driver;
  516. for_each_table_entry ( driver, IB_DRIVERS )
  517. driver->notify ( ibdev );
  518. }
  /**
   * Notify of Infiniband link state change
   *
   * @v ibdev       Infiniband device
   *
   * Logs the current link state and passes the notification on to the
   * Infiniband drivers.
   */
  void ib_link_state_changed ( struct ib_device *ibdev ) {

      DBGC ( ibdev, "IBDEV %p link state is %s\n",
             ibdev, ib_link_state_text ( ibdev ) );

      /* Notify drivers of link state change */
      ib_notify ( ibdev );
  }
  530. /**
  531. * Open port
  532. *
  533. * @v ibdev Infiniband device
  534. * @ret rc Return status code
  535. */
  536. int ib_open ( struct ib_device *ibdev ) {
  537. int rc;
  538. /* Increment device open request counter */
  539. if ( ibdev->open_count++ > 0 ) {
  540. /* Device was already open; do nothing */
  541. return 0;
  542. }
  543. /* Open device */
  544. if ( ( rc = ibdev->op->open ( ibdev ) ) != 0 ) {
  545. DBGC ( ibdev, "IBDEV %p could not open: %s\n",
  546. ibdev, strerror ( rc ) );
  547. goto err_open;
  548. }
  549. /* Create subnet management interface */
  550. ibdev->smi = ib_create_mi ( ibdev, IB_QPT_SMI );
  551. if ( ! ibdev->smi ) {
  552. DBGC ( ibdev, "IBDEV %p could not create SMI\n", ibdev );
  553. rc = -ENOMEM;
  554. goto err_create_smi;
  555. }
  556. /* Create subnet management agent */
  557. if ( ( rc = ib_create_sma ( ibdev, ibdev->smi ) ) != 0 ) {
  558. DBGC ( ibdev, "IBDEV %p could not create SMA: %s\n",
  559. ibdev, strerror ( rc ) );
  560. goto err_create_sma;
  561. }
  562. /* Create general services interface */
  563. ibdev->gsi = ib_create_mi ( ibdev, IB_QPT_GSI );
  564. if ( ! ibdev->gsi ) {
  565. DBGC ( ibdev, "IBDEV %p could not create GSI\n", ibdev );
  566. rc = -ENOMEM;
  567. goto err_create_gsi;
  568. }
  569. /* Add to head of open devices list */
  570. list_add ( &ibdev->open_list, &open_ib_devices );
  571. /* Notify drivers of device state change */
  572. ib_notify ( ibdev );
  573. assert ( ibdev->open_count == 1 );
  574. return 0;
  575. ib_destroy_mi ( ibdev, ibdev->gsi );
  576. err_create_gsi:
  577. ib_destroy_sma ( ibdev, ibdev->smi );
  578. err_create_sma:
  579. ib_destroy_mi ( ibdev, ibdev->smi );
  580. err_create_smi:
  581. ibdev->op->close ( ibdev );
  582. err_open:
  583. assert ( ibdev->open_count == 1 );
  584. ibdev->open_count = 0;
  585. return rc;
  586. }
  587. /**
  588. * Close port
  589. *
  590. * @v ibdev Infiniband device
  591. */
  592. void ib_close ( struct ib_device *ibdev ) {
  593. /* Decrement device open request counter */
  594. ibdev->open_count--;
  595. /* Close device if this was the last remaining requested opening */
  596. if ( ibdev->open_count == 0 ) {
  597. ib_notify ( ibdev );
  598. list_del ( &ibdev->open_list );
  599. ib_destroy_mi ( ibdev, ibdev->gsi );
  600. ib_destroy_sma ( ibdev, ibdev->smi );
  601. ib_destroy_mi ( ibdev, ibdev->smi );
  602. ibdev->op->close ( ibdev );
  603. ibdev->port_state = IB_PORT_STATE_DOWN;
  604. }
  605. }
  606. /***************************************************************************
  607. *
  608. * Multicast
  609. *
  610. ***************************************************************************
  611. */
  612. /**
  613. * Attach to multicast group
  614. *
  615. * @v ibdev Infiniband device
  616. * @v qp Queue pair
  617. * @v gid Multicast GID
  618. * @ret rc Return status code
  619. *
  620. * Note that this function handles only the local device's attachment
  621. * to the multicast GID; it does not issue the relevant MADs to join
  622. * the multicast group on the subnet.
  623. */
  624. int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  625. union ib_gid *gid ) {
  626. struct ib_multicast_gid *mgid;
  627. int rc;
  628. /* Add to software multicast GID list */
  629. mgid = zalloc ( sizeof ( *mgid ) );
  630. if ( ! mgid ) {
  631. rc = -ENOMEM;
  632. goto err_alloc_mgid;
  633. }
  634. memcpy ( &mgid->gid, gid, sizeof ( mgid->gid ) );
  635. list_add ( &mgid->list, &qp->mgids );
  636. /* Add to hardware multicast GID list */
  637. if ( ( rc = ibdev->op->mcast_attach ( ibdev, qp, gid ) ) != 0 )
  638. goto err_dev_mcast_attach;
  639. return 0;
  640. err_dev_mcast_attach:
  641. list_del ( &mgid->list );
  642. free ( mgid );
  643. err_alloc_mgid:
  644. return rc;
  645. }
  646. /**
  647. * Detach from multicast group
  648. *
  649. * @v ibdev Infiniband device
  650. * @v qp Queue pair
  651. * @v gid Multicast GID
  652. */
  653. void ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  654. union ib_gid *gid ) {
  655. struct ib_multicast_gid *mgid;
  656. /* Remove from hardware multicast GID list */
  657. ibdev->op->mcast_detach ( ibdev, qp, gid );
  658. /* Remove from software multicast GID list */
  659. list_for_each_entry ( mgid, &qp->mgids, list ) {
  660. if ( memcmp ( &mgid->gid, gid, sizeof ( mgid->gid ) ) == 0 ) {
  661. list_del ( &mgid->list );
  662. free ( mgid );
  663. break;
  664. }
  665. }
  666. }
  667. /***************************************************************************
  668. *
  669. * Miscellaneous
  670. *
  671. ***************************************************************************
  672. */
  673. /**
  674. * Count Infiniband HCA ports
  675. *
  676. * @v ibdev Infiniband device
  677. * @ret num_ports Number of ports
  678. */
  679. int ib_count_ports ( struct ib_device *ibdev ) {
  680. struct ib_device *tmp;
  681. int num_ports = 0;
  682. /* Search for IB devices with the same physical device to
  683. * identify port count.
  684. */
  685. for_each_ibdev ( tmp ) {
  686. if ( tmp->dev == ibdev->dev )
  687. num_ports++;
  688. }
  689. return num_ports;
  690. }
  691. /**
  692. * Set port information
  693. *
  694. * @v ibdev Infiniband device
  695. * @v mad Set port information MAD
  696. */
  697. int ib_set_port_info ( struct ib_device *ibdev, union ib_mad *mad ) {
  698. int rc;
  699. /* Adapters with embedded SMAs do not need to support this method */
  700. if ( ! ibdev->op->set_port_info ) {
  701. DBGC ( ibdev, "IBDEV %p does not support setting port "
  702. "information\n", ibdev );
  703. return -ENOTSUP;
  704. }
  705. if ( ( rc = ibdev->op->set_port_info ( ibdev, mad ) ) != 0 ) {
  706. DBGC ( ibdev, "IBDEV %p could not set port information: %s\n",
  707. ibdev, strerror ( rc ) );
  708. return rc;
  709. }
  710. return 0;
  711. };
  712. /**
  713. * Set partition key table
  714. *
  715. * @v ibdev Infiniband device
  716. * @v mad Set partition key table MAD
  717. */
  718. int ib_set_pkey_table ( struct ib_device *ibdev, union ib_mad *mad ) {
  719. int rc;
  720. /* Adapters with embedded SMAs do not need to support this method */
  721. if ( ! ibdev->op->set_pkey_table ) {
  722. DBGC ( ibdev, "IBDEV %p does not support setting partition "
  723. "key table\n", ibdev );
  724. return -ENOTSUP;
  725. }
  726. if ( ( rc = ibdev->op->set_pkey_table ( ibdev, mad ) ) != 0 ) {
  727. DBGC ( ibdev, "IBDEV %p could not set partition key table: "
  728. "%s\n", ibdev, strerror ( rc ) );
  729. return rc;
  730. }
  731. return 0;
  732. };
  733. /***************************************************************************
  734. *
  735. * Event queues
  736. *
  737. ***************************************************************************
  738. */
  739. /**
  740. * Poll event queue
  741. *
  742. * @v ibdev Infiniband device
  743. */
  744. void ib_poll_eq ( struct ib_device *ibdev ) {
  745. struct ib_completion_queue *cq;
  746. /* Poll device's event queue */
  747. ibdev->op->poll_eq ( ibdev );
  748. /* Poll all completion queues */
  749. list_for_each_entry ( cq, &ibdev->cqs, list )
  750. ib_poll_cq ( ibdev, cq );
  751. }
  752. /**
  753. * Single-step the Infiniband event queue
  754. *
  755. * @v process Infiniband event queue process
  756. */
  757. static void ib_step ( struct process *process __unused ) {
  758. struct ib_device *ibdev;
  759. list_for_each_entry ( ibdev, &open_ib_devices, open_list )
  760. ib_poll_eq ( ibdev );
  761. }
  762. /** Infiniband event queue process */
  763. PERMANENT_PROCESS ( ib_process, ib_step );
  764. /***************************************************************************
  765. *
  766. * Infiniband device creation/destruction
  767. *
  768. ***************************************************************************
  769. */
  770. /**
  771. * Allocate Infiniband device
  772. *
  773. * @v priv_size Size of driver private data area
  774. * @ret ibdev Infiniband device, or NULL
  775. */
  776. struct ib_device * alloc_ibdev ( size_t priv_size ) {
  777. struct ib_device *ibdev;
  778. void *drv_priv;
  779. size_t total_len;
  780. total_len = ( sizeof ( *ibdev ) + priv_size );
  781. ibdev = zalloc ( total_len );
  782. if ( ibdev ) {
  783. drv_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) );
  784. ib_set_drvdata ( ibdev, drv_priv );
  785. INIT_LIST_HEAD ( &ibdev->list );
  786. INIT_LIST_HEAD ( &ibdev->open_list );
  787. INIT_LIST_HEAD ( &ibdev->cqs );
  788. INIT_LIST_HEAD ( &ibdev->qps );
  789. ibdev->port_state = IB_PORT_STATE_DOWN;
  790. ibdev->lid = IB_LID_NONE;
  791. ibdev->pkey = IB_PKEY_DEFAULT;
  792. }
  793. return ibdev;
  794. }
  795. /**
  796. * Register Infiniband device
  797. *
  798. * @v ibdev Infiniband device
  799. * @ret rc Return status code
  800. */
  801. int register_ibdev ( struct ib_device *ibdev ) {
  802. struct ib_driver *driver;
  803. int rc;
  804. /* Add to device list */
  805. ibdev_get ( ibdev );
  806. list_add_tail ( &ibdev->list, &ib_devices );
  807. DBGC ( ibdev, "IBDEV %p registered (phys %s)\n", ibdev,
  808. ibdev->dev->name );
  809. /* Probe device */
  810. for_each_table_entry ( driver, IB_DRIVERS ) {
  811. if ( ( rc = driver->probe ( ibdev ) ) != 0 ) {
  812. DBGC ( ibdev, "IBDEV %p could not add %s device: %s\n",
  813. ibdev, driver->name, strerror ( rc ) );
  814. goto err_probe;
  815. }
  816. }
  817. return 0;
  818. err_probe:
  819. for_each_table_entry_continue_reverse ( driver, IB_DRIVERS )
  820. driver->remove ( ibdev );
  821. list_del ( &ibdev->list );
  822. ibdev_put ( ibdev );
  823. return rc;
  824. }
  825. /**
  826. * Unregister Infiniband device
  827. *
  828. * @v ibdev Infiniband device
  829. */
  830. void unregister_ibdev ( struct ib_device *ibdev ) {
  831. struct ib_driver *driver;
  832. /* Remove device */
  833. for_each_table_entry_reverse ( driver, IB_DRIVERS )
  834. driver->remove ( ibdev );
  835. /* Remove from device list */
  836. list_del ( &ibdev->list );
  837. ibdev_put ( ibdev );
  838. DBGC ( ibdev, "IBDEV %p unregistered\n", ibdev );
  839. }
  840. /**
  841. * Find Infiniband device by GID
  842. *
  843. * @v gid GID
  844. * @ret ibdev Infiniband device, or NULL
  845. */
  846. struct ib_device * find_ibdev ( union ib_gid *gid ) {
  847. struct ib_device *ibdev;
  848. for_each_ibdev ( ibdev ) {
  849. if ( memcmp ( gid, &ibdev->gid, sizeof ( *gid ) ) == 0 )
  850. return ibdev;
  851. }
  852. return NULL;
  853. }
  854. /**
  855. * Get most recently opened Infiniband device
  856. *
  857. * @ret ibdev Most recently opened Infiniband device, or NULL
  858. */
  859. struct ib_device * last_opened_ibdev ( void ) {
  860. struct ib_device *ibdev;
  861. ibdev = list_first_entry ( &open_ib_devices, struct ib_device,
  862. open_list );
  863. if ( ! ibdev )
  864. return NULL;
  865. assert ( ibdev->open_count != 0 );
  866. return ibdev;
  867. }
  /* Drag in the IPoIB object alongside the Infiniband core */
  REQUIRE_OBJECT ( ipoib );