Nelze vybrat více než 25 témat Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.

infiniband.c 25KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999
  1. /*
  2. * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License as
  6. * published by the Free Software Foundation; either version 2 of the
  7. * License, or any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful, but
  10. * WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write to the Free Software
  16. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  17. * 02110-1301, USA.
  18. */
  19. FILE_LICENCE ( GPL2_OR_LATER );
  20. #include <stdint.h>
  21. #include <stdlib.h>
  22. #include <stdio.h>
  23. #include <string.h>
  24. #include <unistd.h>
  25. #include <byteswap.h>
  26. #include <errno.h>
  27. #include <assert.h>
  28. #include <ipxe/list.h>
  29. #include <ipxe/errortab.h>
  30. #include <ipxe/if_arp.h>
  31. #include <ipxe/netdevice.h>
  32. #include <ipxe/iobuf.h>
  33. #include <ipxe/process.h>
  34. #include <ipxe/infiniband.h>
  35. #include <ipxe/ib_mi.h>
  36. #include <ipxe/ib_sma.h>
  37. /** @file
  38. *
  39. * Infiniband protocol
  40. *
  41. */
  42. /** List of Infiniband devices */
  43. struct list_head ib_devices = LIST_HEAD_INIT ( ib_devices );
  44. /** List of open Infiniband devices, in reverse order of opening */
  45. static struct list_head open_ib_devices = LIST_HEAD_INIT ( open_ib_devices );
  46. /* Disambiguate the various possible EINPROGRESSes */
  47. #define EINPROGRESS_INIT __einfo_error ( EINFO_EINPROGRESS_INIT )
  48. #define EINFO_EINPROGRESS_INIT __einfo_uniqify \
  49. ( EINFO_EINPROGRESS, 0x01, "Initialising" )
  50. #define EINPROGRESS_ARMED __einfo_error ( EINFO_EINPROGRESS_ARMED )
  51. #define EINFO_EINPROGRESS_ARMED __einfo_uniqify \
  52. ( EINFO_EINPROGRESS, 0x02, "Armed" )
  53. /** Human-readable message for the link statuses */
  54. struct errortab infiniband_errors[] __errortab = {
  55. __einfo_errortab ( EINFO_EINPROGRESS_INIT ),
  56. __einfo_errortab ( EINFO_EINPROGRESS_ARMED ),
  57. };
  58. /***************************************************************************
  59. *
  60. * Completion queues
  61. *
  62. ***************************************************************************
  63. */
  64. /**
  65. * Create completion queue
  66. *
  67. * @v ibdev Infiniband device
  68. * @v num_cqes Number of completion queue entries
  69. * @v op Completion queue operations
  70. * @ret cq New completion queue
  71. */
  72. struct ib_completion_queue *
  73. ib_create_cq ( struct ib_device *ibdev, unsigned int num_cqes,
  74. struct ib_completion_queue_operations *op ) {
  75. struct ib_completion_queue *cq;
  76. int rc;
  77. DBGC ( ibdev, "IBDEV %p creating completion queue\n", ibdev );
  78. /* Allocate and initialise data structure */
  79. cq = zalloc ( sizeof ( *cq ) );
  80. if ( ! cq )
  81. goto err_alloc_cq;
  82. cq->ibdev = ibdev;
  83. list_add ( &cq->list, &ibdev->cqs );
  84. cq->num_cqes = num_cqes;
  85. INIT_LIST_HEAD ( &cq->work_queues );
  86. cq->op = op;
  87. /* Perform device-specific initialisation and get CQN */
  88. if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) {
  89. DBGC ( ibdev, "IBDEV %p could not initialise completion "
  90. "queue: %s\n", ibdev, strerror ( rc ) );
  91. goto err_dev_create_cq;
  92. }
  93. DBGC ( ibdev, "IBDEV %p created %d-entry completion queue %p (%p) "
  94. "with CQN %#lx\n", ibdev, num_cqes, cq,
  95. ib_cq_get_drvdata ( cq ), cq->cqn );
  96. return cq;
  97. ibdev->op->destroy_cq ( ibdev, cq );
  98. err_dev_create_cq:
  99. list_del ( &cq->list );
  100. free ( cq );
  101. err_alloc_cq:
  102. return NULL;
  103. }
  104. /**
  105. * Destroy completion queue
  106. *
  107. * @v ibdev Infiniband device
  108. * @v cq Completion queue
  109. */
  110. void ib_destroy_cq ( struct ib_device *ibdev,
  111. struct ib_completion_queue *cq ) {
  112. DBGC ( ibdev, "IBDEV %p destroying completion queue %#lx\n",
  113. ibdev, cq->cqn );
  114. assert ( list_empty ( &cq->work_queues ) );
  115. ibdev->op->destroy_cq ( ibdev, cq );
  116. list_del ( &cq->list );
  117. free ( cq );
  118. }
  119. /**
  120. * Poll completion queue
  121. *
  122. * @v ibdev Infiniband device
  123. * @v cq Completion queue
  124. */
  125. void ib_poll_cq ( struct ib_device *ibdev,
  126. struct ib_completion_queue *cq ) {
  127. struct ib_work_queue *wq;
  128. /* Poll completion queue */
  129. ibdev->op->poll_cq ( ibdev, cq );
  130. /* Refill receive work queues */
  131. list_for_each_entry ( wq, &cq->work_queues, list ) {
  132. if ( ! wq->is_send )
  133. ib_refill_recv ( ibdev, wq->qp );
  134. }
  135. }
  136. /***************************************************************************
  137. *
  138. * Work queues
  139. *
  140. ***************************************************************************
  141. */
  142. /**
  143. * Create queue pair
  144. *
  145. * @v ibdev Infiniband device
  146. * @v type Queue pair type
  147. * @v num_send_wqes Number of send work queue entries
  148. * @v send_cq Send completion queue
  149. * @v num_recv_wqes Number of receive work queue entries
  150. * @v recv_cq Receive completion queue
  151. * @v op Queue pair operations
  152. * @ret qp Queue pair
  153. *
  154. * The queue pair will be left in the INIT state; you must call
  155. * ib_modify_qp() before it is ready to use for sending and receiving.
  156. */
  157. struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev,
  158. enum ib_queue_pair_type type,
  159. unsigned int num_send_wqes,
  160. struct ib_completion_queue *send_cq,
  161. unsigned int num_recv_wqes,
  162. struct ib_completion_queue *recv_cq,
  163. struct ib_queue_pair_operations *op ) {
  164. struct ib_queue_pair *qp;
  165. size_t total_size;
  166. int rc;
  167. DBGC ( ibdev, "IBDEV %p creating queue pair\n", ibdev );
  168. /* Allocate and initialise data structure */
  169. total_size = ( sizeof ( *qp ) +
  170. ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) +
  171. ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) );
  172. qp = zalloc ( total_size );
  173. if ( ! qp )
  174. goto err_alloc_qp;
  175. qp->ibdev = ibdev;
  176. list_add ( &qp->list, &ibdev->qps );
  177. qp->type = type;
  178. qp->send.qp = qp;
  179. qp->send.is_send = 1;
  180. qp->send.cq = send_cq;
  181. list_add ( &qp->send.list, &send_cq->work_queues );
  182. qp->send.psn = ( random() & 0xffffffUL );
  183. qp->send.num_wqes = num_send_wqes;
  184. qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) );
  185. qp->recv.qp = qp;
  186. qp->recv.cq = recv_cq;
  187. list_add ( &qp->recv.list, &recv_cq->work_queues );
  188. qp->recv.psn = ( random() & 0xffffffUL );
  189. qp->recv.num_wqes = num_recv_wqes;
  190. qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) +
  191. ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ));
  192. INIT_LIST_HEAD ( &qp->mgids );
  193. qp->op = op;
  194. /* Perform device-specific initialisation and get QPN */
  195. if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) {
  196. DBGC ( ibdev, "IBDEV %p could not initialise queue pair: "
  197. "%s\n", ibdev, strerror ( rc ) );
  198. goto err_dev_create_qp;
  199. }
  200. DBGC ( ibdev, "IBDEV %p created queue pair %p (%p) with QPN %#lx\n",
  201. ibdev, qp, ib_qp_get_drvdata ( qp ), qp->qpn );
  202. DBGC ( ibdev, "IBDEV %p QPN %#lx has %d send entries at [%p,%p)\n",
  203. ibdev, qp->qpn, num_send_wqes, qp->send.iobufs,
  204. qp->recv.iobufs );
  205. DBGC ( ibdev, "IBDEV %p QPN %#lx has %d receive entries at [%p,%p)\n",
  206. ibdev, qp->qpn, num_recv_wqes, qp->recv.iobufs,
  207. ( ( ( void * ) qp ) + total_size ) );
  208. /* Calculate externally-visible QPN */
  209. switch ( type ) {
  210. case IB_QPT_SMI:
  211. qp->ext_qpn = IB_QPN_SMI;
  212. break;
  213. case IB_QPT_GSI:
  214. qp->ext_qpn = IB_QPN_GSI;
  215. break;
  216. default:
  217. qp->ext_qpn = qp->qpn;
  218. break;
  219. }
  220. if ( qp->ext_qpn != qp->qpn ) {
  221. DBGC ( ibdev, "IBDEV %p QPN %#lx has external QPN %#lx\n",
  222. ibdev, qp->qpn, qp->ext_qpn );
  223. }
  224. return qp;
  225. ibdev->op->destroy_qp ( ibdev, qp );
  226. err_dev_create_qp:
  227. list_del ( &qp->send.list );
  228. list_del ( &qp->recv.list );
  229. list_del ( &qp->list );
  230. free ( qp );
  231. err_alloc_qp:
  232. return NULL;
  233. }
  234. /**
  235. * Modify queue pair
  236. *
  237. * @v ibdev Infiniband device
  238. * @v qp Queue pair
  239. * @ret rc Return status code
  240. */
  241. int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  242. int rc;
  243. DBGC ( ibdev, "IBDEV %p modifying QPN %#lx\n", ibdev, qp->qpn );
  244. if ( ( rc = ibdev->op->modify_qp ( ibdev, qp ) ) != 0 ) {
  245. DBGC ( ibdev, "IBDEV %p could not modify QPN %#lx: %s\n",
  246. ibdev, qp->qpn, strerror ( rc ) );
  247. return rc;
  248. }
  249. return 0;
  250. }
  251. /**
  252. * Destroy queue pair
  253. *
  254. * @v ibdev Infiniband device
  255. * @v qp Queue pair
  256. */
  257. void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  258. struct io_buffer *iobuf;
  259. unsigned int i;
  260. DBGC ( ibdev, "IBDEV %p destroying QPN %#lx\n",
  261. ibdev, qp->qpn );
  262. assert ( list_empty ( &qp->mgids ) );
  263. /* Perform device-specific destruction */
  264. ibdev->op->destroy_qp ( ibdev, qp );
  265. /* Complete any remaining I/O buffers with errors */
  266. for ( i = 0 ; i < qp->send.num_wqes ; i++ ) {
  267. if ( ( iobuf = qp->send.iobufs[i] ) != NULL )
  268. ib_complete_send ( ibdev, qp, iobuf, -ECANCELED );
  269. }
  270. for ( i = 0 ; i < qp->recv.num_wqes ; i++ ) {
  271. if ( ( iobuf = qp->recv.iobufs[i] ) != NULL ) {
  272. ib_complete_recv ( ibdev, qp, NULL, NULL, iobuf,
  273. -ECANCELED );
  274. }
  275. }
  276. /* Remove work queues from completion queue */
  277. list_del ( &qp->send.list );
  278. list_del ( &qp->recv.list );
  279. /* Free QP */
  280. list_del ( &qp->list );
  281. free ( qp );
  282. }
  283. /**
  284. * Find queue pair by QPN
  285. *
  286. * @v ibdev Infiniband device
  287. * @v qpn Queue pair number
  288. * @ret qp Queue pair, or NULL
  289. */
  290. struct ib_queue_pair * ib_find_qp_qpn ( struct ib_device *ibdev,
  291. unsigned long qpn ) {
  292. struct ib_queue_pair *qp;
  293. list_for_each_entry ( qp, &ibdev->qps, list ) {
  294. if ( ( qpn == qp->qpn ) || ( qpn == qp->ext_qpn ) )
  295. return qp;
  296. }
  297. return NULL;
  298. }
  299. /**
  300. * Find queue pair by multicast GID
  301. *
  302. * @v ibdev Infiniband device
  303. * @v gid Multicast GID
  304. * @ret qp Queue pair, or NULL
  305. */
  306. struct ib_queue_pair * ib_find_qp_mgid ( struct ib_device *ibdev,
  307. union ib_gid *gid ) {
  308. struct ib_queue_pair *qp;
  309. struct ib_multicast_gid *mgid;
  310. list_for_each_entry ( qp, &ibdev->qps, list ) {
  311. list_for_each_entry ( mgid, &qp->mgids, list ) {
  312. if ( memcmp ( &mgid->gid, gid,
  313. sizeof ( mgid->gid ) ) == 0 ) {
  314. return qp;
  315. }
  316. }
  317. }
  318. return NULL;
  319. }
  320. /**
  321. * Find work queue belonging to completion queue
  322. *
  323. * @v cq Completion queue
  324. * @v qpn Queue pair number
  325. * @v is_send Find send work queue (rather than receive)
  326. * @ret wq Work queue, or NULL if not found
  327. */
  328. struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
  329. unsigned long qpn, int is_send ) {
  330. struct ib_work_queue *wq;
  331. list_for_each_entry ( wq, &cq->work_queues, list ) {
  332. if ( ( wq->qp->qpn == qpn ) && ( wq->is_send == is_send ) )
  333. return wq;
  334. }
  335. return NULL;
  336. }
  337. /**
  338. * Post send work queue entry
  339. *
  340. * @v ibdev Infiniband device
  341. * @v qp Queue pair
  342. * @v dest Destination address vector
  343. * @v iobuf I/O buffer
  344. * @ret rc Return status code
  345. */
  346. int ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  347. struct ib_address_vector *dest,
  348. struct io_buffer *iobuf ) {
  349. struct ib_address_vector dest_copy;
  350. int rc;
  351. /* Check queue fill level */
  352. if ( qp->send.fill >= qp->send.num_wqes ) {
  353. DBGC ( ibdev, "IBDEV %p QPN %#lx send queue full\n",
  354. ibdev, qp->qpn );
  355. return -ENOBUFS;
  356. }
  357. /* Use default address vector if none specified */
  358. if ( ! dest )
  359. dest = &qp->av;
  360. /* Make modifiable copy of address vector */
  361. memcpy ( &dest_copy, dest, sizeof ( dest_copy ) );
  362. dest = &dest_copy;
  363. /* Fill in optional parameters in address vector */
  364. if ( ! dest->qkey )
  365. dest->qkey = qp->qkey;
  366. if ( ! dest->rate )
  367. dest->rate = IB_RATE_2_5;
  368. /* Post to hardware */
  369. if ( ( rc = ibdev->op->post_send ( ibdev, qp, dest, iobuf ) ) != 0 ) {
  370. DBGC ( ibdev, "IBDEV %p QPN %#lx could not post send WQE: "
  371. "%s\n", ibdev, qp->qpn, strerror ( rc ) );
  372. return rc;
  373. }
  374. qp->send.fill++;
  375. return 0;
  376. }
  377. /**
  378. * Post receive work queue entry
  379. *
  380. * @v ibdev Infiniband device
  381. * @v qp Queue pair
  382. * @v iobuf I/O buffer
  383. * @ret rc Return status code
  384. */
  385. int ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  386. struct io_buffer *iobuf ) {
  387. int rc;
  388. /* Check packet length */
  389. if ( iob_tailroom ( iobuf ) < IB_MAX_PAYLOAD_SIZE ) {
  390. DBGC ( ibdev, "IBDEV %p QPN %#lx wrong RX buffer size (%zd)\n",
  391. ibdev, qp->qpn, iob_tailroom ( iobuf ) );
  392. return -EINVAL;
  393. }
  394. /* Check queue fill level */
  395. if ( qp->recv.fill >= qp->recv.num_wqes ) {
  396. DBGC ( ibdev, "IBDEV %p QPN %#lx receive queue full\n",
  397. ibdev, qp->qpn );
  398. return -ENOBUFS;
  399. }
  400. /* Post to hardware */
  401. if ( ( rc = ibdev->op->post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
  402. DBGC ( ibdev, "IBDEV %p QPN %#lx could not post receive WQE: "
  403. "%s\n", ibdev, qp->qpn, strerror ( rc ) );
  404. return rc;
  405. }
  406. qp->recv.fill++;
  407. return 0;
  408. }
  409. /**
  410. * Complete send work queue entry
  411. *
  412. * @v ibdev Infiniband device
  413. * @v qp Queue pair
  414. * @v iobuf I/O buffer
  415. * @v rc Completion status code
  416. */
  417. void ib_complete_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  418. struct io_buffer *iobuf, int rc ) {
  419. if ( qp->send.cq->op->complete_send ) {
  420. qp->send.cq->op->complete_send ( ibdev, qp, iobuf, rc );
  421. } else {
  422. free_iob ( iobuf );
  423. }
  424. qp->send.fill--;
  425. }
  426. /**
  427. * Complete receive work queue entry
  428. *
  429. * @v ibdev Infiniband device
  430. * @v qp Queue pair
  431. * @v dest Destination address vector, or NULL
  432. * @v source Source address vector, or NULL
  433. * @v iobuf I/O buffer
  434. * @v rc Completion status code
  435. */
  436. void ib_complete_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  437. struct ib_address_vector *dest,
  438. struct ib_address_vector *source,
  439. struct io_buffer *iobuf, int rc ) {
  440. if ( qp->recv.cq->op->complete_recv ) {
  441. qp->recv.cq->op->complete_recv ( ibdev, qp, dest, source,
  442. iobuf, rc );
  443. } else {
  444. free_iob ( iobuf );
  445. }
  446. qp->recv.fill--;
  447. }
  448. /**
  449. * Refill receive work queue
  450. *
  451. * @v ibdev Infiniband device
  452. * @v qp Queue pair
  453. */
  454. void ib_refill_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  455. struct io_buffer *iobuf;
  456. int rc;
  457. /* Keep filling while unfilled entries remain */
  458. while ( qp->recv.fill < qp->recv.num_wqes ) {
  459. /* Allocate I/O buffer */
  460. iobuf = qp->op->alloc_iob ( IB_MAX_PAYLOAD_SIZE );
  461. if ( ! iobuf ) {
  462. /* Non-fatal; we will refill on next attempt */
  463. return;
  464. }
  465. /* Post I/O buffer */
  466. if ( ( rc = ib_post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
  467. DBGC ( ibdev, "IBDEV %p could not refill: %s\n",
  468. ibdev, strerror ( rc ) );
  469. free_iob ( iobuf );
  470. /* Give up */
  471. return;
  472. }
  473. }
  474. }
  475. /***************************************************************************
  476. *
  477. * Link control
  478. *
  479. ***************************************************************************
  480. */
  481. /**
  482. * Get link state
  483. *
  484. * @v ibdev Infiniband device
  485. * @ret rc Link status code
  486. */
  487. int ib_link_rc ( struct ib_device *ibdev ) {
  488. switch ( ibdev->port_state ) {
  489. case IB_PORT_STATE_DOWN: return -ENOTCONN;
  490. case IB_PORT_STATE_INIT: return -EINPROGRESS_INIT;
  491. case IB_PORT_STATE_ARMED: return -EINPROGRESS_ARMED;
  492. case IB_PORT_STATE_ACTIVE: return 0;
  493. default: return -EINVAL;
  494. }
  495. }
  496. /**
  497. * Textual representation of Infiniband link state
  498. *
  499. * @v ibdev Infiniband device
  500. * @ret link_text Link state text
  501. */
  502. static const char * ib_link_state_text ( struct ib_device *ibdev ) {
  503. switch ( ibdev->port_state ) {
  504. case IB_PORT_STATE_DOWN: return "DOWN";
  505. case IB_PORT_STATE_INIT: return "INIT";
  506. case IB_PORT_STATE_ARMED: return "ARMED";
  507. case IB_PORT_STATE_ACTIVE: return "ACTIVE";
  508. default: return "UNKNOWN";
  509. }
  510. }
  511. /**
  512. * Notify drivers of Infiniband device or link state change
  513. *
  514. * @v ibdev Infiniband device
  515. */
  516. static void ib_notify ( struct ib_device *ibdev ) {
  517. struct ib_driver *driver;
  518. for_each_table_entry ( driver, IB_DRIVERS )
  519. driver->notify ( ibdev );
  520. }
  521. /**
  522. * Notify of Infiniband link state change
  523. *
  524. * @v ibdev Infiniband device
  525. */
  526. void ib_link_state_changed ( struct ib_device *ibdev ) {
  527. DBGC ( ibdev, "IBDEV %p link state is %s\n",
  528. ibdev, ib_link_state_text ( ibdev ) );
  529. /* Notify drivers of link state change */
  530. ib_notify ( ibdev );
  531. }
  532. /**
  533. * Open port
  534. *
  535. * @v ibdev Infiniband device
  536. * @ret rc Return status code
  537. */
  538. int ib_open ( struct ib_device *ibdev ) {
  539. int rc;
  540. /* Increment device open request counter */
  541. if ( ibdev->open_count++ > 0 ) {
  542. /* Device was already open; do nothing */
  543. return 0;
  544. }
  545. /* Open device */
  546. if ( ( rc = ibdev->op->open ( ibdev ) ) != 0 ) {
  547. DBGC ( ibdev, "IBDEV %p could not open: %s\n",
  548. ibdev, strerror ( rc ) );
  549. goto err_open;
  550. }
  551. /* Create subnet management interface */
  552. ibdev->smi = ib_create_mi ( ibdev, IB_QPT_SMI );
  553. if ( ! ibdev->smi ) {
  554. DBGC ( ibdev, "IBDEV %p could not create SMI\n", ibdev );
  555. rc = -ENOMEM;
  556. goto err_create_smi;
  557. }
  558. /* Create subnet management agent */
  559. if ( ( rc = ib_create_sma ( ibdev, ibdev->smi ) ) != 0 ) {
  560. DBGC ( ibdev, "IBDEV %p could not create SMA: %s\n",
  561. ibdev, strerror ( rc ) );
  562. goto err_create_sma;
  563. }
  564. /* Create general services interface */
  565. ibdev->gsi = ib_create_mi ( ibdev, IB_QPT_GSI );
  566. if ( ! ibdev->gsi ) {
  567. DBGC ( ibdev, "IBDEV %p could not create GSI\n", ibdev );
  568. rc = -ENOMEM;
  569. goto err_create_gsi;
  570. }
  571. /* Add to head of open devices list */
  572. list_add ( &ibdev->open_list, &open_ib_devices );
  573. /* Notify drivers of device state change */
  574. ib_notify ( ibdev );
  575. assert ( ibdev->open_count == 1 );
  576. return 0;
  577. ib_destroy_mi ( ibdev, ibdev->gsi );
  578. err_create_gsi:
  579. ib_destroy_sma ( ibdev, ibdev->smi );
  580. err_create_sma:
  581. ib_destroy_mi ( ibdev, ibdev->smi );
  582. err_create_smi:
  583. ibdev->op->close ( ibdev );
  584. err_open:
  585. assert ( ibdev->open_count == 1 );
  586. ibdev->open_count = 0;
  587. return rc;
  588. }
  589. /**
  590. * Close port
  591. *
  592. * @v ibdev Infiniband device
  593. */
  594. void ib_close ( struct ib_device *ibdev ) {
  595. /* Decrement device open request counter */
  596. ibdev->open_count--;
  597. /* Close device if this was the last remaining requested opening */
  598. if ( ibdev->open_count == 0 ) {
  599. ib_notify ( ibdev );
  600. list_del ( &ibdev->open_list );
  601. ib_destroy_mi ( ibdev, ibdev->gsi );
  602. ib_destroy_sma ( ibdev, ibdev->smi );
  603. ib_destroy_mi ( ibdev, ibdev->smi );
  604. ibdev->op->close ( ibdev );
  605. ibdev->port_state = IB_PORT_STATE_DOWN;
  606. }
  607. }
  608. /***************************************************************************
  609. *
  610. * Multicast
  611. *
  612. ***************************************************************************
  613. */
  614. /**
  615. * Attach to multicast group
  616. *
  617. * @v ibdev Infiniband device
  618. * @v qp Queue pair
  619. * @v gid Multicast GID
  620. * @ret rc Return status code
  621. *
  622. * Note that this function handles only the local device's attachment
  623. * to the multicast GID; it does not issue the relevant MADs to join
  624. * the multicast group on the subnet.
  625. */
  626. int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  627. union ib_gid *gid ) {
  628. struct ib_multicast_gid *mgid;
  629. int rc;
  630. /* Add to software multicast GID list */
  631. mgid = zalloc ( sizeof ( *mgid ) );
  632. if ( ! mgid ) {
  633. rc = -ENOMEM;
  634. goto err_alloc_mgid;
  635. }
  636. memcpy ( &mgid->gid, gid, sizeof ( mgid->gid ) );
  637. list_add ( &mgid->list, &qp->mgids );
  638. /* Add to hardware multicast GID list */
  639. if ( ( rc = ibdev->op->mcast_attach ( ibdev, qp, gid ) ) != 0 )
  640. goto err_dev_mcast_attach;
  641. return 0;
  642. err_dev_mcast_attach:
  643. list_del ( &mgid->list );
  644. free ( mgid );
  645. err_alloc_mgid:
  646. return rc;
  647. }
  648. /**
  649. * Detach from multicast group
  650. *
  651. * @v ibdev Infiniband device
  652. * @v qp Queue pair
  653. * @v gid Multicast GID
  654. */
  655. void ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  656. union ib_gid *gid ) {
  657. struct ib_multicast_gid *mgid;
  658. /* Remove from hardware multicast GID list */
  659. ibdev->op->mcast_detach ( ibdev, qp, gid );
  660. /* Remove from software multicast GID list */
  661. list_for_each_entry ( mgid, &qp->mgids, list ) {
  662. if ( memcmp ( &mgid->gid, gid, sizeof ( mgid->gid ) ) == 0 ) {
  663. list_del ( &mgid->list );
  664. free ( mgid );
  665. break;
  666. }
  667. }
  668. }
  669. /***************************************************************************
  670. *
  671. * Miscellaneous
  672. *
  673. ***************************************************************************
  674. */
  675. /**
  676. * Count Infiniband HCA ports
  677. *
  678. * @v ibdev Infiniband device
  679. * @ret num_ports Number of ports
  680. */
  681. int ib_count_ports ( struct ib_device *ibdev ) {
  682. struct ib_device *tmp;
  683. int num_ports = 0;
  684. /* Search for IB devices with the same physical device to
  685. * identify port count.
  686. */
  687. for_each_ibdev ( tmp ) {
  688. if ( tmp->dev == ibdev->dev )
  689. num_ports++;
  690. }
  691. return num_ports;
  692. }
  693. /**
  694. * Set port information
  695. *
  696. * @v ibdev Infiniband device
  697. * @v mad Set port information MAD
  698. */
  699. int ib_set_port_info ( struct ib_device *ibdev, union ib_mad *mad ) {
  700. int rc;
  701. /* Adapters with embedded SMAs do not need to support this method */
  702. if ( ! ibdev->op->set_port_info ) {
  703. DBGC ( ibdev, "IBDEV %p does not support setting port "
  704. "information\n", ibdev );
  705. return -ENOTSUP;
  706. }
  707. if ( ( rc = ibdev->op->set_port_info ( ibdev, mad ) ) != 0 ) {
  708. DBGC ( ibdev, "IBDEV %p could not set port information: %s\n",
  709. ibdev, strerror ( rc ) );
  710. return rc;
  711. }
  712. return 0;
  713. };
  714. /**
  715. * Set partition key table
  716. *
  717. * @v ibdev Infiniband device
  718. * @v mad Set partition key table MAD
  719. */
  720. int ib_set_pkey_table ( struct ib_device *ibdev, union ib_mad *mad ) {
  721. int rc;
  722. /* Adapters with embedded SMAs do not need to support this method */
  723. if ( ! ibdev->op->set_pkey_table ) {
  724. DBGC ( ibdev, "IBDEV %p does not support setting partition "
  725. "key table\n", ibdev );
  726. return -ENOTSUP;
  727. }
  728. if ( ( rc = ibdev->op->set_pkey_table ( ibdev, mad ) ) != 0 ) {
  729. DBGC ( ibdev, "IBDEV %p could not set partition key table: "
  730. "%s\n", ibdev, strerror ( rc ) );
  731. return rc;
  732. }
  733. return 0;
  734. };
  735. /***************************************************************************
  736. *
  737. * Event queues
  738. *
  739. ***************************************************************************
  740. */
  741. /**
  742. * Poll event queue
  743. *
  744. * @v ibdev Infiniband device
  745. */
  746. void ib_poll_eq ( struct ib_device *ibdev ) {
  747. struct ib_completion_queue *cq;
  748. /* Poll device's event queue */
  749. ibdev->op->poll_eq ( ibdev );
  750. /* Poll all completion queues */
  751. list_for_each_entry ( cq, &ibdev->cqs, list )
  752. ib_poll_cq ( ibdev, cq );
  753. }
  754. /**
  755. * Single-step the Infiniband event queue
  756. *
  757. * @v process Infiniband event queue process
  758. */
  759. static void ib_step ( struct process *process __unused ) {
  760. struct ib_device *ibdev;
  761. list_for_each_entry ( ibdev, &open_ib_devices, open_list )
  762. ib_poll_eq ( ibdev );
  763. }
  764. /** Infiniband event queue process */
  765. PERMANENT_PROCESS ( ib_process, ib_step );
  766. /***************************************************************************
  767. *
  768. * Infiniband device creation/destruction
  769. *
  770. ***************************************************************************
  771. */
  772. /**
  773. * Allocate Infiniband device
  774. *
  775. * @v priv_size Size of driver private data area
  776. * @ret ibdev Infiniband device, or NULL
  777. */
  778. struct ib_device * alloc_ibdev ( size_t priv_size ) {
  779. struct ib_device *ibdev;
  780. void *drv_priv;
  781. size_t total_len;
  782. total_len = ( sizeof ( *ibdev ) + priv_size );
  783. ibdev = zalloc ( total_len );
  784. if ( ibdev ) {
  785. drv_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) );
  786. ib_set_drvdata ( ibdev, drv_priv );
  787. INIT_LIST_HEAD ( &ibdev->list );
  788. INIT_LIST_HEAD ( &ibdev->open_list );
  789. INIT_LIST_HEAD ( &ibdev->cqs );
  790. INIT_LIST_HEAD ( &ibdev->qps );
  791. ibdev->port_state = IB_PORT_STATE_DOWN;
  792. ibdev->lid = IB_LID_NONE;
  793. ibdev->pkey = IB_PKEY_DEFAULT;
  794. }
  795. return ibdev;
  796. }
  797. /**
  798. * Register Infiniband device
  799. *
  800. * @v ibdev Infiniband device
  801. * @ret rc Return status code
  802. */
  803. int register_ibdev ( struct ib_device *ibdev ) {
  804. struct ib_driver *driver;
  805. int rc;
  806. /* Add to device list */
  807. ibdev_get ( ibdev );
  808. list_add_tail ( &ibdev->list, &ib_devices );
  809. DBGC ( ibdev, "IBDEV %p registered (phys %s)\n", ibdev,
  810. ibdev->dev->name );
  811. /* Probe device */
  812. for_each_table_entry ( driver, IB_DRIVERS ) {
  813. if ( ( rc = driver->probe ( ibdev ) ) != 0 ) {
  814. DBGC ( ibdev, "IBDEV %p could not add %s device: %s\n",
  815. ibdev, driver->name, strerror ( rc ) );
  816. goto err_probe;
  817. }
  818. }
  819. return 0;
  820. err_probe:
  821. for_each_table_entry_continue_reverse ( driver, IB_DRIVERS )
  822. driver->remove ( ibdev );
  823. list_del ( &ibdev->list );
  824. ibdev_put ( ibdev );
  825. return rc;
  826. }
  827. /**
  828. * Unregister Infiniband device
  829. *
  830. * @v ibdev Infiniband device
  831. */
  832. void unregister_ibdev ( struct ib_device *ibdev ) {
  833. struct ib_driver *driver;
  834. /* Remove device */
  835. for_each_table_entry_reverse ( driver, IB_DRIVERS )
  836. driver->remove ( ibdev );
  837. /* Remove from device list */
  838. list_del ( &ibdev->list );
  839. ibdev_put ( ibdev );
  840. DBGC ( ibdev, "IBDEV %p unregistered\n", ibdev );
  841. }
  842. /**
  843. * Find Infiniband device by GID
  844. *
  845. * @v gid GID
  846. * @ret ibdev Infiniband device, or NULL
  847. */
  848. struct ib_device * find_ibdev ( union ib_gid *gid ) {
  849. struct ib_device *ibdev;
  850. for_each_ibdev ( ibdev ) {
  851. if ( memcmp ( gid, &ibdev->gid, sizeof ( *gid ) ) == 0 )
  852. return ibdev;
  853. }
  854. return NULL;
  855. }
  856. /**
  857. * Get most recently opened Infiniband device
  858. *
  859. * @ret ibdev Most recently opened Infiniband device, or NULL
  860. */
  861. struct ib_device * last_opened_ibdev ( void ) {
  862. struct ib_device *ibdev;
  863. ibdev = list_first_entry ( &open_ib_devices, struct ib_device,
  864. open_list );
  865. if ( ! ibdev )
  866. return NULL;
  867. assert ( ibdev->open_count != 0 );
  868. return ibdev;
  869. }
  870. /* Drag in IPoIB */
  871. REQUIRE_OBJECT ( ipoib );