You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

infiniband.c 23KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951
  1. /*
  2. * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License as
  6. * published by the Free Software Foundation; either version 2 of the
  7. * License, or any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful, but
  10. * WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write to the Free Software
  16. * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17. */
  18. FILE_LICENCE ( GPL2_OR_LATER );
  19. #include <stdint.h>
  20. #include <stdlib.h>
  21. #include <stdio.h>
  22. #include <string.h>
  23. #include <unistd.h>
  24. #include <byteswap.h>
  25. #include <errno.h>
  26. #include <assert.h>
  27. #include <gpxe/list.h>
  28. #include <gpxe/errortab.h>
  29. #include <gpxe/if_arp.h>
  30. #include <gpxe/netdevice.h>
  31. #include <gpxe/iobuf.h>
  32. #include <gpxe/ipoib.h>
  33. #include <gpxe/process.h>
  34. #include <gpxe/infiniband.h>
  35. #include <gpxe/ib_mi.h>
  36. #include <gpxe/ib_sma.h>
  37. /** @file
  38. *
  39. * Infiniband protocol
  40. *
  41. */
  42. /** List of Infiniband devices */
  43. struct list_head ib_devices = LIST_HEAD_INIT ( ib_devices );
  44. /** List of open Infiniband devices, in reverse order of opening */
  45. static struct list_head open_ib_devices = LIST_HEAD_INIT ( open_ib_devices );
  46. /* Disambiguate the various possible EINPROGRESSes */
  47. #define EINPROGRESS_INIT ( EINPROGRESS | EUNIQ_01 )
  48. #define EINPROGRESS_ARMED ( EINPROGRESS | EUNIQ_02 )
  49. /** Human-readable message for the link statuses */
  50. struct errortab infiniband_errors[] __errortab = {
  51. { EINPROGRESS_INIT, "Initialising" },
  52. { EINPROGRESS_ARMED, "Armed" },
  53. };
  54. /***************************************************************************
  55. *
  56. * Completion queues
  57. *
  58. ***************************************************************************
  59. */
  60. /**
  61. * Create completion queue
  62. *
  63. * @v ibdev Infiniband device
  64. * @v num_cqes Number of completion queue entries
  65. * @v op Completion queue operations
  66. * @ret cq New completion queue
  67. */
  68. struct ib_completion_queue *
  69. ib_create_cq ( struct ib_device *ibdev, unsigned int num_cqes,
  70. struct ib_completion_queue_operations *op ) {
  71. struct ib_completion_queue *cq;
  72. int rc;
  73. DBGC ( ibdev, "IBDEV %p creating completion queue\n", ibdev );
  74. /* Allocate and initialise data structure */
  75. cq = zalloc ( sizeof ( *cq ) );
  76. if ( ! cq )
  77. goto err_alloc_cq;
  78. cq->ibdev = ibdev;
  79. list_add ( &cq->list, &ibdev->cqs );
  80. cq->num_cqes = num_cqes;
  81. INIT_LIST_HEAD ( &cq->work_queues );
  82. cq->op = op;
  83. /* Perform device-specific initialisation and get CQN */
  84. if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) {
  85. DBGC ( ibdev, "IBDEV %p could not initialise completion "
  86. "queue: %s\n", ibdev, strerror ( rc ) );
  87. goto err_dev_create_cq;
  88. }
  89. DBGC ( ibdev, "IBDEV %p created %d-entry completion queue %p (%p) "
  90. "with CQN %#lx\n", ibdev, num_cqes, cq,
  91. ib_cq_get_drvdata ( cq ), cq->cqn );
  92. return cq;
  93. ibdev->op->destroy_cq ( ibdev, cq );
  94. err_dev_create_cq:
  95. list_del ( &cq->list );
  96. free ( cq );
  97. err_alloc_cq:
  98. return NULL;
  99. }
  100. /**
  101. * Destroy completion queue
  102. *
  103. * @v ibdev Infiniband device
  104. * @v cq Completion queue
  105. */
  106. void ib_destroy_cq ( struct ib_device *ibdev,
  107. struct ib_completion_queue *cq ) {
  108. DBGC ( ibdev, "IBDEV %p destroying completion queue %#lx\n",
  109. ibdev, cq->cqn );
  110. assert ( list_empty ( &cq->work_queues ) );
  111. ibdev->op->destroy_cq ( ibdev, cq );
  112. list_del ( &cq->list );
  113. free ( cq );
  114. }
  115. /**
  116. * Poll completion queue
  117. *
  118. * @v ibdev Infiniband device
  119. * @v cq Completion queue
  120. */
  121. void ib_poll_cq ( struct ib_device *ibdev,
  122. struct ib_completion_queue *cq ) {
  123. struct ib_work_queue *wq;
  124. /* Poll completion queue */
  125. ibdev->op->poll_cq ( ibdev, cq );
  126. /* Refill receive work queues */
  127. list_for_each_entry ( wq, &cq->work_queues, list ) {
  128. if ( ! wq->is_send )
  129. ib_refill_recv ( ibdev, wq->qp );
  130. }
  131. }
  132. /***************************************************************************
  133. *
  134. * Work queues
  135. *
  136. ***************************************************************************
  137. */
  138. /**
  139. * Create queue pair
  140. *
  141. * @v ibdev Infiniband device
  142. * @v type Queue pair type
  143. * @v num_send_wqes Number of send work queue entries
  144. * @v send_cq Send completion queue
  145. * @v num_recv_wqes Number of receive work queue entries
  146. * @v recv_cq Receive completion queue
  147. * @ret qp Queue pair
  148. *
  149. * The queue pair will be left in the INIT state; you must call
  150. * ib_modify_qp() before it is ready to use for sending and receiving.
  151. */
  152. struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev,
  153. enum ib_queue_pair_type type,
  154. unsigned int num_send_wqes,
  155. struct ib_completion_queue *send_cq,
  156. unsigned int num_recv_wqes,
  157. struct ib_completion_queue *recv_cq ) {
  158. struct ib_queue_pair *qp;
  159. size_t total_size;
  160. int rc;
  161. DBGC ( ibdev, "IBDEV %p creating queue pair\n", ibdev );
  162. /* Allocate and initialise data structure */
  163. total_size = ( sizeof ( *qp ) +
  164. ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) +
  165. ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) );
  166. qp = zalloc ( total_size );
  167. if ( ! qp )
  168. goto err_alloc_qp;
  169. qp->ibdev = ibdev;
  170. list_add ( &qp->list, &ibdev->qps );
  171. qp->type = type;
  172. qp->send.qp = qp;
  173. qp->send.is_send = 1;
  174. qp->send.cq = send_cq;
  175. list_add ( &qp->send.list, &send_cq->work_queues );
  176. qp->send.psn = ( random() & 0xffffffUL );
  177. qp->send.num_wqes = num_send_wqes;
  178. qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) );
  179. qp->recv.qp = qp;
  180. qp->recv.cq = recv_cq;
  181. list_add ( &qp->recv.list, &recv_cq->work_queues );
  182. qp->recv.psn = ( random() & 0xffffffUL );
  183. qp->recv.num_wqes = num_recv_wqes;
  184. qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) +
  185. ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ));
  186. INIT_LIST_HEAD ( &qp->mgids );
  187. /* Perform device-specific initialisation and get QPN */
  188. if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) {
  189. DBGC ( ibdev, "IBDEV %p could not initialise queue pair: "
  190. "%s\n", ibdev, strerror ( rc ) );
  191. goto err_dev_create_qp;
  192. }
  193. DBGC ( ibdev, "IBDEV %p created queue pair %p (%p) with QPN %#lx\n",
  194. ibdev, qp, ib_qp_get_drvdata ( qp ), qp->qpn );
  195. DBGC ( ibdev, "IBDEV %p QPN %#lx has %d send entries at [%p,%p)\n",
  196. ibdev, qp->qpn, num_send_wqes, qp->send.iobufs,
  197. qp->recv.iobufs );
  198. DBGC ( ibdev, "IBDEV %p QPN %#lx has %d receive entries at [%p,%p)\n",
  199. ibdev, qp->qpn, num_recv_wqes, qp->recv.iobufs,
  200. ( ( ( void * ) qp ) + total_size ) );
  201. /* Calculate externally-visible QPN */
  202. switch ( type ) {
  203. case IB_QPT_SMI:
  204. qp->ext_qpn = IB_QPN_SMI;
  205. break;
  206. case IB_QPT_GSI:
  207. qp->ext_qpn = IB_QPN_GSI;
  208. break;
  209. default:
  210. qp->ext_qpn = qp->qpn;
  211. break;
  212. }
  213. if ( qp->ext_qpn != qp->qpn ) {
  214. DBGC ( ibdev, "IBDEV %p QPN %#lx has external QPN %#lx\n",
  215. ibdev, qp->qpn, qp->ext_qpn );
  216. }
  217. return qp;
  218. ibdev->op->destroy_qp ( ibdev, qp );
  219. err_dev_create_qp:
  220. list_del ( &qp->send.list );
  221. list_del ( &qp->recv.list );
  222. list_del ( &qp->list );
  223. free ( qp );
  224. err_alloc_qp:
  225. return NULL;
  226. }
  227. /**
  228. * Modify queue pair
  229. *
  230. * @v ibdev Infiniband device
  231. * @v qp Queue pair
  232. * @v av New address vector, if applicable
  233. * @ret rc Return status code
  234. */
  235. int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  236. int rc;
  237. DBGC ( ibdev, "IBDEV %p modifying QPN %#lx\n", ibdev, qp->qpn );
  238. if ( ( rc = ibdev->op->modify_qp ( ibdev, qp ) ) != 0 ) {
  239. DBGC ( ibdev, "IBDEV %p could not modify QPN %#lx: %s\n",
  240. ibdev, qp->qpn, strerror ( rc ) );
  241. return rc;
  242. }
  243. return 0;
  244. }
  245. /**
  246. * Destroy queue pair
  247. *
  248. * @v ibdev Infiniband device
  249. * @v qp Queue pair
  250. */
  251. void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  252. struct io_buffer *iobuf;
  253. unsigned int i;
  254. DBGC ( ibdev, "IBDEV %p destroying QPN %#lx\n",
  255. ibdev, qp->qpn );
  256. assert ( list_empty ( &qp->mgids ) );
  257. /* Perform device-specific destruction */
  258. ibdev->op->destroy_qp ( ibdev, qp );
  259. /* Complete any remaining I/O buffers with errors */
  260. for ( i = 0 ; i < qp->send.num_wqes ; i++ ) {
  261. if ( ( iobuf = qp->send.iobufs[i] ) != NULL )
  262. ib_complete_send ( ibdev, qp, iobuf, -ECANCELED );
  263. }
  264. for ( i = 0 ; i < qp->recv.num_wqes ; i++ ) {
  265. if ( ( iobuf = qp->recv.iobufs[i] ) != NULL ) {
  266. ib_complete_recv ( ibdev, qp, NULL, iobuf,
  267. -ECANCELED );
  268. }
  269. }
  270. /* Remove work queues from completion queue */
  271. list_del ( &qp->send.list );
  272. list_del ( &qp->recv.list );
  273. /* Free QP */
  274. list_del ( &qp->list );
  275. free ( qp );
  276. }
  277. /**
  278. * Find queue pair by QPN
  279. *
  280. * @v ibdev Infiniband device
  281. * @v qpn Queue pair number
  282. * @ret qp Queue pair, or NULL
  283. */
  284. struct ib_queue_pair * ib_find_qp_qpn ( struct ib_device *ibdev,
  285. unsigned long qpn ) {
  286. struct ib_queue_pair *qp;
  287. list_for_each_entry ( qp, &ibdev->qps, list ) {
  288. if ( ( qpn == qp->qpn ) || ( qpn == qp->ext_qpn ) )
  289. return qp;
  290. }
  291. return NULL;
  292. }
  293. /**
  294. * Find queue pair by multicast GID
  295. *
  296. * @v ibdev Infiniband device
  297. * @v gid Multicast GID
  298. * @ret qp Queue pair, or NULL
  299. */
  300. struct ib_queue_pair * ib_find_qp_mgid ( struct ib_device *ibdev,
  301. struct ib_gid *gid ) {
  302. struct ib_queue_pair *qp;
  303. struct ib_multicast_gid *mgid;
  304. list_for_each_entry ( qp, &ibdev->qps, list ) {
  305. list_for_each_entry ( mgid, &qp->mgids, list ) {
  306. if ( memcmp ( &mgid->gid, gid,
  307. sizeof ( mgid->gid ) ) == 0 ) {
  308. return qp;
  309. }
  310. }
  311. }
  312. return NULL;
  313. }
  314. /**
  315. * Find work queue belonging to completion queue
  316. *
  317. * @v cq Completion queue
  318. * @v qpn Queue pair number
  319. * @v is_send Find send work queue (rather than receive)
  320. * @ret wq Work queue, or NULL if not found
  321. */
  322. struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
  323. unsigned long qpn, int is_send ) {
  324. struct ib_work_queue *wq;
  325. list_for_each_entry ( wq, &cq->work_queues, list ) {
  326. if ( ( wq->qp->qpn == qpn ) && ( wq->is_send == is_send ) )
  327. return wq;
  328. }
  329. return NULL;
  330. }
  331. /**
  332. * Post send work queue entry
  333. *
  334. * @v ibdev Infiniband device
  335. * @v qp Queue pair
  336. * @v av Address vector
  337. * @v iobuf I/O buffer
  338. * @ret rc Return status code
  339. */
  340. int ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  341. struct ib_address_vector *av,
  342. struct io_buffer *iobuf ) {
  343. struct ib_address_vector av_copy;
  344. int rc;
  345. /* Check queue fill level */
  346. if ( qp->send.fill >= qp->send.num_wqes ) {
  347. DBGC ( ibdev, "IBDEV %p QPN %#lx send queue full\n",
  348. ibdev, qp->qpn );
  349. return -ENOBUFS;
  350. }
  351. /* Use default address vector if none specified */
  352. if ( ! av )
  353. av = &qp->av;
  354. /* Make modifiable copy of address vector */
  355. memcpy ( &av_copy, av, sizeof ( av_copy ) );
  356. av = &av_copy;
  357. /* Fill in optional parameters in address vector */
  358. if ( ! av->qkey )
  359. av->qkey = qp->qkey;
  360. if ( ! av->rate )
  361. av->rate = IB_RATE_2_5;
  362. /* Post to hardware */
  363. if ( ( rc = ibdev->op->post_send ( ibdev, qp, av, iobuf ) ) != 0 ) {
  364. DBGC ( ibdev, "IBDEV %p QPN %#lx could not post send WQE: "
  365. "%s\n", ibdev, qp->qpn, strerror ( rc ) );
  366. return rc;
  367. }
  368. qp->send.fill++;
  369. return 0;
  370. }
  371. /**
  372. * Post receive work queue entry
  373. *
  374. * @v ibdev Infiniband device
  375. * @v qp Queue pair
  376. * @v iobuf I/O buffer
  377. * @ret rc Return status code
  378. */
  379. int ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  380. struct io_buffer *iobuf ) {
  381. int rc;
  382. /* Check packet length */
  383. if ( iob_tailroom ( iobuf ) < IB_MAX_PAYLOAD_SIZE ) {
  384. DBGC ( ibdev, "IBDEV %p QPN %#lx wrong RX buffer size (%zd)\n",
  385. ibdev, qp->qpn, iob_tailroom ( iobuf ) );
  386. return -EINVAL;
  387. }
  388. /* Check queue fill level */
  389. if ( qp->recv.fill >= qp->recv.num_wqes ) {
  390. DBGC ( ibdev, "IBDEV %p QPN %#lx receive queue full\n",
  391. ibdev, qp->qpn );
  392. return -ENOBUFS;
  393. }
  394. /* Post to hardware */
  395. if ( ( rc = ibdev->op->post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
  396. DBGC ( ibdev, "IBDEV %p QPN %#lx could not post receive WQE: "
  397. "%s\n", ibdev, qp->qpn, strerror ( rc ) );
  398. return rc;
  399. }
  400. qp->recv.fill++;
  401. return 0;
  402. }
  403. /**
  404. * Complete send work queue entry
  405. *
  406. * @v ibdev Infiniband device
  407. * @v qp Queue pair
  408. * @v iobuf I/O buffer
  409. * @v rc Completion status code
  410. */
  411. void ib_complete_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  412. struct io_buffer *iobuf, int rc ) {
  413. if ( qp->send.cq->op->complete_send ) {
  414. qp->send.cq->op->complete_send ( ibdev, qp, iobuf, rc );
  415. } else {
  416. free_iob ( iobuf );
  417. }
  418. qp->send.fill--;
  419. }
  420. /**
  421. * Complete receive work queue entry
  422. *
  423. * @v ibdev Infiniband device
  424. * @v qp Queue pair
  425. * @v av Address vector
  426. * @v iobuf I/O buffer
  427. * @v rc Completion status code
  428. */
  429. void ib_complete_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  430. struct ib_address_vector *av,
  431. struct io_buffer *iobuf, int rc ) {
  432. if ( qp->recv.cq->op->complete_recv ) {
  433. qp->recv.cq->op->complete_recv ( ibdev, qp, av, iobuf, rc );
  434. } else {
  435. free_iob ( iobuf );
  436. }
  437. qp->recv.fill--;
  438. }
  439. /**
  440. * Refill receive work queue
  441. *
  442. * @v ibdev Infiniband device
  443. * @v qp Queue pair
  444. */
  445. void ib_refill_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  446. struct io_buffer *iobuf;
  447. int rc;
  448. /* Keep filling while unfilled entries remain */
  449. while ( qp->recv.fill < qp->recv.num_wqes ) {
  450. /* Allocate I/O buffer */
  451. iobuf = alloc_iob ( IB_MAX_PAYLOAD_SIZE );
  452. if ( ! iobuf ) {
  453. /* Non-fatal; we will refill on next attempt */
  454. return;
  455. }
  456. /* Post I/O buffer */
  457. if ( ( rc = ib_post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
  458. DBGC ( ibdev, "IBDEV %p could not refill: %s\n",
  459. ibdev, strerror ( rc ) );
  460. free_iob ( iobuf );
  461. /* Give up */
  462. return;
  463. }
  464. }
  465. }
  466. /***************************************************************************
  467. *
  468. * Link control
  469. *
  470. ***************************************************************************
  471. */
  472. /**
  473. * Open port
  474. *
  475. * @v ibdev Infiniband device
  476. * @ret rc Return status code
  477. */
  478. int ib_open ( struct ib_device *ibdev ) {
  479. int rc;
  480. /* Increment device open request counter */
  481. if ( ibdev->open_count++ > 0 ) {
  482. /* Device was already open; do nothing */
  483. return 0;
  484. }
  485. /* Create subnet management interface */
  486. ibdev->smi = ib_create_mi ( ibdev, IB_QPT_SMI );
  487. if ( ! ibdev->smi ) {
  488. DBGC ( ibdev, "IBDEV %p could not create SMI\n", ibdev );
  489. rc = -ENOMEM;
  490. goto err_create_smi;
  491. }
  492. /* Create subnet management agent */
  493. if ( ( rc = ib_create_sma ( ibdev, ibdev->smi ) ) != 0 ) {
  494. DBGC ( ibdev, "IBDEV %p could not create SMA: %s\n",
  495. ibdev, strerror ( rc ) );
  496. goto err_create_sma;
  497. }
  498. /* Create general services interface */
  499. ibdev->gsi = ib_create_mi ( ibdev, IB_QPT_GSI );
  500. if ( ! ibdev->gsi ) {
  501. DBGC ( ibdev, "IBDEV %p could not create GSI\n", ibdev );
  502. rc = -ENOMEM;
  503. goto err_create_gsi;
  504. }
  505. /* Open device */
  506. if ( ( rc = ibdev->op->open ( ibdev ) ) != 0 ) {
  507. DBGC ( ibdev, "IBDEV %p could not open: %s\n",
  508. ibdev, strerror ( rc ) );
  509. goto err_open;
  510. }
  511. /* Add to head of open devices list */
  512. list_add ( &ibdev->open_list, &open_ib_devices );
  513. assert ( ibdev->open_count == 1 );
  514. return 0;
  515. ibdev->op->close ( ibdev );
  516. err_open:
  517. ib_destroy_mi ( ibdev, ibdev->gsi );
  518. err_create_gsi:
  519. ib_destroy_sma ( ibdev, ibdev->smi );
  520. err_create_sma:
  521. ib_destroy_mi ( ibdev, ibdev->smi );
  522. err_create_smi:
  523. assert ( ibdev->open_count == 1 );
  524. ibdev->open_count = 0;
  525. return rc;
  526. }
  527. /**
  528. * Close port
  529. *
  530. * @v ibdev Infiniband device
  531. */
  532. void ib_close ( struct ib_device *ibdev ) {
  533. /* Decrement device open request counter */
  534. ibdev->open_count--;
  535. /* Close device if this was the last remaining requested opening */
  536. if ( ibdev->open_count == 0 ) {
  537. list_del ( &ibdev->open_list );
  538. ib_destroy_mi ( ibdev, ibdev->gsi );
  539. ib_destroy_sma ( ibdev, ibdev->smi );
  540. ib_destroy_mi ( ibdev, ibdev->smi );
  541. ibdev->op->close ( ibdev );
  542. }
  543. }
  544. /**
  545. * Get link state
  546. *
  547. * @v ibdev Infiniband device
  548. * @ret rc Link status code
  549. */
  550. int ib_link_rc ( struct ib_device *ibdev ) {
  551. switch ( ibdev->port_state ) {
  552. case IB_PORT_STATE_DOWN: return -ENOTCONN;
  553. case IB_PORT_STATE_INIT: return -EINPROGRESS_INIT;
  554. case IB_PORT_STATE_ARMED: return -EINPROGRESS_ARMED;
  555. case IB_PORT_STATE_ACTIVE: return 0;
  556. default: return -EINVAL;
  557. }
  558. }
  559. /***************************************************************************
  560. *
  561. * Multicast
  562. *
  563. ***************************************************************************
  564. */
  565. /**
  566. * Attach to multicast group
  567. *
  568. * @v ibdev Infiniband device
  569. * @v qp Queue pair
  570. * @v gid Multicast GID
  571. * @ret rc Return status code
  572. *
  573. * Note that this function handles only the local device's attachment
  574. * to the multicast GID; it does not issue the relevant MADs to join
  575. * the multicast group on the subnet.
  576. */
  577. int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  578. struct ib_gid *gid ) {
  579. struct ib_multicast_gid *mgid;
  580. int rc;
  581. /* Add to software multicast GID list */
  582. mgid = zalloc ( sizeof ( *mgid ) );
  583. if ( ! mgid ) {
  584. rc = -ENOMEM;
  585. goto err_alloc_mgid;
  586. }
  587. memcpy ( &mgid->gid, gid, sizeof ( mgid->gid ) );
  588. list_add ( &mgid->list, &qp->mgids );
  589. /* Add to hardware multicast GID list */
  590. if ( ( rc = ibdev->op->mcast_attach ( ibdev, qp, gid ) ) != 0 )
  591. goto err_dev_mcast_attach;
  592. return 0;
  593. err_dev_mcast_attach:
  594. list_del ( &mgid->list );
  595. free ( mgid );
  596. err_alloc_mgid:
  597. return rc;
  598. }
  599. /**
  600. * Detach from multicast group
  601. *
  602. * @v ibdev Infiniband device
  603. * @v qp Queue pair
  604. * @v gid Multicast GID
  605. */
  606. void ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  607. struct ib_gid *gid ) {
  608. struct ib_multicast_gid *mgid;
  609. /* Remove from hardware multicast GID list */
  610. ibdev->op->mcast_detach ( ibdev, qp, gid );
  611. /* Remove from software multicast GID list */
  612. list_for_each_entry ( mgid, &qp->mgids, list ) {
  613. if ( memcmp ( &mgid->gid, gid, sizeof ( mgid->gid ) ) == 0 ) {
  614. list_del ( &mgid->list );
  615. free ( mgid );
  616. break;
  617. }
  618. }
  619. }
  620. /***************************************************************************
  621. *
  622. * Miscellaneous
  623. *
  624. ***************************************************************************
  625. */
  626. /**
  627. * Get Infiniband HCA information
  628. *
  629. * @v ibdev Infiniband device
  630. * @ret hca_guid HCA GUID
  631. * @ret num_ports Number of ports
  632. */
  633. int ib_get_hca_info ( struct ib_device *ibdev,
  634. struct ib_gid_half *hca_guid ) {
  635. struct ib_device *tmp;
  636. int num_ports = 0;
  637. /* Search for IB devices with the same physical device to
  638. * identify port count and a suitable Node GUID.
  639. */
  640. for_each_ibdev ( tmp ) {
  641. if ( tmp->dev != ibdev->dev )
  642. continue;
  643. if ( num_ports == 0 ) {
  644. memcpy ( hca_guid, &tmp->gid.u.half[1],
  645. sizeof ( *hca_guid ) );
  646. }
  647. num_ports++;
  648. }
  649. return num_ports;
  650. }
  651. /**
  652. * Set port information
  653. *
  654. * @v ibdev Infiniband device
  655. * @v mad Set port information MAD
  656. */
  657. int ib_set_port_info ( struct ib_device *ibdev, union ib_mad *mad ) {
  658. int rc;
  659. /* Adapters with embedded SMAs do not need to support this method */
  660. if ( ! ibdev->op->set_port_info ) {
  661. DBGC ( ibdev, "IBDEV %p does not support setting port "
  662. "information\n", ibdev );
  663. return -ENOTSUP;
  664. }
  665. if ( ( rc = ibdev->op->set_port_info ( ibdev, mad ) ) != 0 ) {
  666. DBGC ( ibdev, "IBDEV %p could not set port information: %s\n",
  667. ibdev, strerror ( rc ) );
  668. return rc;
  669. }
  670. return 0;
  671. };
  672. /**
  673. * Set partition key table
  674. *
  675. * @v ibdev Infiniband device
  676. * @v mad Set partition key table MAD
  677. */
  678. int ib_set_pkey_table ( struct ib_device *ibdev, union ib_mad *mad ) {
  679. int rc;
  680. /* Adapters with embedded SMAs do not need to support this method */
  681. if ( ! ibdev->op->set_pkey_table ) {
  682. DBGC ( ibdev, "IBDEV %p does not support setting partition "
  683. "key table\n", ibdev );
  684. return -ENOTSUP;
  685. }
  686. if ( ( rc = ibdev->op->set_pkey_table ( ibdev, mad ) ) != 0 ) {
  687. DBGC ( ibdev, "IBDEV %p could not set partition key table: "
  688. "%s\n", ibdev, strerror ( rc ) );
  689. return rc;
  690. }
  691. return 0;
  692. };
  693. /***************************************************************************
  694. *
  695. * Event queues
  696. *
  697. ***************************************************************************
  698. */
  /**
   * Handle Infiniband link state change
   *
   * Forwards the notification to the IPoIB layer.
   *
   * @v ibdev          Infiniband device
   */
  void ib_link_state_changed ( struct ib_device *ibdev ) {

      /* IPoIB is the only consumer notified here */
      ipoib_link_state_changed ( ibdev );
  }
  708. /**
  709. * Poll event queue
  710. *
  711. * @v ibdev Infiniband device
  712. */
  713. void ib_poll_eq ( struct ib_device *ibdev ) {
  714. struct ib_completion_queue *cq;
  715. /* Poll device's event queue */
  716. ibdev->op->poll_eq ( ibdev );
  717. /* Poll all completion queues */
  718. list_for_each_entry ( cq, &ibdev->cqs, list )
  719. ib_poll_cq ( ibdev, cq );
  720. }
  721. /**
  722. * Single-step the Infiniband event queue
  723. *
  724. * @v process Infiniband event queue process
  725. */
  726. static void ib_step ( struct process *process __unused ) {
  727. struct ib_device *ibdev;
  728. for_each_ibdev ( ibdev )
  729. ib_poll_eq ( ibdev );
  730. }
  731. /** Infiniband event queue process */
  732. struct process ib_process __permanent_process = {
  733. .list = LIST_HEAD_INIT ( ib_process.list ),
  734. .step = ib_step,
  735. };
  736. /***************************************************************************
  737. *
  738. * Infiniband device creation/destruction
  739. *
  740. ***************************************************************************
  741. */
  742. /**
  743. * Allocate Infiniband device
  744. *
  745. * @v priv_size Size of driver private data area
  746. * @ret ibdev Infiniband device, or NULL
  747. */
  748. struct ib_device * alloc_ibdev ( size_t priv_size ) {
  749. struct ib_device *ibdev;
  750. void *drv_priv;
  751. size_t total_len;
  752. total_len = ( sizeof ( *ibdev ) + priv_size );
  753. ibdev = zalloc ( total_len );
  754. if ( ibdev ) {
  755. drv_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) );
  756. ib_set_drvdata ( ibdev, drv_priv );
  757. INIT_LIST_HEAD ( &ibdev->cqs );
  758. INIT_LIST_HEAD ( &ibdev->qps );
  759. ibdev->port_state = IB_PORT_STATE_DOWN;
  760. ibdev->lid = IB_LID_NONE;
  761. ibdev->pkey = IB_PKEY_DEFAULT;
  762. }
  763. return ibdev;
  764. }
  765. /**
  766. * Register Infiniband device
  767. *
  768. * @v ibdev Infiniband device
  769. * @ret rc Return status code
  770. */
  771. int register_ibdev ( struct ib_device *ibdev ) {
  772. int rc;
  773. /* Add to device list */
  774. ibdev_get ( ibdev );
  775. list_add_tail ( &ibdev->list, &ib_devices );
  776. /* Add IPoIB device */
  777. if ( ( rc = ipoib_probe ( ibdev ) ) != 0 ) {
  778. DBGC ( ibdev, "IBDEV %p could not add IPoIB device: %s\n",
  779. ibdev, strerror ( rc ) );
  780. goto err_ipoib_probe;
  781. }
  782. DBGC ( ibdev, "IBDEV %p registered (phys %s)\n", ibdev,
  783. ibdev->dev->name );
  784. return 0;
  785. err_ipoib_probe:
  786. list_del ( &ibdev->list );
  787. ibdev_put ( ibdev );
  788. return rc;
  789. }
  790. /**
  791. * Unregister Infiniband device
  792. *
  793. * @v ibdev Infiniband device
  794. */
  795. void unregister_ibdev ( struct ib_device *ibdev ) {
  796. /* Close device */
  797. ipoib_remove ( ibdev );
  798. /* Remove from device list */
  799. list_del ( &ibdev->list );
  800. ibdev_put ( ibdev );
  801. DBGC ( ibdev, "IBDEV %p unregistered\n", ibdev );
  802. }
  803. /**
  804. * Find Infiniband device by GID
  805. *
  806. * @v gid GID
  807. * @ret ibdev Infiniband device, or NULL
  808. */
  809. struct ib_device * find_ibdev ( struct ib_gid *gid ) {
  810. struct ib_device *ibdev;
  811. for_each_ibdev ( ibdev ) {
  812. if ( memcmp ( gid, &ibdev->gid, sizeof ( *gid ) ) == 0 )
  813. return ibdev;
  814. }
  815. return NULL;
  816. }
  817. /**
  818. * Get most recently opened Infiniband device
  819. *
  820. * @ret ibdev Most recently opened Infiniband device, or NULL
  821. */
  822. struct ib_device * last_opened_ibdev ( void ) {
  823. struct ib_device *ibdev;
  824. list_for_each_entry ( ibdev, &open_ib_devices, open_list ) {
  825. assert ( ibdev->open_count != 0 );
  826. return ibdev;
  827. }
  828. return NULL;
  829. }