Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

infiniband.c 24KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995
  1. /*
  2. * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License as
  6. * published by the Free Software Foundation; either version 2 of the
  7. * License, or any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful, but
  10. * WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write to the Free Software
  16. * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17. */
  18. FILE_LICENCE ( GPL2_OR_LATER );
  19. #include <stdint.h>
  20. #include <stdlib.h>
  21. #include <stdio.h>
  22. #include <string.h>
  23. #include <unistd.h>
  24. #include <byteswap.h>
  25. #include <errno.h>
  26. #include <assert.h>
  27. #include <ipxe/list.h>
  28. #include <ipxe/errortab.h>
  29. #include <ipxe/if_arp.h>
  30. #include <ipxe/netdevice.h>
  31. #include <ipxe/iobuf.h>
  32. #include <ipxe/process.h>
  33. #include <ipxe/infiniband.h>
  34. #include <ipxe/ib_mi.h>
  35. #include <ipxe/ib_sma.h>
  36. /** @file
  37. *
  38. * Infiniband protocol
  39. *
  40. */
  41. /** List of Infiniband devices */
  42. struct list_head ib_devices = LIST_HEAD_INIT ( ib_devices );
  43. /** List of open Infiniband devices, in reverse order of opening */
  44. static struct list_head open_ib_devices = LIST_HEAD_INIT ( open_ib_devices );
  45. /* Disambiguate the various possible EINPROGRESSes */
  46. #define EINPROGRESS_INIT __einfo_error ( EINFO_EINPROGRESS_INIT )
  47. #define EINFO_EINPROGRESS_INIT __einfo_uniqify \
  48. ( EINFO_EINPROGRESS, 0x01, "Initialising" )
  49. #define EINPROGRESS_ARMED __einfo_error ( EINFO_EINPROGRESS_ARMED )
  50. #define EINFO_EINPROGRESS_ARMED __einfo_uniqify \
  51. ( EINFO_EINPROGRESS, 0x02, "Armed" )
  52. /** Human-readable message for the link statuses */
  53. struct errortab infiniband_errors[] __errortab = {
  54. __einfo_errortab ( EINFO_EINPROGRESS_INIT ),
  55. __einfo_errortab ( EINFO_EINPROGRESS_ARMED ),
  56. };
  57. /***************************************************************************
  58. *
  59. * Completion queues
  60. *
  61. ***************************************************************************
  62. */
  63. /**
  64. * Create completion queue
  65. *
  66. * @v ibdev Infiniband device
  67. * @v num_cqes Number of completion queue entries
  68. * @v op Completion queue operations
  69. * @ret cq New completion queue
  70. */
  71. struct ib_completion_queue *
  72. ib_create_cq ( struct ib_device *ibdev, unsigned int num_cqes,
  73. struct ib_completion_queue_operations *op ) {
  74. struct ib_completion_queue *cq;
  75. int rc;
  76. DBGC ( ibdev, "IBDEV %p creating completion queue\n", ibdev );
  77. /* Allocate and initialise data structure */
  78. cq = zalloc ( sizeof ( *cq ) );
  79. if ( ! cq )
  80. goto err_alloc_cq;
  81. cq->ibdev = ibdev;
  82. list_add ( &cq->list, &ibdev->cqs );
  83. cq->num_cqes = num_cqes;
  84. INIT_LIST_HEAD ( &cq->work_queues );
  85. cq->op = op;
  86. /* Perform device-specific initialisation and get CQN */
  87. if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) {
  88. DBGC ( ibdev, "IBDEV %p could not initialise completion "
  89. "queue: %s\n", ibdev, strerror ( rc ) );
  90. goto err_dev_create_cq;
  91. }
  92. DBGC ( ibdev, "IBDEV %p created %d-entry completion queue %p (%p) "
  93. "with CQN %#lx\n", ibdev, num_cqes, cq,
  94. ib_cq_get_drvdata ( cq ), cq->cqn );
  95. return cq;
  96. ibdev->op->destroy_cq ( ibdev, cq );
  97. err_dev_create_cq:
  98. list_del ( &cq->list );
  99. free ( cq );
  100. err_alloc_cq:
  101. return NULL;
  102. }
  103. /**
  104. * Destroy completion queue
  105. *
  106. * @v ibdev Infiniband device
  107. * @v cq Completion queue
  108. */
  109. void ib_destroy_cq ( struct ib_device *ibdev,
  110. struct ib_completion_queue *cq ) {
  111. DBGC ( ibdev, "IBDEV %p destroying completion queue %#lx\n",
  112. ibdev, cq->cqn );
  113. assert ( list_empty ( &cq->work_queues ) );
  114. ibdev->op->destroy_cq ( ibdev, cq );
  115. list_del ( &cq->list );
  116. free ( cq );
  117. }
  118. /**
  119. * Poll completion queue
  120. *
  121. * @v ibdev Infiniband device
  122. * @v cq Completion queue
  123. */
  124. void ib_poll_cq ( struct ib_device *ibdev,
  125. struct ib_completion_queue *cq ) {
  126. struct ib_work_queue *wq;
  127. /* Poll completion queue */
  128. ibdev->op->poll_cq ( ibdev, cq );
  129. /* Refill receive work queues */
  130. list_for_each_entry ( wq, &cq->work_queues, list ) {
  131. if ( ! wq->is_send )
  132. ib_refill_recv ( ibdev, wq->qp );
  133. }
  134. }
  135. /***************************************************************************
  136. *
  137. * Work queues
  138. *
  139. ***************************************************************************
  140. */
  141. /**
  142. * Create queue pair
  143. *
  144. * @v ibdev Infiniband device
  145. * @v type Queue pair type
  146. * @v num_send_wqes Number of send work queue entries
  147. * @v send_cq Send completion queue
  148. * @v num_recv_wqes Number of receive work queue entries
  149. * @v recv_cq Receive completion queue
  150. * @ret qp Queue pair
  151. *
  152. * The queue pair will be left in the INIT state; you must call
  153. * ib_modify_qp() before it is ready to use for sending and receiving.
  154. */
  155. struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev,
  156. enum ib_queue_pair_type type,
  157. unsigned int num_send_wqes,
  158. struct ib_completion_queue *send_cq,
  159. unsigned int num_recv_wqes,
  160. struct ib_completion_queue *recv_cq ) {
  161. struct ib_queue_pair *qp;
  162. size_t total_size;
  163. int rc;
  164. DBGC ( ibdev, "IBDEV %p creating queue pair\n", ibdev );
  165. /* Allocate and initialise data structure */
  166. total_size = ( sizeof ( *qp ) +
  167. ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) +
  168. ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) );
  169. qp = zalloc ( total_size );
  170. if ( ! qp )
  171. goto err_alloc_qp;
  172. qp->ibdev = ibdev;
  173. list_add ( &qp->list, &ibdev->qps );
  174. qp->type = type;
  175. qp->send.qp = qp;
  176. qp->send.is_send = 1;
  177. qp->send.cq = send_cq;
  178. list_add ( &qp->send.list, &send_cq->work_queues );
  179. qp->send.psn = ( random() & 0xffffffUL );
  180. qp->send.num_wqes = num_send_wqes;
  181. qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) );
  182. qp->recv.qp = qp;
  183. qp->recv.cq = recv_cq;
  184. list_add ( &qp->recv.list, &recv_cq->work_queues );
  185. qp->recv.psn = ( random() & 0xffffffUL );
  186. qp->recv.num_wqes = num_recv_wqes;
  187. qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) +
  188. ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ));
  189. INIT_LIST_HEAD ( &qp->mgids );
  190. /* Perform device-specific initialisation and get QPN */
  191. if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) {
  192. DBGC ( ibdev, "IBDEV %p could not initialise queue pair: "
  193. "%s\n", ibdev, strerror ( rc ) );
  194. goto err_dev_create_qp;
  195. }
  196. DBGC ( ibdev, "IBDEV %p created queue pair %p (%p) with QPN %#lx\n",
  197. ibdev, qp, ib_qp_get_drvdata ( qp ), qp->qpn );
  198. DBGC ( ibdev, "IBDEV %p QPN %#lx has %d send entries at [%p,%p)\n",
  199. ibdev, qp->qpn, num_send_wqes, qp->send.iobufs,
  200. qp->recv.iobufs );
  201. DBGC ( ibdev, "IBDEV %p QPN %#lx has %d receive entries at [%p,%p)\n",
  202. ibdev, qp->qpn, num_recv_wqes, qp->recv.iobufs,
  203. ( ( ( void * ) qp ) + total_size ) );
  204. /* Calculate externally-visible QPN */
  205. switch ( type ) {
  206. case IB_QPT_SMI:
  207. qp->ext_qpn = IB_QPN_SMI;
  208. break;
  209. case IB_QPT_GSI:
  210. qp->ext_qpn = IB_QPN_GSI;
  211. break;
  212. default:
  213. qp->ext_qpn = qp->qpn;
  214. break;
  215. }
  216. if ( qp->ext_qpn != qp->qpn ) {
  217. DBGC ( ibdev, "IBDEV %p QPN %#lx has external QPN %#lx\n",
  218. ibdev, qp->qpn, qp->ext_qpn );
  219. }
  220. return qp;
  221. ibdev->op->destroy_qp ( ibdev, qp );
  222. err_dev_create_qp:
  223. list_del ( &qp->send.list );
  224. list_del ( &qp->recv.list );
  225. list_del ( &qp->list );
  226. free ( qp );
  227. err_alloc_qp:
  228. return NULL;
  229. }
  230. /**
  231. * Modify queue pair
  232. *
  233. * @v ibdev Infiniband device
  234. * @v qp Queue pair
  235. * @v av New address vector, if applicable
  236. * @ret rc Return status code
  237. */
  238. int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  239. int rc;
  240. DBGC ( ibdev, "IBDEV %p modifying QPN %#lx\n", ibdev, qp->qpn );
  241. if ( ( rc = ibdev->op->modify_qp ( ibdev, qp ) ) != 0 ) {
  242. DBGC ( ibdev, "IBDEV %p could not modify QPN %#lx: %s\n",
  243. ibdev, qp->qpn, strerror ( rc ) );
  244. return rc;
  245. }
  246. return 0;
  247. }
  248. /**
  249. * Destroy queue pair
  250. *
  251. * @v ibdev Infiniband device
  252. * @v qp Queue pair
  253. */
  254. void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  255. struct io_buffer *iobuf;
  256. unsigned int i;
  257. DBGC ( ibdev, "IBDEV %p destroying QPN %#lx\n",
  258. ibdev, qp->qpn );
  259. assert ( list_empty ( &qp->mgids ) );
  260. /* Perform device-specific destruction */
  261. ibdev->op->destroy_qp ( ibdev, qp );
  262. /* Complete any remaining I/O buffers with errors */
  263. for ( i = 0 ; i < qp->send.num_wqes ; i++ ) {
  264. if ( ( iobuf = qp->send.iobufs[i] ) != NULL )
  265. ib_complete_send ( ibdev, qp, iobuf, -ECANCELED );
  266. }
  267. for ( i = 0 ; i < qp->recv.num_wqes ; i++ ) {
  268. if ( ( iobuf = qp->recv.iobufs[i] ) != NULL ) {
  269. ib_complete_recv ( ibdev, qp, NULL, iobuf,
  270. -ECANCELED );
  271. }
  272. }
  273. /* Remove work queues from completion queue */
  274. list_del ( &qp->send.list );
  275. list_del ( &qp->recv.list );
  276. /* Free QP */
  277. list_del ( &qp->list );
  278. free ( qp );
  279. }
  280. /**
  281. * Find queue pair by QPN
  282. *
  283. * @v ibdev Infiniband device
  284. * @v qpn Queue pair number
  285. * @ret qp Queue pair, or NULL
  286. */
  287. struct ib_queue_pair * ib_find_qp_qpn ( struct ib_device *ibdev,
  288. unsigned long qpn ) {
  289. struct ib_queue_pair *qp;
  290. list_for_each_entry ( qp, &ibdev->qps, list ) {
  291. if ( ( qpn == qp->qpn ) || ( qpn == qp->ext_qpn ) )
  292. return qp;
  293. }
  294. return NULL;
  295. }
  296. /**
  297. * Find queue pair by multicast GID
  298. *
  299. * @v ibdev Infiniband device
  300. * @v gid Multicast GID
  301. * @ret qp Queue pair, or NULL
  302. */
  303. struct ib_queue_pair * ib_find_qp_mgid ( struct ib_device *ibdev,
  304. union ib_gid *gid ) {
  305. struct ib_queue_pair *qp;
  306. struct ib_multicast_gid *mgid;
  307. list_for_each_entry ( qp, &ibdev->qps, list ) {
  308. list_for_each_entry ( mgid, &qp->mgids, list ) {
  309. if ( memcmp ( &mgid->gid, gid,
  310. sizeof ( mgid->gid ) ) == 0 ) {
  311. return qp;
  312. }
  313. }
  314. }
  315. return NULL;
  316. }
  317. /**
  318. * Find work queue belonging to completion queue
  319. *
  320. * @v cq Completion queue
  321. * @v qpn Queue pair number
  322. * @v is_send Find send work queue (rather than receive)
  323. * @ret wq Work queue, or NULL if not found
  324. */
  325. struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
  326. unsigned long qpn, int is_send ) {
  327. struct ib_work_queue *wq;
  328. list_for_each_entry ( wq, &cq->work_queues, list ) {
  329. if ( ( wq->qp->qpn == qpn ) && ( wq->is_send == is_send ) )
  330. return wq;
  331. }
  332. return NULL;
  333. }
  334. /**
  335. * Post send work queue entry
  336. *
  337. * @v ibdev Infiniband device
  338. * @v qp Queue pair
  339. * @v av Address vector
  340. * @v iobuf I/O buffer
  341. * @ret rc Return status code
  342. */
  343. int ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  344. struct ib_address_vector *av,
  345. struct io_buffer *iobuf ) {
  346. struct ib_address_vector av_copy;
  347. int rc;
  348. /* Check queue fill level */
  349. if ( qp->send.fill >= qp->send.num_wqes ) {
  350. DBGC ( ibdev, "IBDEV %p QPN %#lx send queue full\n",
  351. ibdev, qp->qpn );
  352. return -ENOBUFS;
  353. }
  354. /* Use default address vector if none specified */
  355. if ( ! av )
  356. av = &qp->av;
  357. /* Make modifiable copy of address vector */
  358. memcpy ( &av_copy, av, sizeof ( av_copy ) );
  359. av = &av_copy;
  360. /* Fill in optional parameters in address vector */
  361. if ( ! av->qkey )
  362. av->qkey = qp->qkey;
  363. if ( ! av->rate )
  364. av->rate = IB_RATE_2_5;
  365. /* Post to hardware */
  366. if ( ( rc = ibdev->op->post_send ( ibdev, qp, av, iobuf ) ) != 0 ) {
  367. DBGC ( ibdev, "IBDEV %p QPN %#lx could not post send WQE: "
  368. "%s\n", ibdev, qp->qpn, strerror ( rc ) );
  369. return rc;
  370. }
  371. qp->send.fill++;
  372. return 0;
  373. }
  374. /**
  375. * Post receive work queue entry
  376. *
  377. * @v ibdev Infiniband device
  378. * @v qp Queue pair
  379. * @v iobuf I/O buffer
  380. * @ret rc Return status code
  381. */
  382. int ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  383. struct io_buffer *iobuf ) {
  384. int rc;
  385. /* Check packet length */
  386. if ( iob_tailroom ( iobuf ) < IB_MAX_PAYLOAD_SIZE ) {
  387. DBGC ( ibdev, "IBDEV %p QPN %#lx wrong RX buffer size (%zd)\n",
  388. ibdev, qp->qpn, iob_tailroom ( iobuf ) );
  389. return -EINVAL;
  390. }
  391. /* Check queue fill level */
  392. if ( qp->recv.fill >= qp->recv.num_wqes ) {
  393. DBGC ( ibdev, "IBDEV %p QPN %#lx receive queue full\n",
  394. ibdev, qp->qpn );
  395. return -ENOBUFS;
  396. }
  397. /* Post to hardware */
  398. if ( ( rc = ibdev->op->post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
  399. DBGC ( ibdev, "IBDEV %p QPN %#lx could not post receive WQE: "
  400. "%s\n", ibdev, qp->qpn, strerror ( rc ) );
  401. return rc;
  402. }
  403. qp->recv.fill++;
  404. return 0;
  405. }
  406. /**
  407. * Complete send work queue entry
  408. *
  409. * @v ibdev Infiniband device
  410. * @v qp Queue pair
  411. * @v iobuf I/O buffer
  412. * @v rc Completion status code
  413. */
  414. void ib_complete_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  415. struct io_buffer *iobuf, int rc ) {
  416. if ( qp->send.cq->op->complete_send ) {
  417. qp->send.cq->op->complete_send ( ibdev, qp, iobuf, rc );
  418. } else {
  419. free_iob ( iobuf );
  420. }
  421. qp->send.fill--;
  422. }
  423. /**
  424. * Complete receive work queue entry
  425. *
  426. * @v ibdev Infiniband device
  427. * @v qp Queue pair
  428. * @v av Address vector, or NULL
  429. * @v iobuf I/O buffer
  430. * @v rc Completion status code
  431. */
  432. void ib_complete_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  433. struct ib_address_vector *av,
  434. struct io_buffer *iobuf, int rc ) {
  435. if ( qp->recv.cq->op->complete_recv ) {
  436. qp->recv.cq->op->complete_recv ( ibdev, qp, av, iobuf, rc );
  437. } else {
  438. free_iob ( iobuf );
  439. }
  440. qp->recv.fill--;
  441. }
  442. /**
  443. * Refill receive work queue
  444. *
  445. * @v ibdev Infiniband device
  446. * @v qp Queue pair
  447. */
  448. void ib_refill_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  449. struct io_buffer *iobuf;
  450. int rc;
  451. /* Keep filling while unfilled entries remain */
  452. while ( qp->recv.fill < qp->recv.num_wqes ) {
  453. /* Allocate I/O buffer */
  454. iobuf = alloc_iob ( IB_MAX_PAYLOAD_SIZE );
  455. if ( ! iobuf ) {
  456. /* Non-fatal; we will refill on next attempt */
  457. return;
  458. }
  459. /* Post I/O buffer */
  460. if ( ( rc = ib_post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
  461. DBGC ( ibdev, "IBDEV %p could not refill: %s\n",
  462. ibdev, strerror ( rc ) );
  463. free_iob ( iobuf );
  464. /* Give up */
  465. return;
  466. }
  467. }
  468. }
  469. /***************************************************************************
  470. *
  471. * Link control
  472. *
  473. ***************************************************************************
  474. */
  475. /**
  476. * Get link state
  477. *
  478. * @v ibdev Infiniband device
  479. * @ret rc Link status code
  480. */
  481. int ib_link_rc ( struct ib_device *ibdev ) {
  482. switch ( ibdev->port_state ) {
  483. case IB_PORT_STATE_DOWN: return -ENOTCONN;
  484. case IB_PORT_STATE_INIT: return -EINPROGRESS_INIT;
  485. case IB_PORT_STATE_ARMED: return -EINPROGRESS_ARMED;
  486. case IB_PORT_STATE_ACTIVE: return 0;
  487. default: return -EINVAL;
  488. }
  489. }
  490. /**
  491. * Textual representation of Infiniband link state
  492. *
  493. * @v ibdev Infiniband device
  494. * @ret link_text Link state text
  495. */
  496. static const char * ib_link_state_text ( struct ib_device *ibdev ) {
  497. switch ( ibdev->port_state ) {
  498. case IB_PORT_STATE_DOWN: return "DOWN";
  499. case IB_PORT_STATE_INIT: return "INIT";
  500. case IB_PORT_STATE_ARMED: return "ARMED";
  501. case IB_PORT_STATE_ACTIVE: return "ACTIVE";
  502. default: return "UNKNOWN";
  503. }
  504. }
  505. /**
  506. * Notify drivers of Infiniband device or link state change
  507. *
  508. * @v ibdev Infiniband device
  509. */
  510. static void ib_notify ( struct ib_device *ibdev ) {
  511. struct ib_driver *driver;
  512. for_each_table_entry ( driver, IB_DRIVERS )
  513. driver->notify ( ibdev );
  514. }
  515. /**
  516. * Notify of Infiniband link state change
  517. *
  518. * @v ibdev Infiniband device
  519. */
  520. void ib_link_state_changed ( struct ib_device *ibdev ) {
  521. DBGC ( ibdev, "IBDEV %p link state is %s\n",
  522. ibdev, ib_link_state_text ( ibdev ) );
  523. /* Notify drivers of link state change */
  524. ib_notify ( ibdev );
  525. }
  526. /**
  527. * Open port
  528. *
  529. * @v ibdev Infiniband device
  530. * @ret rc Return status code
  531. */
  532. int ib_open ( struct ib_device *ibdev ) {
  533. int rc;
  534. /* Increment device open request counter */
  535. if ( ibdev->open_count++ > 0 ) {
  536. /* Device was already open; do nothing */
  537. return 0;
  538. }
  539. /* Create subnet management interface */
  540. ibdev->smi = ib_create_mi ( ibdev, IB_QPT_SMI );
  541. if ( ! ibdev->smi ) {
  542. DBGC ( ibdev, "IBDEV %p could not create SMI\n", ibdev );
  543. rc = -ENOMEM;
  544. goto err_create_smi;
  545. }
  546. /* Create subnet management agent */
  547. if ( ( rc = ib_create_sma ( ibdev, ibdev->smi ) ) != 0 ) {
  548. DBGC ( ibdev, "IBDEV %p could not create SMA: %s\n",
  549. ibdev, strerror ( rc ) );
  550. goto err_create_sma;
  551. }
  552. /* Create general services interface */
  553. ibdev->gsi = ib_create_mi ( ibdev, IB_QPT_GSI );
  554. if ( ! ibdev->gsi ) {
  555. DBGC ( ibdev, "IBDEV %p could not create GSI\n", ibdev );
  556. rc = -ENOMEM;
  557. goto err_create_gsi;
  558. }
  559. /* Open device */
  560. if ( ( rc = ibdev->op->open ( ibdev ) ) != 0 ) {
  561. DBGC ( ibdev, "IBDEV %p could not open: %s\n",
  562. ibdev, strerror ( rc ) );
  563. goto err_open;
  564. }
  565. /* Add to head of open devices list */
  566. list_add ( &ibdev->open_list, &open_ib_devices );
  567. /* Notify drivers of device state change */
  568. ib_notify ( ibdev );
  569. assert ( ibdev->open_count == 1 );
  570. return 0;
  571. ibdev->op->close ( ibdev );
  572. err_open:
  573. ib_destroy_mi ( ibdev, ibdev->gsi );
  574. err_create_gsi:
  575. ib_destroy_sma ( ibdev, ibdev->smi );
  576. err_create_sma:
  577. ib_destroy_mi ( ibdev, ibdev->smi );
  578. err_create_smi:
  579. assert ( ibdev->open_count == 1 );
  580. ibdev->open_count = 0;
  581. return rc;
  582. }
  583. /**
  584. * Close port
  585. *
  586. * @v ibdev Infiniband device
  587. */
  588. void ib_close ( struct ib_device *ibdev ) {
  589. /* Decrement device open request counter */
  590. ibdev->open_count--;
  591. /* Close device if this was the last remaining requested opening */
  592. if ( ibdev->open_count == 0 ) {
  593. ib_notify ( ibdev );
  594. list_del ( &ibdev->open_list );
  595. ib_destroy_mi ( ibdev, ibdev->gsi );
  596. ib_destroy_sma ( ibdev, ibdev->smi );
  597. ib_destroy_mi ( ibdev, ibdev->smi );
  598. ibdev->op->close ( ibdev );
  599. }
  600. }
  601. /***************************************************************************
  602. *
  603. * Multicast
  604. *
  605. ***************************************************************************
  606. */
  607. /**
  608. * Attach to multicast group
  609. *
  610. * @v ibdev Infiniband device
  611. * @v qp Queue pair
  612. * @v gid Multicast GID
  613. * @ret rc Return status code
  614. *
  615. * Note that this function handles only the local device's attachment
  616. * to the multicast GID; it does not issue the relevant MADs to join
  617. * the multicast group on the subnet.
  618. */
  619. int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  620. union ib_gid *gid ) {
  621. struct ib_multicast_gid *mgid;
  622. int rc;
  623. /* Add to software multicast GID list */
  624. mgid = zalloc ( sizeof ( *mgid ) );
  625. if ( ! mgid ) {
  626. rc = -ENOMEM;
  627. goto err_alloc_mgid;
  628. }
  629. memcpy ( &mgid->gid, gid, sizeof ( mgid->gid ) );
  630. list_add ( &mgid->list, &qp->mgids );
  631. /* Add to hardware multicast GID list */
  632. if ( ( rc = ibdev->op->mcast_attach ( ibdev, qp, gid ) ) != 0 )
  633. goto err_dev_mcast_attach;
  634. return 0;
  635. err_dev_mcast_attach:
  636. list_del ( &mgid->list );
  637. free ( mgid );
  638. err_alloc_mgid:
  639. return rc;
  640. }
  641. /**
  642. * Detach from multicast group
  643. *
  644. * @v ibdev Infiniband device
  645. * @v qp Queue pair
  646. * @v gid Multicast GID
  647. */
  648. void ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  649. union ib_gid *gid ) {
  650. struct ib_multicast_gid *mgid;
  651. /* Remove from hardware multicast GID list */
  652. ibdev->op->mcast_detach ( ibdev, qp, gid );
  653. /* Remove from software multicast GID list */
  654. list_for_each_entry ( mgid, &qp->mgids, list ) {
  655. if ( memcmp ( &mgid->gid, gid, sizeof ( mgid->gid ) ) == 0 ) {
  656. list_del ( &mgid->list );
  657. free ( mgid );
  658. break;
  659. }
  660. }
  661. }
  662. /***************************************************************************
  663. *
  664. * Miscellaneous
  665. *
  666. ***************************************************************************
  667. */
  668. /**
  669. * Count Infiniband HCA ports
  670. *
  671. * @v ibdev Infiniband device
  672. * @ret num_ports Number of ports
  673. */
  674. int ib_count_ports ( struct ib_device *ibdev ) {
  675. struct ib_device *tmp;
  676. int num_ports = 0;
  677. /* Search for IB devices with the same physical device to
  678. * identify port count.
  679. */
  680. for_each_ibdev ( tmp ) {
  681. if ( tmp->dev == ibdev->dev )
  682. num_ports++;
  683. }
  684. return num_ports;
  685. }
  686. /**
  687. * Set port information
  688. *
  689. * @v ibdev Infiniband device
  690. * @v mad Set port information MAD
  691. */
  692. int ib_set_port_info ( struct ib_device *ibdev, union ib_mad *mad ) {
  693. int rc;
  694. /* Adapters with embedded SMAs do not need to support this method */
  695. if ( ! ibdev->op->set_port_info ) {
  696. DBGC ( ibdev, "IBDEV %p does not support setting port "
  697. "information\n", ibdev );
  698. return -ENOTSUP;
  699. }
  700. if ( ( rc = ibdev->op->set_port_info ( ibdev, mad ) ) != 0 ) {
  701. DBGC ( ibdev, "IBDEV %p could not set port information: %s\n",
  702. ibdev, strerror ( rc ) );
  703. return rc;
  704. }
  705. return 0;
  706. };
  707. /**
  708. * Set partition key table
  709. *
  710. * @v ibdev Infiniband device
  711. * @v mad Set partition key table MAD
  712. */
  713. int ib_set_pkey_table ( struct ib_device *ibdev, union ib_mad *mad ) {
  714. int rc;
  715. /* Adapters with embedded SMAs do not need to support this method */
  716. if ( ! ibdev->op->set_pkey_table ) {
  717. DBGC ( ibdev, "IBDEV %p does not support setting partition "
  718. "key table\n", ibdev );
  719. return -ENOTSUP;
  720. }
  721. if ( ( rc = ibdev->op->set_pkey_table ( ibdev, mad ) ) != 0 ) {
  722. DBGC ( ibdev, "IBDEV %p could not set partition key table: "
  723. "%s\n", ibdev, strerror ( rc ) );
  724. return rc;
  725. }
  726. return 0;
  727. };
  728. /***************************************************************************
  729. *
  730. * Event queues
  731. *
  732. ***************************************************************************
  733. */
  734. /**
  735. * Poll event queue
  736. *
  737. * @v ibdev Infiniband device
  738. */
  739. void ib_poll_eq ( struct ib_device *ibdev ) {
  740. struct ib_completion_queue *cq;
  741. /* Poll device's event queue */
  742. ibdev->op->poll_eq ( ibdev );
  743. /* Poll all completion queues */
  744. list_for_each_entry ( cq, &ibdev->cqs, list )
  745. ib_poll_cq ( ibdev, cq );
  746. }
  747. /**
  748. * Single-step the Infiniband event queue
  749. *
  750. * @v process Infiniband event queue process
  751. */
  752. static void ib_step ( struct process *process __unused ) {
  753. struct ib_device *ibdev;
  754. for_each_ibdev ( ibdev )
  755. ib_poll_eq ( ibdev );
  756. }
  757. /** Infiniband event queue process */
  758. struct process ib_process __permanent_process = {
  759. .list = LIST_HEAD_INIT ( ib_process.list ),
  760. .step = ib_step,
  761. };
  762. /***************************************************************************
  763. *
  764. * Infiniband device creation/destruction
  765. *
  766. ***************************************************************************
  767. */
  768. /**
  769. * Allocate Infiniband device
  770. *
  771. * @v priv_size Size of driver private data area
  772. * @ret ibdev Infiniband device, or NULL
  773. */
  774. struct ib_device * alloc_ibdev ( size_t priv_size ) {
  775. struct ib_device *ibdev;
  776. void *drv_priv;
  777. size_t total_len;
  778. total_len = ( sizeof ( *ibdev ) + priv_size );
  779. ibdev = zalloc ( total_len );
  780. if ( ibdev ) {
  781. drv_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) );
  782. ib_set_drvdata ( ibdev, drv_priv );
  783. INIT_LIST_HEAD ( &ibdev->list );
  784. INIT_LIST_HEAD ( &ibdev->open_list );
  785. INIT_LIST_HEAD ( &ibdev->cqs );
  786. INIT_LIST_HEAD ( &ibdev->qps );
  787. ibdev->port_state = IB_PORT_STATE_DOWN;
  788. ibdev->lid = IB_LID_NONE;
  789. ibdev->pkey = IB_PKEY_DEFAULT;
  790. }
  791. return ibdev;
  792. }
  793. /**
  794. * Register Infiniband device
  795. *
  796. * @v ibdev Infiniband device
  797. * @ret rc Return status code
  798. */
  799. int register_ibdev ( struct ib_device *ibdev ) {
  800. struct ib_driver *driver;
  801. int rc;
  802. /* Add to device list */
  803. ibdev_get ( ibdev );
  804. list_add_tail ( &ibdev->list, &ib_devices );
  805. DBGC ( ibdev, "IBDEV %p registered (phys %s)\n", ibdev,
  806. ibdev->dev->name );
  807. /* Probe device */
  808. for_each_table_entry ( driver, IB_DRIVERS ) {
  809. if ( ( rc = driver->probe ( ibdev ) ) != 0 ) {
  810. DBGC ( ibdev, "IBDEV %p could not add %s device: %s\n",
  811. ibdev, driver->name, strerror ( rc ) );
  812. goto err_probe;
  813. }
  814. }
  815. return 0;
  816. err_probe:
  817. for_each_table_entry_continue_reverse ( driver, IB_DRIVERS )
  818. driver->remove ( ibdev );
  819. list_del ( &ibdev->list );
  820. ibdev_put ( ibdev );
  821. return rc;
  822. }
  823. /**
  824. * Unregister Infiniband device
  825. *
  826. * @v ibdev Infiniband device
  827. */
  828. void unregister_ibdev ( struct ib_device *ibdev ) {
  829. struct ib_driver *driver;
  830. /* Remove device */
  831. for_each_table_entry_reverse ( driver, IB_DRIVERS )
  832. driver->remove ( ibdev );
  833. /* Remove from device list */
  834. list_del ( &ibdev->list );
  835. ibdev_put ( ibdev );
  836. DBGC ( ibdev, "IBDEV %p unregistered\n", ibdev );
  837. }
  838. /**
  839. * Find Infiniband device by GID
  840. *
  841. * @v gid GID
  842. * @ret ibdev Infiniband device, or NULL
  843. */
  844. struct ib_device * find_ibdev ( union ib_gid *gid ) {
  845. struct ib_device *ibdev;
  846. for_each_ibdev ( ibdev ) {
  847. if ( memcmp ( gid, &ibdev->gid, sizeof ( *gid ) ) == 0 )
  848. return ibdev;
  849. }
  850. return NULL;
  851. }
  852. /**
  853. * Get most recently opened Infiniband device
  854. *
  855. * @ret ibdev Most recently opened Infiniband device, or NULL
  856. */
  857. struct ib_device * last_opened_ibdev ( void ) {
  858. struct ib_device *ibdev;
  859. ibdev = list_first_entry ( &open_ib_devices, struct ib_device,
  860. open_list );
  861. if ( ! ibdev )
  862. return NULL;
  863. assert ( ibdev->open_count != 0 );
  864. return ibdev;
  865. }
  866. /* Drag in IPoIB */
  867. REQUIRE_OBJECT ( ipoib );