Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

infiniband.c 24KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993
  1. /*
  2. * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License as
  6. * published by the Free Software Foundation; either version 2 of the
  7. * License, or any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful, but
  10. * WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write to the Free Software
  16. * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17. */
  18. FILE_LICENCE ( GPL2_OR_LATER );
  19. #include <stdint.h>
  20. #include <stdlib.h>
  21. #include <stdio.h>
  22. #include <string.h>
  23. #include <unistd.h>
  24. #include <byteswap.h>
  25. #include <errno.h>
  26. #include <assert.h>
  27. #include <ipxe/list.h>
  28. #include <ipxe/errortab.h>
  29. #include <ipxe/if_arp.h>
  30. #include <ipxe/netdevice.h>
  31. #include <ipxe/iobuf.h>
  32. #include <ipxe/process.h>
  33. #include <ipxe/infiniband.h>
  34. #include <ipxe/ib_mi.h>
  35. #include <ipxe/ib_sma.h>
  36. /** @file
  37. *
  38. * Infiniband protocol
  39. *
  40. */
  41. /** List of Infiniband devices */
  42. struct list_head ib_devices = LIST_HEAD_INIT ( ib_devices );
  43. /** List of open Infiniband devices, in reverse order of opening */
  44. static struct list_head open_ib_devices = LIST_HEAD_INIT ( open_ib_devices );
  45. /* Disambiguate the various possible EINPROGRESSes */
  46. #define EINPROGRESS_INIT __einfo_error ( EINFO_EINPROGRESS_INIT )
  47. #define EINFO_EINPROGRESS_INIT __einfo_uniqify \
  48. ( EINFO_EINPROGRESS, 0x01, "Initialising" )
  49. #define EINPROGRESS_ARMED __einfo_error ( EINFO_EINPROGRESS_ARMED )
  50. #define EINFO_EINPROGRESS_ARMED __einfo_uniqify \
  51. ( EINFO_EINPROGRESS, 0x02, "Armed" )
  52. /** Human-readable message for the link statuses */
  53. struct errortab infiniband_errors[] __errortab = {
  54. __einfo_errortab ( EINFO_EINPROGRESS_INIT ),
  55. __einfo_errortab ( EINFO_EINPROGRESS_ARMED ),
  56. };
  57. /***************************************************************************
  58. *
  59. * Completion queues
  60. *
  61. ***************************************************************************
  62. */
  63. /**
  64. * Create completion queue
  65. *
  66. * @v ibdev Infiniband device
  67. * @v num_cqes Number of completion queue entries
  68. * @v op Completion queue operations
  69. * @ret cq New completion queue
  70. */
  71. struct ib_completion_queue *
  72. ib_create_cq ( struct ib_device *ibdev, unsigned int num_cqes,
  73. struct ib_completion_queue_operations *op ) {
  74. struct ib_completion_queue *cq;
  75. int rc;
  76. DBGC ( ibdev, "IBDEV %p creating completion queue\n", ibdev );
  77. /* Allocate and initialise data structure */
  78. cq = zalloc ( sizeof ( *cq ) );
  79. if ( ! cq )
  80. goto err_alloc_cq;
  81. cq->ibdev = ibdev;
  82. list_add ( &cq->list, &ibdev->cqs );
  83. cq->num_cqes = num_cqes;
  84. INIT_LIST_HEAD ( &cq->work_queues );
  85. cq->op = op;
  86. /* Perform device-specific initialisation and get CQN */
  87. if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) {
  88. DBGC ( ibdev, "IBDEV %p could not initialise completion "
  89. "queue: %s\n", ibdev, strerror ( rc ) );
  90. goto err_dev_create_cq;
  91. }
  92. DBGC ( ibdev, "IBDEV %p created %d-entry completion queue %p (%p) "
  93. "with CQN %#lx\n", ibdev, num_cqes, cq,
  94. ib_cq_get_drvdata ( cq ), cq->cqn );
  95. return cq;
  96. ibdev->op->destroy_cq ( ibdev, cq );
  97. err_dev_create_cq:
  98. list_del ( &cq->list );
  99. free ( cq );
  100. err_alloc_cq:
  101. return NULL;
  102. }
  103. /**
  104. * Destroy completion queue
  105. *
  106. * @v ibdev Infiniband device
  107. * @v cq Completion queue
  108. */
  109. void ib_destroy_cq ( struct ib_device *ibdev,
  110. struct ib_completion_queue *cq ) {
  111. DBGC ( ibdev, "IBDEV %p destroying completion queue %#lx\n",
  112. ibdev, cq->cqn );
  113. assert ( list_empty ( &cq->work_queues ) );
  114. ibdev->op->destroy_cq ( ibdev, cq );
  115. list_del ( &cq->list );
  116. free ( cq );
  117. }
  118. /**
  119. * Poll completion queue
  120. *
  121. * @v ibdev Infiniband device
  122. * @v cq Completion queue
  123. */
  124. void ib_poll_cq ( struct ib_device *ibdev,
  125. struct ib_completion_queue *cq ) {
  126. struct ib_work_queue *wq;
  127. /* Poll completion queue */
  128. ibdev->op->poll_cq ( ibdev, cq );
  129. /* Refill receive work queues */
  130. list_for_each_entry ( wq, &cq->work_queues, list ) {
  131. if ( ! wq->is_send )
  132. ib_refill_recv ( ibdev, wq->qp );
  133. }
  134. }
  135. /***************************************************************************
  136. *
  137. * Work queues
  138. *
  139. ***************************************************************************
  140. */
  141. /**
  142. * Create queue pair
  143. *
  144. * @v ibdev Infiniband device
  145. * @v type Queue pair type
  146. * @v num_send_wqes Number of send work queue entries
  147. * @v send_cq Send completion queue
  148. * @v num_recv_wqes Number of receive work queue entries
  149. * @v recv_cq Receive completion queue
  150. * @ret qp Queue pair
  151. *
  152. * The queue pair will be left in the INIT state; you must call
  153. * ib_modify_qp() before it is ready to use for sending and receiving.
  154. */
  155. struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev,
  156. enum ib_queue_pair_type type,
  157. unsigned int num_send_wqes,
  158. struct ib_completion_queue *send_cq,
  159. unsigned int num_recv_wqes,
  160. struct ib_completion_queue *recv_cq ) {
  161. struct ib_queue_pair *qp;
  162. size_t total_size;
  163. int rc;
  164. DBGC ( ibdev, "IBDEV %p creating queue pair\n", ibdev );
  165. /* Allocate and initialise data structure */
  166. total_size = ( sizeof ( *qp ) +
  167. ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) +
  168. ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) );
  169. qp = zalloc ( total_size );
  170. if ( ! qp )
  171. goto err_alloc_qp;
  172. qp->ibdev = ibdev;
  173. list_add ( &qp->list, &ibdev->qps );
  174. qp->type = type;
  175. qp->send.qp = qp;
  176. qp->send.is_send = 1;
  177. qp->send.cq = send_cq;
  178. list_add ( &qp->send.list, &send_cq->work_queues );
  179. qp->send.psn = ( random() & 0xffffffUL );
  180. qp->send.num_wqes = num_send_wqes;
  181. qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) );
  182. qp->recv.qp = qp;
  183. qp->recv.cq = recv_cq;
  184. list_add ( &qp->recv.list, &recv_cq->work_queues );
  185. qp->recv.psn = ( random() & 0xffffffUL );
  186. qp->recv.num_wqes = num_recv_wqes;
  187. qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) +
  188. ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ));
  189. INIT_LIST_HEAD ( &qp->mgids );
  190. /* Perform device-specific initialisation and get QPN */
  191. if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) {
  192. DBGC ( ibdev, "IBDEV %p could not initialise queue pair: "
  193. "%s\n", ibdev, strerror ( rc ) );
  194. goto err_dev_create_qp;
  195. }
  196. DBGC ( ibdev, "IBDEV %p created queue pair %p (%p) with QPN %#lx\n",
  197. ibdev, qp, ib_qp_get_drvdata ( qp ), qp->qpn );
  198. DBGC ( ibdev, "IBDEV %p QPN %#lx has %d send entries at [%p,%p)\n",
  199. ibdev, qp->qpn, num_send_wqes, qp->send.iobufs,
  200. qp->recv.iobufs );
  201. DBGC ( ibdev, "IBDEV %p QPN %#lx has %d receive entries at [%p,%p)\n",
  202. ibdev, qp->qpn, num_recv_wqes, qp->recv.iobufs,
  203. ( ( ( void * ) qp ) + total_size ) );
  204. /* Calculate externally-visible QPN */
  205. switch ( type ) {
  206. case IB_QPT_SMI:
  207. qp->ext_qpn = IB_QPN_SMI;
  208. break;
  209. case IB_QPT_GSI:
  210. qp->ext_qpn = IB_QPN_GSI;
  211. break;
  212. default:
  213. qp->ext_qpn = qp->qpn;
  214. break;
  215. }
  216. if ( qp->ext_qpn != qp->qpn ) {
  217. DBGC ( ibdev, "IBDEV %p QPN %#lx has external QPN %#lx\n",
  218. ibdev, qp->qpn, qp->ext_qpn );
  219. }
  220. return qp;
  221. ibdev->op->destroy_qp ( ibdev, qp );
  222. err_dev_create_qp:
  223. list_del ( &qp->send.list );
  224. list_del ( &qp->recv.list );
  225. list_del ( &qp->list );
  226. free ( qp );
  227. err_alloc_qp:
  228. return NULL;
  229. }
  230. /**
  231. * Modify queue pair
  232. *
  233. * @v ibdev Infiniband device
  234. * @v qp Queue pair
  235. * @v av New address vector, if applicable
  236. * @ret rc Return status code
  237. */
  238. int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  239. int rc;
  240. DBGC ( ibdev, "IBDEV %p modifying QPN %#lx\n", ibdev, qp->qpn );
  241. if ( ( rc = ibdev->op->modify_qp ( ibdev, qp ) ) != 0 ) {
  242. DBGC ( ibdev, "IBDEV %p could not modify QPN %#lx: %s\n",
  243. ibdev, qp->qpn, strerror ( rc ) );
  244. return rc;
  245. }
  246. return 0;
  247. }
  248. /**
  249. * Destroy queue pair
  250. *
  251. * @v ibdev Infiniband device
  252. * @v qp Queue pair
  253. */
  254. void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  255. struct io_buffer *iobuf;
  256. unsigned int i;
  257. DBGC ( ibdev, "IBDEV %p destroying QPN %#lx\n",
  258. ibdev, qp->qpn );
  259. assert ( list_empty ( &qp->mgids ) );
  260. /* Perform device-specific destruction */
  261. ibdev->op->destroy_qp ( ibdev, qp );
  262. /* Complete any remaining I/O buffers with errors */
  263. for ( i = 0 ; i < qp->send.num_wqes ; i++ ) {
  264. if ( ( iobuf = qp->send.iobufs[i] ) != NULL )
  265. ib_complete_send ( ibdev, qp, iobuf, -ECANCELED );
  266. }
  267. for ( i = 0 ; i < qp->recv.num_wqes ; i++ ) {
  268. if ( ( iobuf = qp->recv.iobufs[i] ) != NULL ) {
  269. ib_complete_recv ( ibdev, qp, NULL, iobuf,
  270. -ECANCELED );
  271. }
  272. }
  273. /* Remove work queues from completion queue */
  274. list_del ( &qp->send.list );
  275. list_del ( &qp->recv.list );
  276. /* Free QP */
  277. list_del ( &qp->list );
  278. free ( qp );
  279. }
  280. /**
  281. * Find queue pair by QPN
  282. *
  283. * @v ibdev Infiniband device
  284. * @v qpn Queue pair number
  285. * @ret qp Queue pair, or NULL
  286. */
  287. struct ib_queue_pair * ib_find_qp_qpn ( struct ib_device *ibdev,
  288. unsigned long qpn ) {
  289. struct ib_queue_pair *qp;
  290. list_for_each_entry ( qp, &ibdev->qps, list ) {
  291. if ( ( qpn == qp->qpn ) || ( qpn == qp->ext_qpn ) )
  292. return qp;
  293. }
  294. return NULL;
  295. }
  296. /**
  297. * Find queue pair by multicast GID
  298. *
  299. * @v ibdev Infiniband device
  300. * @v gid Multicast GID
  301. * @ret qp Queue pair, or NULL
  302. */
  303. struct ib_queue_pair * ib_find_qp_mgid ( struct ib_device *ibdev,
  304. union ib_gid *gid ) {
  305. struct ib_queue_pair *qp;
  306. struct ib_multicast_gid *mgid;
  307. list_for_each_entry ( qp, &ibdev->qps, list ) {
  308. list_for_each_entry ( mgid, &qp->mgids, list ) {
  309. if ( memcmp ( &mgid->gid, gid,
  310. sizeof ( mgid->gid ) ) == 0 ) {
  311. return qp;
  312. }
  313. }
  314. }
  315. return NULL;
  316. }
  317. /**
  318. * Find work queue belonging to completion queue
  319. *
  320. * @v cq Completion queue
  321. * @v qpn Queue pair number
  322. * @v is_send Find send work queue (rather than receive)
  323. * @ret wq Work queue, or NULL if not found
  324. */
  325. struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
  326. unsigned long qpn, int is_send ) {
  327. struct ib_work_queue *wq;
  328. list_for_each_entry ( wq, &cq->work_queues, list ) {
  329. if ( ( wq->qp->qpn == qpn ) && ( wq->is_send == is_send ) )
  330. return wq;
  331. }
  332. return NULL;
  333. }
  334. /**
  335. * Post send work queue entry
  336. *
  337. * @v ibdev Infiniband device
  338. * @v qp Queue pair
  339. * @v av Address vector
  340. * @v iobuf I/O buffer
  341. * @ret rc Return status code
  342. */
  343. int ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  344. struct ib_address_vector *av,
  345. struct io_buffer *iobuf ) {
  346. struct ib_address_vector av_copy;
  347. int rc;
  348. /* Check queue fill level */
  349. if ( qp->send.fill >= qp->send.num_wqes ) {
  350. DBGC ( ibdev, "IBDEV %p QPN %#lx send queue full\n",
  351. ibdev, qp->qpn );
  352. return -ENOBUFS;
  353. }
  354. /* Use default address vector if none specified */
  355. if ( ! av )
  356. av = &qp->av;
  357. /* Make modifiable copy of address vector */
  358. memcpy ( &av_copy, av, sizeof ( av_copy ) );
  359. av = &av_copy;
  360. /* Fill in optional parameters in address vector */
  361. if ( ! av->qkey )
  362. av->qkey = qp->qkey;
  363. if ( ! av->rate )
  364. av->rate = IB_RATE_2_5;
  365. /* Post to hardware */
  366. if ( ( rc = ibdev->op->post_send ( ibdev, qp, av, iobuf ) ) != 0 ) {
  367. DBGC ( ibdev, "IBDEV %p QPN %#lx could not post send WQE: "
  368. "%s\n", ibdev, qp->qpn, strerror ( rc ) );
  369. return rc;
  370. }
  371. qp->send.fill++;
  372. return 0;
  373. }
  374. /**
  375. * Post receive work queue entry
  376. *
  377. * @v ibdev Infiniband device
  378. * @v qp Queue pair
  379. * @v iobuf I/O buffer
  380. * @ret rc Return status code
  381. */
  382. int ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  383. struct io_buffer *iobuf ) {
  384. int rc;
  385. /* Check packet length */
  386. if ( iob_tailroom ( iobuf ) < IB_MAX_PAYLOAD_SIZE ) {
  387. DBGC ( ibdev, "IBDEV %p QPN %#lx wrong RX buffer size (%zd)\n",
  388. ibdev, qp->qpn, iob_tailroom ( iobuf ) );
  389. return -EINVAL;
  390. }
  391. /* Check queue fill level */
  392. if ( qp->recv.fill >= qp->recv.num_wqes ) {
  393. DBGC ( ibdev, "IBDEV %p QPN %#lx receive queue full\n",
  394. ibdev, qp->qpn );
  395. return -ENOBUFS;
  396. }
  397. /* Post to hardware */
  398. if ( ( rc = ibdev->op->post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
  399. DBGC ( ibdev, "IBDEV %p QPN %#lx could not post receive WQE: "
  400. "%s\n", ibdev, qp->qpn, strerror ( rc ) );
  401. return rc;
  402. }
  403. qp->recv.fill++;
  404. return 0;
  405. }
  406. /**
  407. * Complete send work queue entry
  408. *
  409. * @v ibdev Infiniband device
  410. * @v qp Queue pair
  411. * @v iobuf I/O buffer
  412. * @v rc Completion status code
  413. */
  414. void ib_complete_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  415. struct io_buffer *iobuf, int rc ) {
  416. if ( qp->send.cq->op->complete_send ) {
  417. qp->send.cq->op->complete_send ( ibdev, qp, iobuf, rc );
  418. } else {
  419. free_iob ( iobuf );
  420. }
  421. qp->send.fill--;
  422. }
  423. /**
  424. * Complete receive work queue entry
  425. *
  426. * @v ibdev Infiniband device
  427. * @v qp Queue pair
  428. * @v av Address vector, or NULL
  429. * @v iobuf I/O buffer
  430. * @v rc Completion status code
  431. */
  432. void ib_complete_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  433. struct ib_address_vector *av,
  434. struct io_buffer *iobuf, int rc ) {
  435. if ( qp->recv.cq->op->complete_recv ) {
  436. qp->recv.cq->op->complete_recv ( ibdev, qp, av, iobuf, rc );
  437. } else {
  438. free_iob ( iobuf );
  439. }
  440. qp->recv.fill--;
  441. }
  442. /**
  443. * Refill receive work queue
  444. *
  445. * @v ibdev Infiniband device
  446. * @v qp Queue pair
  447. */
  448. void ib_refill_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  449. struct io_buffer *iobuf;
  450. int rc;
  451. /* Keep filling while unfilled entries remain */
  452. while ( qp->recv.fill < qp->recv.num_wqes ) {
  453. /* Allocate I/O buffer */
  454. iobuf = alloc_iob ( IB_MAX_PAYLOAD_SIZE );
  455. if ( ! iobuf ) {
  456. /* Non-fatal; we will refill on next attempt */
  457. return;
  458. }
  459. /* Post I/O buffer */
  460. if ( ( rc = ib_post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
  461. DBGC ( ibdev, "IBDEV %p could not refill: %s\n",
  462. ibdev, strerror ( rc ) );
  463. free_iob ( iobuf );
  464. /* Give up */
  465. return;
  466. }
  467. }
  468. }
  469. /***************************************************************************
  470. *
  471. * Link control
  472. *
  473. ***************************************************************************
  474. */
  475. /**
  476. * Get link state
  477. *
  478. * @v ibdev Infiniband device
  479. * @ret rc Link status code
  480. */
  481. int ib_link_rc ( struct ib_device *ibdev ) {
  482. switch ( ibdev->port_state ) {
  483. case IB_PORT_STATE_DOWN: return -ENOTCONN;
  484. case IB_PORT_STATE_INIT: return -EINPROGRESS_INIT;
  485. case IB_PORT_STATE_ARMED: return -EINPROGRESS_ARMED;
  486. case IB_PORT_STATE_ACTIVE: return 0;
  487. default: return -EINVAL;
  488. }
  489. }
  490. /**
  491. * Textual representation of Infiniband link state
  492. *
  493. * @v ibdev Infiniband device
  494. * @ret link_text Link state text
  495. */
  496. static const char * ib_link_state_text ( struct ib_device *ibdev ) {
  497. switch ( ibdev->port_state ) {
  498. case IB_PORT_STATE_DOWN: return "DOWN";
  499. case IB_PORT_STATE_INIT: return "INIT";
  500. case IB_PORT_STATE_ARMED: return "ARMED";
  501. case IB_PORT_STATE_ACTIVE: return "ACTIVE";
  502. default: return "UNKNOWN";
  503. }
  504. }
  505. /**
  506. * Notify drivers of Infiniband device or link state change
  507. *
  508. * @v ibdev Infiniband device
  509. */
  510. static void ib_notify ( struct ib_device *ibdev ) {
  511. struct ib_driver *driver;
  512. for_each_table_entry ( driver, IB_DRIVERS )
  513. driver->notify ( ibdev );
  514. }
  515. /**
  516. * Notify of Infiniband link state change
  517. *
  518. * @v ibdev Infiniband device
  519. */
  520. void ib_link_state_changed ( struct ib_device *ibdev ) {
  521. DBGC ( ibdev, "IBDEV %p link state is %s\n",
  522. ibdev, ib_link_state_text ( ibdev ) );
  523. /* Notify drivers of link state change */
  524. ib_notify ( ibdev );
  525. }
  526. /**
  527. * Open port
  528. *
  529. * @v ibdev Infiniband device
  530. * @ret rc Return status code
  531. */
  532. int ib_open ( struct ib_device *ibdev ) {
  533. int rc;
  534. /* Increment device open request counter */
  535. if ( ibdev->open_count++ > 0 ) {
  536. /* Device was already open; do nothing */
  537. return 0;
  538. }
  539. /* Open device */
  540. if ( ( rc = ibdev->op->open ( ibdev ) ) != 0 ) {
  541. DBGC ( ibdev, "IBDEV %p could not open: %s\n",
  542. ibdev, strerror ( rc ) );
  543. goto err_open;
  544. }
  545. /* Create subnet management interface */
  546. ibdev->smi = ib_create_mi ( ibdev, IB_QPT_SMI );
  547. if ( ! ibdev->smi ) {
  548. DBGC ( ibdev, "IBDEV %p could not create SMI\n", ibdev );
  549. rc = -ENOMEM;
  550. goto err_create_smi;
  551. }
  552. /* Create subnet management agent */
  553. if ( ( rc = ib_create_sma ( ibdev, ibdev->smi ) ) != 0 ) {
  554. DBGC ( ibdev, "IBDEV %p could not create SMA: %s\n",
  555. ibdev, strerror ( rc ) );
  556. goto err_create_sma;
  557. }
  558. /* Create general services interface */
  559. ibdev->gsi = ib_create_mi ( ibdev, IB_QPT_GSI );
  560. if ( ! ibdev->gsi ) {
  561. DBGC ( ibdev, "IBDEV %p could not create GSI\n", ibdev );
  562. rc = -ENOMEM;
  563. goto err_create_gsi;
  564. }
  565. /* Add to head of open devices list */
  566. list_add ( &ibdev->open_list, &open_ib_devices );
  567. /* Notify drivers of device state change */
  568. ib_notify ( ibdev );
  569. assert ( ibdev->open_count == 1 );
  570. return 0;
  571. ib_destroy_mi ( ibdev, ibdev->gsi );
  572. err_create_gsi:
  573. ib_destroy_sma ( ibdev, ibdev->smi );
  574. err_create_sma:
  575. ib_destroy_mi ( ibdev, ibdev->smi );
  576. err_create_smi:
  577. ibdev->op->close ( ibdev );
  578. err_open:
  579. assert ( ibdev->open_count == 1 );
  580. ibdev->open_count = 0;
  581. return rc;
  582. }
  583. /**
  584. * Close port
  585. *
  586. * @v ibdev Infiniband device
  587. */
  588. void ib_close ( struct ib_device *ibdev ) {
  589. /* Decrement device open request counter */
  590. ibdev->open_count--;
  591. /* Close device if this was the last remaining requested opening */
  592. if ( ibdev->open_count == 0 ) {
  593. ib_notify ( ibdev );
  594. list_del ( &ibdev->open_list );
  595. ib_destroy_mi ( ibdev, ibdev->gsi );
  596. ib_destroy_sma ( ibdev, ibdev->smi );
  597. ib_destroy_mi ( ibdev, ibdev->smi );
  598. ibdev->op->close ( ibdev );
  599. ibdev->port_state = IB_PORT_STATE_DOWN;
  600. }
  601. }
  602. /***************************************************************************
  603. *
  604. * Multicast
  605. *
  606. ***************************************************************************
  607. */
  608. /**
  609. * Attach to multicast group
  610. *
  611. * @v ibdev Infiniband device
  612. * @v qp Queue pair
  613. * @v gid Multicast GID
  614. * @ret rc Return status code
  615. *
  616. * Note that this function handles only the local device's attachment
  617. * to the multicast GID; it does not issue the relevant MADs to join
  618. * the multicast group on the subnet.
  619. */
  620. int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  621. union ib_gid *gid ) {
  622. struct ib_multicast_gid *mgid;
  623. int rc;
  624. /* Add to software multicast GID list */
  625. mgid = zalloc ( sizeof ( *mgid ) );
  626. if ( ! mgid ) {
  627. rc = -ENOMEM;
  628. goto err_alloc_mgid;
  629. }
  630. memcpy ( &mgid->gid, gid, sizeof ( mgid->gid ) );
  631. list_add ( &mgid->list, &qp->mgids );
  632. /* Add to hardware multicast GID list */
  633. if ( ( rc = ibdev->op->mcast_attach ( ibdev, qp, gid ) ) != 0 )
  634. goto err_dev_mcast_attach;
  635. return 0;
  636. err_dev_mcast_attach:
  637. list_del ( &mgid->list );
  638. free ( mgid );
  639. err_alloc_mgid:
  640. return rc;
  641. }
  642. /**
  643. * Detach from multicast group
  644. *
  645. * @v ibdev Infiniband device
  646. * @v qp Queue pair
  647. * @v gid Multicast GID
  648. */
  649. void ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  650. union ib_gid *gid ) {
  651. struct ib_multicast_gid *mgid;
  652. /* Remove from hardware multicast GID list */
  653. ibdev->op->mcast_detach ( ibdev, qp, gid );
  654. /* Remove from software multicast GID list */
  655. list_for_each_entry ( mgid, &qp->mgids, list ) {
  656. if ( memcmp ( &mgid->gid, gid, sizeof ( mgid->gid ) ) == 0 ) {
  657. list_del ( &mgid->list );
  658. free ( mgid );
  659. break;
  660. }
  661. }
  662. }
  663. /***************************************************************************
  664. *
  665. * Miscellaneous
  666. *
  667. ***************************************************************************
  668. */
  669. /**
  670. * Count Infiniband HCA ports
  671. *
  672. * @v ibdev Infiniband device
  673. * @ret num_ports Number of ports
  674. */
  675. int ib_count_ports ( struct ib_device *ibdev ) {
  676. struct ib_device *tmp;
  677. int num_ports = 0;
  678. /* Search for IB devices with the same physical device to
  679. * identify port count.
  680. */
  681. for_each_ibdev ( tmp ) {
  682. if ( tmp->dev == ibdev->dev )
  683. num_ports++;
  684. }
  685. return num_ports;
  686. }
  687. /**
  688. * Set port information
  689. *
  690. * @v ibdev Infiniband device
  691. * @v mad Set port information MAD
  692. */
  693. int ib_set_port_info ( struct ib_device *ibdev, union ib_mad *mad ) {
  694. int rc;
  695. /* Adapters with embedded SMAs do not need to support this method */
  696. if ( ! ibdev->op->set_port_info ) {
  697. DBGC ( ibdev, "IBDEV %p does not support setting port "
  698. "information\n", ibdev );
  699. return -ENOTSUP;
  700. }
  701. if ( ( rc = ibdev->op->set_port_info ( ibdev, mad ) ) != 0 ) {
  702. DBGC ( ibdev, "IBDEV %p could not set port information: %s\n",
  703. ibdev, strerror ( rc ) );
  704. return rc;
  705. }
  706. return 0;
  707. };
  708. /**
  709. * Set partition key table
  710. *
  711. * @v ibdev Infiniband device
  712. * @v mad Set partition key table MAD
  713. */
  714. int ib_set_pkey_table ( struct ib_device *ibdev, union ib_mad *mad ) {
  715. int rc;
  716. /* Adapters with embedded SMAs do not need to support this method */
  717. if ( ! ibdev->op->set_pkey_table ) {
  718. DBGC ( ibdev, "IBDEV %p does not support setting partition "
  719. "key table\n", ibdev );
  720. return -ENOTSUP;
  721. }
  722. if ( ( rc = ibdev->op->set_pkey_table ( ibdev, mad ) ) != 0 ) {
  723. DBGC ( ibdev, "IBDEV %p could not set partition key table: "
  724. "%s\n", ibdev, strerror ( rc ) );
  725. return rc;
  726. }
  727. return 0;
  728. };
  729. /***************************************************************************
  730. *
  731. * Event queues
  732. *
  733. ***************************************************************************
  734. */
  735. /**
  736. * Poll event queue
  737. *
  738. * @v ibdev Infiniband device
  739. */
  740. void ib_poll_eq ( struct ib_device *ibdev ) {
  741. struct ib_completion_queue *cq;
  742. /* Poll device's event queue */
  743. ibdev->op->poll_eq ( ibdev );
  744. /* Poll all completion queues */
  745. list_for_each_entry ( cq, &ibdev->cqs, list )
  746. ib_poll_cq ( ibdev, cq );
  747. }
  748. /**
  749. * Single-step the Infiniband event queue
  750. *
  751. * @v process Infiniband event queue process
  752. */
  753. static void ib_step ( struct process *process __unused ) {
  754. struct ib_device *ibdev;
  755. list_for_each_entry ( ibdev, &open_ib_devices, open_list )
  756. ib_poll_eq ( ibdev );
  757. }
  758. /** Infiniband event queue process */
  759. PERMANENT_PROCESS ( ib_process, ib_step );
  760. /***************************************************************************
  761. *
  762. * Infiniband device creation/destruction
  763. *
  764. ***************************************************************************
  765. */
  766. /**
  767. * Allocate Infiniband device
  768. *
  769. * @v priv_size Size of driver private data area
  770. * @ret ibdev Infiniband device, or NULL
  771. */
  772. struct ib_device * alloc_ibdev ( size_t priv_size ) {
  773. struct ib_device *ibdev;
  774. void *drv_priv;
  775. size_t total_len;
  776. total_len = ( sizeof ( *ibdev ) + priv_size );
  777. ibdev = zalloc ( total_len );
  778. if ( ibdev ) {
  779. drv_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) );
  780. ib_set_drvdata ( ibdev, drv_priv );
  781. INIT_LIST_HEAD ( &ibdev->list );
  782. INIT_LIST_HEAD ( &ibdev->open_list );
  783. INIT_LIST_HEAD ( &ibdev->cqs );
  784. INIT_LIST_HEAD ( &ibdev->qps );
  785. ibdev->port_state = IB_PORT_STATE_DOWN;
  786. ibdev->lid = IB_LID_NONE;
  787. ibdev->pkey = IB_PKEY_DEFAULT;
  788. }
  789. return ibdev;
  790. }
  791. /**
  792. * Register Infiniband device
  793. *
  794. * @v ibdev Infiniband device
  795. * @ret rc Return status code
  796. */
  797. int register_ibdev ( struct ib_device *ibdev ) {
  798. struct ib_driver *driver;
  799. int rc;
  800. /* Add to device list */
  801. ibdev_get ( ibdev );
  802. list_add_tail ( &ibdev->list, &ib_devices );
  803. DBGC ( ibdev, "IBDEV %p registered (phys %s)\n", ibdev,
  804. ibdev->dev->name );
  805. /* Probe device */
  806. for_each_table_entry ( driver, IB_DRIVERS ) {
  807. if ( ( rc = driver->probe ( ibdev ) ) != 0 ) {
  808. DBGC ( ibdev, "IBDEV %p could not add %s device: %s\n",
  809. ibdev, driver->name, strerror ( rc ) );
  810. goto err_probe;
  811. }
  812. }
  813. return 0;
  814. err_probe:
  815. for_each_table_entry_continue_reverse ( driver, IB_DRIVERS )
  816. driver->remove ( ibdev );
  817. list_del ( &ibdev->list );
  818. ibdev_put ( ibdev );
  819. return rc;
  820. }
  821. /**
  822. * Unregister Infiniband device
  823. *
  824. * @v ibdev Infiniband device
  825. */
  826. void unregister_ibdev ( struct ib_device *ibdev ) {
  827. struct ib_driver *driver;
  828. /* Remove device */
  829. for_each_table_entry_reverse ( driver, IB_DRIVERS )
  830. driver->remove ( ibdev );
  831. /* Remove from device list */
  832. list_del ( &ibdev->list );
  833. ibdev_put ( ibdev );
  834. DBGC ( ibdev, "IBDEV %p unregistered\n", ibdev );
  835. }
  836. /**
  837. * Find Infiniband device by GID
  838. *
  839. * @v gid GID
  840. * @ret ibdev Infiniband device, or NULL
  841. */
  842. struct ib_device * find_ibdev ( union ib_gid *gid ) {
  843. struct ib_device *ibdev;
  844. for_each_ibdev ( ibdev ) {
  845. if ( memcmp ( gid, &ibdev->gid, sizeof ( *gid ) ) == 0 )
  846. return ibdev;
  847. }
  848. return NULL;
  849. }
  850. /**
  851. * Get most recently opened Infiniband device
  852. *
  853. * @ret ibdev Most recently opened Infiniband device, or NULL
  854. */
  855. struct ib_device * last_opened_ibdev ( void ) {
  856. struct ib_device *ibdev;
  857. ibdev = list_first_entry ( &open_ib_devices, struct ib_device,
  858. open_list );
  859. if ( ! ibdev )
  860. return NULL;
  861. assert ( ibdev->open_count != 0 );
  862. return ibdev;
  863. }
  864. /* Drag in IPoIB */
  865. REQUIRE_OBJECT ( ipoib );