您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

infiniband.c 22KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923
  1. /*
  2. * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License as
  6. * published by the Free Software Foundation; either version 2 of the
  7. * License, or any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful, but
  10. * WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write to the Free Software
  16. * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17. */
  18. FILE_LICENCE ( GPL2_OR_LATER );
  19. #include <stdint.h>
  20. #include <stdlib.h>
  21. #include <stdio.h>
  22. #include <string.h>
  23. #include <unistd.h>
  24. #include <byteswap.h>
  25. #include <errno.h>
  26. #include <assert.h>
  27. #include <gpxe/list.h>
  28. #include <gpxe/if_arp.h>
  29. #include <gpxe/netdevice.h>
  30. #include <gpxe/iobuf.h>
  31. #include <gpxe/ipoib.h>
  32. #include <gpxe/process.h>
  33. #include <gpxe/infiniband.h>
  34. #include <gpxe/ib_mi.h>
  35. #include <gpxe/ib_sma.h>
  36. /** @file
  37. *
  38. * Infiniband protocol
  39. *
  40. */
  41. /** List of Infiniband devices */
  42. struct list_head ib_devices = LIST_HEAD_INIT ( ib_devices );
  43. /** List of open Infiniband devices, in reverse order of opening */
  44. static struct list_head open_ib_devices = LIST_HEAD_INIT ( open_ib_devices );
  45. /***************************************************************************
  46. *
  47. * Completion queues
  48. *
  49. ***************************************************************************
  50. */
  51. /**
  52. * Create completion queue
  53. *
  54. * @v ibdev Infiniband device
  55. * @v num_cqes Number of completion queue entries
  56. * @v op Completion queue operations
  57. * @ret cq New completion queue
  58. */
  59. struct ib_completion_queue *
  60. ib_create_cq ( struct ib_device *ibdev, unsigned int num_cqes,
  61. struct ib_completion_queue_operations *op ) {
  62. struct ib_completion_queue *cq;
  63. int rc;
  64. DBGC ( ibdev, "IBDEV %p creating completion queue\n", ibdev );
  65. /* Allocate and initialise data structure */
  66. cq = zalloc ( sizeof ( *cq ) );
  67. if ( ! cq )
  68. goto err_alloc_cq;
  69. cq->ibdev = ibdev;
  70. list_add ( &cq->list, &ibdev->cqs );
  71. cq->num_cqes = num_cqes;
  72. INIT_LIST_HEAD ( &cq->work_queues );
  73. cq->op = op;
  74. /* Perform device-specific initialisation and get CQN */
  75. if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) {
  76. DBGC ( ibdev, "IBDEV %p could not initialise completion "
  77. "queue: %s\n", ibdev, strerror ( rc ) );
  78. goto err_dev_create_cq;
  79. }
  80. DBGC ( ibdev, "IBDEV %p created %d-entry completion queue %p (%p) "
  81. "with CQN %#lx\n", ibdev, num_cqes, cq,
  82. ib_cq_get_drvdata ( cq ), cq->cqn );
  83. return cq;
  84. ibdev->op->destroy_cq ( ibdev, cq );
  85. err_dev_create_cq:
  86. list_del ( &cq->list );
  87. free ( cq );
  88. err_alloc_cq:
  89. return NULL;
  90. }
  91. /**
  92. * Destroy completion queue
  93. *
  94. * @v ibdev Infiniband device
  95. * @v cq Completion queue
  96. */
  97. void ib_destroy_cq ( struct ib_device *ibdev,
  98. struct ib_completion_queue *cq ) {
  99. DBGC ( ibdev, "IBDEV %p destroying completion queue %#lx\n",
  100. ibdev, cq->cqn );
  101. assert ( list_empty ( &cq->work_queues ) );
  102. ibdev->op->destroy_cq ( ibdev, cq );
  103. list_del ( &cq->list );
  104. free ( cq );
  105. }
  106. /**
  107. * Poll completion queue
  108. *
  109. * @v ibdev Infiniband device
  110. * @v cq Completion queue
  111. */
  112. void ib_poll_cq ( struct ib_device *ibdev,
  113. struct ib_completion_queue *cq ) {
  114. struct ib_work_queue *wq;
  115. /* Poll completion queue */
  116. ibdev->op->poll_cq ( ibdev, cq );
  117. /* Refill receive work queues */
  118. list_for_each_entry ( wq, &cq->work_queues, list ) {
  119. if ( ! wq->is_send )
  120. ib_refill_recv ( ibdev, wq->qp );
  121. }
  122. }
  123. /***************************************************************************
  124. *
  125. * Work queues
  126. *
  127. ***************************************************************************
  128. */
  129. /**
  130. * Create queue pair
  131. *
  132. * @v ibdev Infiniband device
  133. * @v type Queue pair type
  134. * @v num_send_wqes Number of send work queue entries
  135. * @v send_cq Send completion queue
  136. * @v num_recv_wqes Number of receive work queue entries
  137. * @v recv_cq Receive completion queue
  138. * @ret qp Queue pair
  139. *
  140. * The queue pair will be left in the INIT state; you must call
  141. * ib_modify_qp() before it is ready to use for sending and receiving.
  142. */
  143. struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev,
  144. enum ib_queue_pair_type type,
  145. unsigned int num_send_wqes,
  146. struct ib_completion_queue *send_cq,
  147. unsigned int num_recv_wqes,
  148. struct ib_completion_queue *recv_cq ) {
  149. struct ib_queue_pair *qp;
  150. size_t total_size;
  151. int rc;
  152. DBGC ( ibdev, "IBDEV %p creating queue pair\n", ibdev );
  153. /* Allocate and initialise data structure */
  154. total_size = ( sizeof ( *qp ) +
  155. ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) +
  156. ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) );
  157. qp = zalloc ( total_size );
  158. if ( ! qp )
  159. goto err_alloc_qp;
  160. qp->ibdev = ibdev;
  161. list_add ( &qp->list, &ibdev->qps );
  162. qp->type = type;
  163. qp->send.qp = qp;
  164. qp->send.is_send = 1;
  165. qp->send.cq = send_cq;
  166. list_add ( &qp->send.list, &send_cq->work_queues );
  167. qp->send.psn = ( random() & 0xffffffUL );
  168. qp->send.num_wqes = num_send_wqes;
  169. qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) );
  170. qp->recv.qp = qp;
  171. qp->recv.cq = recv_cq;
  172. list_add ( &qp->recv.list, &recv_cq->work_queues );
  173. qp->recv.psn = ( random() & 0xffffffUL );
  174. qp->recv.num_wqes = num_recv_wqes;
  175. qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) +
  176. ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ));
  177. INIT_LIST_HEAD ( &qp->mgids );
  178. /* Perform device-specific initialisation and get QPN */
  179. if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) {
  180. DBGC ( ibdev, "IBDEV %p could not initialise queue pair: "
  181. "%s\n", ibdev, strerror ( rc ) );
  182. goto err_dev_create_qp;
  183. }
  184. DBGC ( ibdev, "IBDEV %p created queue pair %p (%p) with QPN %#lx\n",
  185. ibdev, qp, ib_qp_get_drvdata ( qp ), qp->qpn );
  186. DBGC ( ibdev, "IBDEV %p QPN %#lx has %d send entries at [%p,%p)\n",
  187. ibdev, qp->qpn, num_send_wqes, qp->send.iobufs,
  188. qp->recv.iobufs );
  189. DBGC ( ibdev, "IBDEV %p QPN %#lx has %d receive entries at [%p,%p)\n",
  190. ibdev, qp->qpn, num_recv_wqes, qp->recv.iobufs,
  191. ( ( ( void * ) qp ) + total_size ) );
  192. /* Calculate externally-visible QPN */
  193. switch ( type ) {
  194. case IB_QPT_SMI:
  195. qp->ext_qpn = IB_QPN_SMI;
  196. break;
  197. case IB_QPT_GSI:
  198. qp->ext_qpn = IB_QPN_GSI;
  199. break;
  200. default:
  201. qp->ext_qpn = qp->qpn;
  202. break;
  203. }
  204. if ( qp->ext_qpn != qp->qpn ) {
  205. DBGC ( ibdev, "IBDEV %p QPN %#lx has external QPN %#lx\n",
  206. ibdev, qp->qpn, qp->ext_qpn );
  207. }
  208. return qp;
  209. ibdev->op->destroy_qp ( ibdev, qp );
  210. err_dev_create_qp:
  211. list_del ( &qp->send.list );
  212. list_del ( &qp->recv.list );
  213. list_del ( &qp->list );
  214. free ( qp );
  215. err_alloc_qp:
  216. return NULL;
  217. }
  218. /**
  219. * Modify queue pair
  220. *
  221. * @v ibdev Infiniband device
  222. * @v qp Queue pair
  223. * @v av New address vector, if applicable
  224. * @ret rc Return status code
  225. */
  226. int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  227. int rc;
  228. DBGC ( ibdev, "IBDEV %p modifying QPN %#lx\n", ibdev, qp->qpn );
  229. if ( ( rc = ibdev->op->modify_qp ( ibdev, qp ) ) != 0 ) {
  230. DBGC ( ibdev, "IBDEV %p could not modify QPN %#lx: %s\n",
  231. ibdev, qp->qpn, strerror ( rc ) );
  232. return rc;
  233. }
  234. return 0;
  235. }
  236. /**
  237. * Destroy queue pair
  238. *
  239. * @v ibdev Infiniband device
  240. * @v qp Queue pair
  241. */
  242. void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  243. struct io_buffer *iobuf;
  244. unsigned int i;
  245. DBGC ( ibdev, "IBDEV %p destroying QPN %#lx\n",
  246. ibdev, qp->qpn );
  247. assert ( list_empty ( &qp->mgids ) );
  248. /* Perform device-specific destruction */
  249. ibdev->op->destroy_qp ( ibdev, qp );
  250. /* Complete any remaining I/O buffers with errors */
  251. for ( i = 0 ; i < qp->send.num_wqes ; i++ ) {
  252. if ( ( iobuf = qp->send.iobufs[i] ) != NULL )
  253. ib_complete_send ( ibdev, qp, iobuf, -ECANCELED );
  254. }
  255. for ( i = 0 ; i < qp->recv.num_wqes ; i++ ) {
  256. if ( ( iobuf = qp->recv.iobufs[i] ) != NULL ) {
  257. ib_complete_recv ( ibdev, qp, NULL, iobuf,
  258. -ECANCELED );
  259. }
  260. }
  261. /* Remove work queues from completion queue */
  262. list_del ( &qp->send.list );
  263. list_del ( &qp->recv.list );
  264. /* Free QP */
  265. list_del ( &qp->list );
  266. free ( qp );
  267. }
  268. /**
  269. * Find queue pair by QPN
  270. *
  271. * @v ibdev Infiniband device
  272. * @v qpn Queue pair number
  273. * @ret qp Queue pair, or NULL
  274. */
  275. struct ib_queue_pair * ib_find_qp_qpn ( struct ib_device *ibdev,
  276. unsigned long qpn ) {
  277. struct ib_queue_pair *qp;
  278. list_for_each_entry ( qp, &ibdev->qps, list ) {
  279. if ( ( qpn == qp->qpn ) || ( qpn == qp->ext_qpn ) )
  280. return qp;
  281. }
  282. return NULL;
  283. }
  284. /**
  285. * Find queue pair by multicast GID
  286. *
  287. * @v ibdev Infiniband device
  288. * @v gid Multicast GID
  289. * @ret qp Queue pair, or NULL
  290. */
  291. struct ib_queue_pair * ib_find_qp_mgid ( struct ib_device *ibdev,
  292. struct ib_gid *gid ) {
  293. struct ib_queue_pair *qp;
  294. struct ib_multicast_gid *mgid;
  295. list_for_each_entry ( qp, &ibdev->qps, list ) {
  296. list_for_each_entry ( mgid, &qp->mgids, list ) {
  297. if ( memcmp ( &mgid->gid, gid,
  298. sizeof ( mgid->gid ) ) == 0 ) {
  299. return qp;
  300. }
  301. }
  302. }
  303. return NULL;
  304. }
  305. /**
  306. * Find work queue belonging to completion queue
  307. *
  308. * @v cq Completion queue
  309. * @v qpn Queue pair number
  310. * @v is_send Find send work queue (rather than receive)
  311. * @ret wq Work queue, or NULL if not found
  312. */
  313. struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
  314. unsigned long qpn, int is_send ) {
  315. struct ib_work_queue *wq;
  316. list_for_each_entry ( wq, &cq->work_queues, list ) {
  317. if ( ( wq->qp->qpn == qpn ) && ( wq->is_send == is_send ) )
  318. return wq;
  319. }
  320. return NULL;
  321. }
  322. /**
  323. * Post send work queue entry
  324. *
  325. * @v ibdev Infiniband device
  326. * @v qp Queue pair
  327. * @v av Address vector
  328. * @v iobuf I/O buffer
  329. * @ret rc Return status code
  330. */
  331. int ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  332. struct ib_address_vector *av,
  333. struct io_buffer *iobuf ) {
  334. struct ib_address_vector av_copy;
  335. int rc;
  336. /* Check queue fill level */
  337. if ( qp->send.fill >= qp->send.num_wqes ) {
  338. DBGC ( ibdev, "IBDEV %p QPN %#lx send queue full\n",
  339. ibdev, qp->qpn );
  340. return -ENOBUFS;
  341. }
  342. /* Use default address vector if none specified */
  343. if ( ! av )
  344. av = &qp->av;
  345. /* Make modifiable copy of address vector */
  346. memcpy ( &av_copy, av, sizeof ( av_copy ) );
  347. av = &av_copy;
  348. /* Fill in optional parameters in address vector */
  349. if ( ! av->qkey )
  350. av->qkey = qp->qkey;
  351. if ( ! av->rate )
  352. av->rate = IB_RATE_2_5;
  353. /* Post to hardware */
  354. if ( ( rc = ibdev->op->post_send ( ibdev, qp, av, iobuf ) ) != 0 ) {
  355. DBGC ( ibdev, "IBDEV %p QPN %#lx could not post send WQE: "
  356. "%s\n", ibdev, qp->qpn, strerror ( rc ) );
  357. return rc;
  358. }
  359. qp->send.fill++;
  360. return 0;
  361. }
  362. /**
  363. * Post receive work queue entry
  364. *
  365. * @v ibdev Infiniband device
  366. * @v qp Queue pair
  367. * @v iobuf I/O buffer
  368. * @ret rc Return status code
  369. */
  370. int ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  371. struct io_buffer *iobuf ) {
  372. int rc;
  373. /* Check packet length */
  374. if ( iob_tailroom ( iobuf ) < IB_MAX_PAYLOAD_SIZE ) {
  375. DBGC ( ibdev, "IBDEV %p QPN %#lx wrong RX buffer size (%zd)\n",
  376. ibdev, qp->qpn, iob_tailroom ( iobuf ) );
  377. return -EINVAL;
  378. }
  379. /* Check queue fill level */
  380. if ( qp->recv.fill >= qp->recv.num_wqes ) {
  381. DBGC ( ibdev, "IBDEV %p QPN %#lx receive queue full\n",
  382. ibdev, qp->qpn );
  383. return -ENOBUFS;
  384. }
  385. /* Post to hardware */
  386. if ( ( rc = ibdev->op->post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
  387. DBGC ( ibdev, "IBDEV %p QPN %#lx could not post receive WQE: "
  388. "%s\n", ibdev, qp->qpn, strerror ( rc ) );
  389. return rc;
  390. }
  391. qp->recv.fill++;
  392. return 0;
  393. }
  394. /**
  395. * Complete send work queue entry
  396. *
  397. * @v ibdev Infiniband device
  398. * @v qp Queue pair
  399. * @v iobuf I/O buffer
  400. * @v rc Completion status code
  401. */
  402. void ib_complete_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  403. struct io_buffer *iobuf, int rc ) {
  404. if ( qp->send.cq->op->complete_send ) {
  405. qp->send.cq->op->complete_send ( ibdev, qp, iobuf, rc );
  406. } else {
  407. free_iob ( iobuf );
  408. }
  409. qp->send.fill--;
  410. }
  411. /**
  412. * Complete receive work queue entry
  413. *
  414. * @v ibdev Infiniband device
  415. * @v qp Queue pair
  416. * @v av Address vector
  417. * @v iobuf I/O buffer
  418. * @v rc Completion status code
  419. */
  420. void ib_complete_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  421. struct ib_address_vector *av,
  422. struct io_buffer *iobuf, int rc ) {
  423. if ( qp->recv.cq->op->complete_recv ) {
  424. qp->recv.cq->op->complete_recv ( ibdev, qp, av, iobuf, rc );
  425. } else {
  426. free_iob ( iobuf );
  427. }
  428. qp->recv.fill--;
  429. }
  430. /**
  431. * Refill receive work queue
  432. *
  433. * @v ibdev Infiniband device
  434. * @v qp Queue pair
  435. */
  436. void ib_refill_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  437. struct io_buffer *iobuf;
  438. int rc;
  439. /* Keep filling while unfilled entries remain */
  440. while ( qp->recv.fill < qp->recv.num_wqes ) {
  441. /* Allocate I/O buffer */
  442. iobuf = alloc_iob ( IB_MAX_PAYLOAD_SIZE );
  443. if ( ! iobuf ) {
  444. /* Non-fatal; we will refill on next attempt */
  445. return;
  446. }
  447. /* Post I/O buffer */
  448. if ( ( rc = ib_post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
  449. DBGC ( ibdev, "IBDEV %p could not refill: %s\n",
  450. ibdev, strerror ( rc ) );
  451. free_iob ( iobuf );
  452. /* Give up */
  453. return;
  454. }
  455. }
  456. }
  457. /***************************************************************************
  458. *
  459. * Link control
  460. *
  461. ***************************************************************************
  462. */
  463. /**
  464. * Open port
  465. *
  466. * @v ibdev Infiniband device
  467. * @ret rc Return status code
  468. */
  469. int ib_open ( struct ib_device *ibdev ) {
  470. int rc;
  471. /* Increment device open request counter */
  472. if ( ibdev->open_count++ > 0 ) {
  473. /* Device was already open; do nothing */
  474. return 0;
  475. }
  476. /* Create subnet management interface */
  477. ibdev->smi = ib_create_mi ( ibdev, IB_QPT_SMI );
  478. if ( ! ibdev->smi ) {
  479. DBGC ( ibdev, "IBDEV %p could not create SMI\n", ibdev );
  480. rc = -ENOMEM;
  481. goto err_create_smi;
  482. }
  483. /* Create subnet management agent */
  484. if ( ( rc = ib_create_sma ( ibdev, ibdev->smi ) ) != 0 ) {
  485. DBGC ( ibdev, "IBDEV %p could not create SMA: %s\n",
  486. ibdev, strerror ( rc ) );
  487. goto err_create_sma;
  488. }
  489. /* Create general services interface */
  490. ibdev->gsi = ib_create_mi ( ibdev, IB_QPT_GSI );
  491. if ( ! ibdev->gsi ) {
  492. DBGC ( ibdev, "IBDEV %p could not create GSI\n", ibdev );
  493. rc = -ENOMEM;
  494. goto err_create_gsi;
  495. }
  496. /* Open device */
  497. if ( ( rc = ibdev->op->open ( ibdev ) ) != 0 ) {
  498. DBGC ( ibdev, "IBDEV %p could not open: %s\n",
  499. ibdev, strerror ( rc ) );
  500. goto err_open;
  501. }
  502. /* Add to head of open devices list */
  503. list_add ( &ibdev->open_list, &open_ib_devices );
  504. assert ( ibdev->open_count == 1 );
  505. return 0;
  506. ibdev->op->close ( ibdev );
  507. err_open:
  508. ib_destroy_mi ( ibdev, ibdev->gsi );
  509. err_create_gsi:
  510. ib_destroy_sma ( ibdev, ibdev->smi );
  511. err_create_sma:
  512. ib_destroy_mi ( ibdev, ibdev->smi );
  513. err_create_smi:
  514. assert ( ibdev->open_count == 1 );
  515. ibdev->open_count = 0;
  516. return rc;
  517. }
  518. /**
  519. * Close port
  520. *
  521. * @v ibdev Infiniband device
  522. */
  523. void ib_close ( struct ib_device *ibdev ) {
  524. /* Decrement device open request counter */
  525. ibdev->open_count--;
  526. /* Close device if this was the last remaining requested opening */
  527. if ( ibdev->open_count == 0 ) {
  528. list_del ( &ibdev->open_list );
  529. ib_destroy_mi ( ibdev, ibdev->gsi );
  530. ib_destroy_sma ( ibdev, ibdev->smi );
  531. ib_destroy_mi ( ibdev, ibdev->smi );
  532. ibdev->op->close ( ibdev );
  533. }
  534. }
  535. /***************************************************************************
  536. *
  537. * Multicast
  538. *
  539. ***************************************************************************
  540. */
  541. /**
  542. * Attach to multicast group
  543. *
  544. * @v ibdev Infiniband device
  545. * @v qp Queue pair
  546. * @v gid Multicast GID
  547. * @ret rc Return status code
  548. *
  549. * Note that this function handles only the local device's attachment
  550. * to the multicast GID; it does not issue the relevant MADs to join
  551. * the multicast group on the subnet.
  552. */
  553. int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  554. struct ib_gid *gid ) {
  555. struct ib_multicast_gid *mgid;
  556. int rc;
  557. /* Add to software multicast GID list */
  558. mgid = zalloc ( sizeof ( *mgid ) );
  559. if ( ! mgid ) {
  560. rc = -ENOMEM;
  561. goto err_alloc_mgid;
  562. }
  563. memcpy ( &mgid->gid, gid, sizeof ( mgid->gid ) );
  564. list_add ( &mgid->list, &qp->mgids );
  565. /* Add to hardware multicast GID list */
  566. if ( ( rc = ibdev->op->mcast_attach ( ibdev, qp, gid ) ) != 0 )
  567. goto err_dev_mcast_attach;
  568. return 0;
  569. err_dev_mcast_attach:
  570. list_del ( &mgid->list );
  571. free ( mgid );
  572. err_alloc_mgid:
  573. return rc;
  574. }
  575. /**
  576. * Detach from multicast group
  577. *
  578. * @v ibdev Infiniband device
  579. * @v qp Queue pair
  580. * @v gid Multicast GID
  581. */
  582. void ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  583. struct ib_gid *gid ) {
  584. struct ib_multicast_gid *mgid;
  585. /* Remove from hardware multicast GID list */
  586. ibdev->op->mcast_detach ( ibdev, qp, gid );
  587. /* Remove from software multicast GID list */
  588. list_for_each_entry ( mgid, &qp->mgids, list ) {
  589. if ( memcmp ( &mgid->gid, gid, sizeof ( mgid->gid ) ) == 0 ) {
  590. list_del ( &mgid->list );
  591. free ( mgid );
  592. break;
  593. }
  594. }
  595. }
  596. /***************************************************************************
  597. *
  598. * Miscellaneous
  599. *
  600. ***************************************************************************
  601. */
  602. /**
  603. * Get Infiniband HCA information
  604. *
  605. * @v ibdev Infiniband device
  606. * @ret hca_guid HCA GUID
  607. * @ret num_ports Number of ports
  608. */
  609. int ib_get_hca_info ( struct ib_device *ibdev,
  610. struct ib_gid_half *hca_guid ) {
  611. struct ib_device *tmp;
  612. int num_ports = 0;
  613. /* Search for IB devices with the same physical device to
  614. * identify port count and a suitable Node GUID.
  615. */
  616. for_each_ibdev ( tmp ) {
  617. if ( tmp->dev != ibdev->dev )
  618. continue;
  619. if ( num_ports == 0 ) {
  620. memcpy ( hca_guid, &tmp->gid.u.half[1],
  621. sizeof ( *hca_guid ) );
  622. }
  623. num_ports++;
  624. }
  625. return num_ports;
  626. }
  627. /**
  628. * Set port information
  629. *
  630. * @v ibdev Infiniband device
  631. * @v mad Set port information MAD
  632. */
  633. int ib_set_port_info ( struct ib_device *ibdev, union ib_mad *mad ) {
  634. int rc;
  635. /* Adapters with embedded SMAs do not need to support this method */
  636. if ( ! ibdev->op->set_port_info ) {
  637. DBGC ( ibdev, "IBDEV %p does not support setting port "
  638. "information\n", ibdev );
  639. return -ENOTSUP;
  640. }
  641. if ( ( rc = ibdev->op->set_port_info ( ibdev, mad ) ) != 0 ) {
  642. DBGC ( ibdev, "IBDEV %p could not set port information: %s\n",
  643. ibdev, strerror ( rc ) );
  644. return rc;
  645. }
  646. return 0;
  647. };
  648. /**
  649. * Set partition key table
  650. *
  651. * @v ibdev Infiniband device
  652. * @v mad Set partition key table MAD
  653. */
  654. int ib_set_pkey_table ( struct ib_device *ibdev, union ib_mad *mad ) {
  655. int rc;
  656. /* Adapters with embedded SMAs do not need to support this method */
  657. if ( ! ibdev->op->set_pkey_table ) {
  658. DBGC ( ibdev, "IBDEV %p does not support setting partition "
  659. "key table\n", ibdev );
  660. return -ENOTSUP;
  661. }
  662. if ( ( rc = ibdev->op->set_pkey_table ( ibdev, mad ) ) != 0 ) {
  663. DBGC ( ibdev, "IBDEV %p could not set partition key table: "
  664. "%s\n", ibdev, strerror ( rc ) );
  665. return rc;
  666. }
  667. return 0;
  668. };
  669. /***************************************************************************
  670. *
  671. * Event queues
  672. *
  673. ***************************************************************************
  674. */
  /**
   * Handle Infiniband link state change
   *
   * @v ibdev     Infiniband device
   *
   * Currently the only action taken is to forward the notification to
   * the IPoIB layer.
   */
  void ib_link_state_changed ( struct ib_device *ibdev ) {

      /* Pass the news on to IPoIB */
      ipoib_link_state_changed ( ibdev );
  }
  684. /**
  685. * Poll event queue
  686. *
  687. * @v ibdev Infiniband device
  688. */
  689. void ib_poll_eq ( struct ib_device *ibdev ) {
  690. struct ib_completion_queue *cq;
  691. /* Poll device's event queue */
  692. ibdev->op->poll_eq ( ibdev );
  693. /* Poll all completion queues */
  694. list_for_each_entry ( cq, &ibdev->cqs, list )
  695. ib_poll_cq ( ibdev, cq );
  696. }
  697. /**
  698. * Single-step the Infiniband event queue
  699. *
  700. * @v process Infiniband event queue process
  701. */
  702. static void ib_step ( struct process *process __unused ) {
  703. struct ib_device *ibdev;
  704. for_each_ibdev ( ibdev )
  705. ib_poll_eq ( ibdev );
  706. }
  707. /** Infiniband event queue process */
  708. struct process ib_process __permanent_process = {
  709. .list = LIST_HEAD_INIT ( ib_process.list ),
  710. .step = ib_step,
  711. };
  712. /***************************************************************************
  713. *
  714. * Infiniband device creation/destruction
  715. *
  716. ***************************************************************************
  717. */
  718. /**
  719. * Allocate Infiniband device
  720. *
  721. * @v priv_size Size of driver private data area
  722. * @ret ibdev Infiniband device, or NULL
  723. */
  724. struct ib_device * alloc_ibdev ( size_t priv_size ) {
  725. struct ib_device *ibdev;
  726. void *drv_priv;
  727. size_t total_len;
  728. total_len = ( sizeof ( *ibdev ) + priv_size );
  729. ibdev = zalloc ( total_len );
  730. if ( ibdev ) {
  731. drv_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) );
  732. ib_set_drvdata ( ibdev, drv_priv );
  733. INIT_LIST_HEAD ( &ibdev->cqs );
  734. INIT_LIST_HEAD ( &ibdev->qps );
  735. ibdev->lid = IB_LID_NONE;
  736. ibdev->pkey = IB_PKEY_NONE;
  737. }
  738. return ibdev;
  739. }
  740. /**
  741. * Register Infiniband device
  742. *
  743. * @v ibdev Infiniband device
  744. * @ret rc Return status code
  745. */
  746. int register_ibdev ( struct ib_device *ibdev ) {
  747. int rc;
  748. /* Add to device list */
  749. ibdev_get ( ibdev );
  750. list_add_tail ( &ibdev->list, &ib_devices );
  751. /* Add IPoIB device */
  752. if ( ( rc = ipoib_probe ( ibdev ) ) != 0 ) {
  753. DBGC ( ibdev, "IBDEV %p could not add IPoIB device: %s\n",
  754. ibdev, strerror ( rc ) );
  755. goto err_ipoib_probe;
  756. }
  757. DBGC ( ibdev, "IBDEV %p registered (phys %s)\n", ibdev,
  758. ibdev->dev->name );
  759. return 0;
  760. err_ipoib_probe:
  761. list_del ( &ibdev->list );
  762. ibdev_put ( ibdev );
  763. return rc;
  764. }
  765. /**
  766. * Unregister Infiniband device
  767. *
  768. * @v ibdev Infiniband device
  769. */
  770. void unregister_ibdev ( struct ib_device *ibdev ) {
  771. /* Close device */
  772. ipoib_remove ( ibdev );
  773. /* Remove from device list */
  774. list_del ( &ibdev->list );
  775. ibdev_put ( ibdev );
  776. DBGC ( ibdev, "IBDEV %p unregistered\n", ibdev );
  777. }
  778. /**
  779. * Find Infiniband device by GID
  780. *
  781. * @v gid GID
  782. * @ret ibdev Infiniband device, or NULL
  783. */
  784. struct ib_device * find_ibdev ( struct ib_gid *gid ) {
  785. struct ib_device *ibdev;
  786. for_each_ibdev ( ibdev ) {
  787. if ( memcmp ( gid, &ibdev->gid, sizeof ( *gid ) ) == 0 )
  788. return ibdev;
  789. }
  790. return NULL;
  791. }
  792. /**
  793. * Get most recently opened Infiniband device
  794. *
  795. * @ret ibdev Most recently opened Infiniband device, or NULL
  796. */
  797. struct ib_device * last_opened_ibdev ( void ) {
  798. struct ib_device *ibdev;
  799. list_for_each_entry ( ibdev, &open_ib_devices, open_list ) {
  800. assert ( ibdev->open_count != 0 );
  801. return ibdev;
  802. }
  803. return NULL;
  804. }