You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

infiniband.c 15KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602
  1. /*
  2. * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License as
  6. * published by the Free Software Foundation; either version 2 of the
  7. * License, or any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful, but
  10. * WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write to the Free Software
  16. * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17. */
  18. FILE_LICENCE ( GPL2_OR_LATER );
  19. #include <stdint.h>
  20. #include <stdlib.h>
  21. #include <stdio.h>
  22. #include <string.h>
  23. #include <unistd.h>
  24. #include <byteswap.h>
  25. #include <errno.h>
  26. #include <assert.h>
  27. #include <gpxe/list.h>
  28. #include <gpxe/if_arp.h>
  29. #include <gpxe/netdevice.h>
  30. #include <gpxe/iobuf.h>
  31. #include <gpxe/ipoib.h>
  32. #include <gpxe/process.h>
  33. #include <gpxe/infiniband.h>
  34. /** @file
  35. *
  36. * Infiniband protocol
  37. *
  38. */
  39. /** List of Infiniband devices
   *
   * register_ibdev() appends each device to the tail of this list and
   * unregister_ibdev() removes it again; ib_step() walks the list to
   * poll every listed device.
   */
  40. struct list_head ib_devices = LIST_HEAD_INIT ( ib_devices );
  41. /**
  42. * Create completion queue
  43. *
  44. * @v ibdev Infiniband device
  45. * @v num_cqes Number of completion queue entries
  46. * @v op Completion queue operations
  47. * @ret cq New completion queue
  48. */
  49. struct ib_completion_queue *
  50. ib_create_cq ( struct ib_device *ibdev, unsigned int num_cqes,
  51. struct ib_completion_queue_operations *op ) {
  52. struct ib_completion_queue *cq;
  53. int rc;
  54. DBGC ( ibdev, "IBDEV %p creating completion queue\n", ibdev );
  55. /* Allocate and initialise data structure */
  56. cq = zalloc ( sizeof ( *cq ) );
  57. if ( ! cq )
  58. goto err_alloc_cq;
  59. cq->num_cqes = num_cqes;
  60. INIT_LIST_HEAD ( &cq->work_queues );
  61. cq->op = op;
  62. /* Perform device-specific initialisation and get CQN */
  63. if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) {
  64. DBGC ( ibdev, "IBDEV %p could not initialise completion "
  65. "queue: %s\n", ibdev, strerror ( rc ) );
  66. goto err_dev_create_cq;
  67. }
  68. DBGC ( ibdev, "IBDEV %p created %d-entry completion queue %p (%p) "
  69. "with CQN %#lx\n", ibdev, num_cqes, cq,
  70. ib_cq_get_drvdata ( cq ), cq->cqn );
  71. return cq;
  72. ibdev->op->destroy_cq ( ibdev, cq );
  73. err_dev_create_cq:
  74. free ( cq );
  75. err_alloc_cq:
  76. return NULL;
  77. }
  78. /**
  79. * Destroy completion queue
  80. *
  81. * @v ibdev Infiniband device
  82. * @v cq Completion queue
  83. */
  84. void ib_destroy_cq ( struct ib_device *ibdev,
  85. struct ib_completion_queue *cq ) {
  86. DBGC ( ibdev, "IBDEV %p destroying completion queue %#lx\n",
  87. ibdev, cq->cqn );
  88. assert ( list_empty ( &cq->work_queues ) );
  89. ibdev->op->destroy_cq ( ibdev, cq );
  90. free ( cq );
  91. }
  92. /**
  93. * Create queue pair
  94. *
  95. * @v ibdev Infiniband device
  96. * @v num_send_wqes Number of send work queue entries
  97. * @v send_cq Send completion queue
  98. * @v num_recv_wqes Number of receive work queue entries
  99. * @v recv_cq Receive completion queue
  100. * @v qkey Queue key
  101. * @ret qp Queue pair
  102. */
  103. struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev,
  104. unsigned int num_send_wqes,
  105. struct ib_completion_queue *send_cq,
  106. unsigned int num_recv_wqes,
  107. struct ib_completion_queue *recv_cq,
  108. unsigned long qkey ) {
  109. struct ib_queue_pair *qp;
  110. size_t total_size;
  111. int rc;
  112. DBGC ( ibdev, "IBDEV %p creating queue pair\n", ibdev );
  113. /* Allocate and initialise data structure */
  114. total_size = ( sizeof ( *qp ) +
  115. ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) +
  116. ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) );
  117. qp = zalloc ( total_size );
  118. if ( ! qp )
  119. goto err_alloc_qp;
  120. qp->ibdev = ibdev;
  121. list_add ( &qp->list, &ibdev->qps );
  122. qp->qkey = qkey;
  123. qp->send.qp = qp;
  124. qp->send.is_send = 1;
  125. qp->send.cq = send_cq;
  126. list_add ( &qp->send.list, &send_cq->work_queues );
  127. qp->send.num_wqes = num_send_wqes;
  128. qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) );
  129. qp->recv.qp = qp;
  130. qp->recv.cq = recv_cq;
  131. list_add ( &qp->recv.list, &recv_cq->work_queues );
  132. qp->recv.num_wqes = num_recv_wqes;
  133. qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) +
  134. ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ));
  135. INIT_LIST_HEAD ( &qp->mgids );
  136. /* Perform device-specific initialisation and get QPN */
  137. if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) {
  138. DBGC ( ibdev, "IBDEV %p could not initialise queue pair: "
  139. "%s\n", ibdev, strerror ( rc ) );
  140. goto err_dev_create_qp;
  141. }
  142. DBGC ( ibdev, "IBDEV %p created queue pair %p (%p) with QPN %#lx\n",
  143. ibdev, qp, ib_qp_get_drvdata ( qp ), qp->qpn );
  144. DBGC ( ibdev, "IBDEV %p QPN %#lx has %d send entries at [%p,%p)\n",
  145. ibdev, qp->qpn, num_send_wqes, qp->send.iobufs,
  146. qp->recv.iobufs );
  147. DBGC ( ibdev, "IBDEV %p QPN %#lx has %d receive entries at [%p,%p)\n",
  148. ibdev, qp->qpn, num_recv_wqes, qp->recv.iobufs,
  149. ( ( ( void * ) qp ) + total_size ) );
  150. return qp;
  151. ibdev->op->destroy_qp ( ibdev, qp );
  152. err_dev_create_qp:
  153. list_del ( &qp->send.list );
  154. list_del ( &qp->recv.list );
  155. list_del ( &qp->list );
  156. free ( qp );
  157. err_alloc_qp:
  158. return NULL;
  159. }
  160. /**
  161. * Modify queue pair
  162. *
  163. * @v ibdev Infiniband device
  164. * @v qp Queue pair
  165. * @v mod_list Modification list
  166. * @v qkey New queue key, if applicable
  167. * @ret rc Return status code
  168. */
  169. int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  170. unsigned long mod_list, unsigned long qkey ) {
  171. int rc;
  172. DBGC ( ibdev, "IBDEV %p modifying QPN %#lx\n", ibdev, qp->qpn );
  173. if ( mod_list & IB_MODIFY_QKEY )
  174. qp->qkey = qkey;
  175. if ( ( rc = ibdev->op->modify_qp ( ibdev, qp, mod_list ) ) != 0 ) {
  176. DBGC ( ibdev, "IBDEV %p could not modify QPN %#lx: %s\n",
  177. ibdev, qp->qpn, strerror ( rc ) );
  178. return rc;
  179. }
  180. return 0;
  181. }
  182. /**
  183. * Destroy queue pair
  184. *
  185. * @v ibdev Infiniband device
  186. * @v qp Queue pair
  187. */
  188. void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  189. struct io_buffer *iobuf;
  190. unsigned int i;
  191. DBGC ( ibdev, "IBDEV %p destroying QPN %#lx\n",
  192. ibdev, qp->qpn );
  193. assert ( list_empty ( &qp->mgids ) );
  194. /* Perform device-specific destruction */
  195. ibdev->op->destroy_qp ( ibdev, qp );
  196. /* Complete any remaining I/O buffers with errors */
  197. for ( i = 0 ; i < qp->send.num_wqes ; i++ ) {
  198. if ( ( iobuf = qp->send.iobufs[i] ) != NULL )
  199. ib_complete_send ( ibdev, qp, iobuf, -ECANCELED );
  200. }
  201. for ( i = 0 ; i < qp->recv.num_wqes ; i++ ) {
  202. if ( ( iobuf = qp->recv.iobufs[i] ) != NULL ) {
  203. ib_complete_recv ( ibdev, qp, NULL, iobuf,
  204. -ECANCELED );
  205. }
  206. }
  207. /* Remove work queues from completion queue */
  208. list_del ( &qp->send.list );
  209. list_del ( &qp->recv.list );
  210. /* Free QP */
  211. list_del ( &qp->list );
  212. free ( qp );
  213. }
  214. /**
  215. * Find queue pair by QPN
  216. *
  217. * @v ibdev Infiniband device
  218. * @v qpn Queue pair number
  219. * @ret qp Queue pair, or NULL
  220. */
  221. struct ib_queue_pair * ib_find_qp_qpn ( struct ib_device *ibdev,
  222. unsigned long qpn ) {
  223. struct ib_queue_pair *qp;
  224. list_for_each_entry ( qp, &ibdev->qps, list ) {
  225. if ( qp->qpn == qpn )
  226. return qp;
  227. }
  228. return NULL;
  229. }
  230. /**
  231. * Find queue pair by multicast GID
  232. *
  233. * @v ibdev Infiniband device
  234. * @v gid Multicast GID
  235. * @ret qp Queue pair, or NULL
  236. */
  237. struct ib_queue_pair * ib_find_qp_mgid ( struct ib_device *ibdev,
  238. struct ib_gid *gid ) {
  239. struct ib_queue_pair *qp;
  240. struct ib_multicast_gid *mgid;
  241. list_for_each_entry ( qp, &ibdev->qps, list ) {
  242. list_for_each_entry ( mgid, &qp->mgids, list ) {
  243. if ( memcmp ( &mgid->gid, gid,
  244. sizeof ( mgid->gid ) ) == 0 ) {
  245. return qp;
  246. }
  247. }
  248. }
  249. return NULL;
  250. }
  251. /**
  252. * Find work queue belonging to completion queue
  253. *
  254. * @v cq Completion queue
  255. * @v qpn Queue pair number
  256. * @v is_send Find send work queue (rather than receive)
  257. * @ret wq Work queue, or NULL if not found
  258. */
  259. struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
  260. unsigned long qpn, int is_send ) {
  261. struct ib_work_queue *wq;
  262. list_for_each_entry ( wq, &cq->work_queues, list ) {
  263. if ( ( wq->qp->qpn == qpn ) && ( wq->is_send == is_send ) )
  264. return wq;
  265. }
  266. return NULL;
  267. }
  268. /**
  269. * Post send work queue entry
  270. *
  271. * @v ibdev Infiniband device
  272. * @v qp Queue pair
  273. * @v av Address vector
  274. * @v iobuf I/O buffer
  275. * @ret rc Return status code
  276. */
  277. int ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  278. struct ib_address_vector *av,
  279. struct io_buffer *iobuf ) {
  280. int rc;
  281. /* Check queue fill level */
  282. if ( qp->send.fill >= qp->send.num_wqes ) {
  283. DBGC ( ibdev, "IBDEV %p QPN %#lx send queue full\n",
  284. ibdev, qp->qpn );
  285. return -ENOBUFS;
  286. }
  287. /* Post to hardware */
  288. if ( ( rc = ibdev->op->post_send ( ibdev, qp, av, iobuf ) ) != 0 ) {
  289. DBGC ( ibdev, "IBDEV %p QPN %#lx could not post send WQE: "
  290. "%s\n", ibdev, qp->qpn, strerror ( rc ) );
  291. return rc;
  292. }
  293. qp->send.fill++;
  294. return 0;
  295. }
  296. /**
  297. * Post receive work queue entry
  298. *
  299. * @v ibdev Infiniband device
  300. * @v qp Queue pair
  301. * @v iobuf I/O buffer
  302. * @ret rc Return status code
  303. */
  304. int ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  305. struct io_buffer *iobuf ) {
  306. int rc;
  307. /* Check queue fill level */
  308. if ( qp->recv.fill >= qp->recv.num_wqes ) {
  309. DBGC ( ibdev, "IBDEV %p QPN %#lx receive queue full\n",
  310. ibdev, qp->qpn );
  311. return -ENOBUFS;
  312. }
  313. /* Post to hardware */
  314. if ( ( rc = ibdev->op->post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
  315. DBGC ( ibdev, "IBDEV %p QPN %#lx could not post receive WQE: "
  316. "%s\n", ibdev, qp->qpn, strerror ( rc ) );
  317. return rc;
  318. }
  319. qp->recv.fill++;
  320. return 0;
  321. }
  322. /**
  323. * Complete send work queue entry
  324. *
  325. * @v ibdev Infiniband device
  326. * @v qp Queue pair
  327. * @v iobuf I/O buffer
  328. * @v rc Completion status code
  329. */
  330. void ib_complete_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  331. struct io_buffer *iobuf, int rc ) {
  332. qp->send.cq->op->complete_send ( ibdev, qp, iobuf, rc );
  333. qp->send.fill--;
  334. }
  335. /**
  336. * Complete receive work queue entry
  337. *
  338. * @v ibdev Infiniband device
  339. * @v qp Queue pair
  340. * @v av Address vector
  341. * @v iobuf I/O buffer
  342. * @v rc Completion status code
  343. */
  344. void ib_complete_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  345. struct ib_address_vector *av,
  346. struct io_buffer *iobuf, int rc ) {
  347. qp->recv.cq->op->complete_recv ( ibdev, qp, av, iobuf, rc );
  348. qp->recv.fill--;
  349. }
  350. /**
  351. * Open port
  352. *
  353. * @v ibdev Infiniband device
  354. * @ret rc Return status code
  355. */
  356. int ib_open ( struct ib_device *ibdev ) {
  357. int rc;
  358. /* Open device if this is the first requested opening */
  359. if ( ibdev->open_count == 0 ) {
  360. if ( ( rc = ibdev->op->open ( ibdev ) ) != 0 )
  361. return rc;
  362. }
  363. /* Increment device open request counter */
  364. ibdev->open_count++;
  365. return 0;
  366. }
  367. /**
  368. * Close port
  369. *
  370. * @v ibdev Infiniband device
  371. */
  372. void ib_close ( struct ib_device *ibdev ) {
  373. /* Decrement device open request counter */
  374. ibdev->open_count--;
  375. /* Close device if this was the last remaining requested opening */
  376. if ( ibdev->open_count == 0 )
  377. ibdev->op->close ( ibdev );
  378. }
  379. /**
  380. * Attach to multicast group
  381. *
  382. * @v ibdev Infiniband device
  383. * @v qp Queue pair
  384. * @v gid Multicast GID
  385. * @ret rc Return status code
  386. */
  387. int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  388. struct ib_gid *gid ) {
  389. struct ib_multicast_gid *mgid;
  390. int rc;
  391. /* Add to software multicast GID list */
  392. mgid = zalloc ( sizeof ( *mgid ) );
  393. if ( ! mgid ) {
  394. rc = -ENOMEM;
  395. goto err_alloc_mgid;
  396. }
  397. memcpy ( &mgid->gid, gid, sizeof ( mgid->gid ) );
  398. list_add ( &mgid->list, &qp->mgids );
  399. /* Add to hardware multicast GID list */
  400. if ( ( rc = ibdev->op->mcast_attach ( ibdev, qp, gid ) ) != 0 )
  401. goto err_dev_mcast_attach;
  402. return 0;
  403. err_dev_mcast_attach:
  404. list_del ( &mgid->list );
  405. free ( mgid );
  406. err_alloc_mgid:
  407. return rc;
  408. }
  409. /**
  410. * Detach from multicast group
  411. *
  412. * @v ibdev Infiniband device
  413. * @v qp Queue pair
  414. * @v gid Multicast GID
  415. */
  416. void ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  417. struct ib_gid *gid ) {
  418. struct ib_multicast_gid *mgid;
  419. /* Remove from hardware multicast GID list */
  420. ibdev->op->mcast_detach ( ibdev, qp, gid );
  421. /* Remove from software multicast GID list */
  422. list_for_each_entry ( mgid, &qp->mgids, list ) {
  423. if ( memcmp ( &mgid->gid, gid, sizeof ( mgid->gid ) ) == 0 ) {
  424. list_del ( &mgid->list );
  425. free ( mgid );
  426. break;
  427. }
  428. }
  429. }
  430. /***************************************************************************
  431. *
  432. * Event queues
  433. *
  434. ***************************************************************************
  435. */
  /**
   * Handle Infiniband link state change
   *
   * Forwards the notification to the IPoIB layer.
   *
   * @v ibdev		Infiniband device
   */
  void ib_link_state_changed ( struct ib_device *ibdev ) {

      /* IPoIB is the only consumer notified from here */
      ipoib_link_state_changed ( ibdev );
  }
  445. /**
  446. * Single-step the Infiniband event queue
  447. *
  448. * @v process Infiniband event queue process
  449. */
  450. static void ib_step ( struct process *process __unused ) {
  451. struct ib_device *ibdev;
  452. list_for_each_entry ( ibdev, &ib_devices, list ) {
  453. ibdev->op->poll_eq ( ibdev );
  454. }
  455. }
  456. /** Infiniband event queue process */
  457. struct process ib_process __permanent_process = {
  458. .step = ib_step,
  459. };
  460. /***************************************************************************
  461. *
  462. * Infiniband device creation/destruction
  463. *
  464. ***************************************************************************
  465. */
  466. /**
  467. * Allocate Infiniband device
  468. *
  469. * @v priv_size Size of driver private data area
  470. * @ret ibdev Infiniband device, or NULL
  471. */
  472. struct ib_device * alloc_ibdev ( size_t priv_size ) {
  473. struct ib_device *ibdev;
  474. void *drv_priv;
  475. size_t total_len;
  476. total_len = ( sizeof ( *ibdev ) + priv_size );
  477. ibdev = zalloc ( total_len );
  478. if ( ibdev ) {
  479. drv_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) );
  480. ib_set_drvdata ( ibdev, drv_priv );
  481. INIT_LIST_HEAD ( &ibdev->qps );
  482. ibdev->lid = IB_LID_NONE;
  483. ibdev->pkey = IB_PKEY_NONE;
  484. }
  485. return ibdev;
  486. }
  487. /**
  488. * Register Infiniband device
  489. *
  490. * @v ibdev Infiniband device
  491. * @ret rc Return status code
  492. */
  493. int register_ibdev ( struct ib_device *ibdev ) {
  494. int rc;
  495. /* Add to device list */
  496. ibdev_get ( ibdev );
  497. list_add_tail ( &ibdev->list, &ib_devices );
  498. /* Add IPoIB device */
  499. if ( ( rc = ipoib_probe ( ibdev ) ) != 0 ) {
  500. DBGC ( ibdev, "IBDEV %p could not add IPoIB device: %s\n",
  501. ibdev, strerror ( rc ) );
  502. goto err_ipoib_probe;
  503. }
  504. DBGC ( ibdev, "IBDEV %p registered (phys %s)\n", ibdev,
  505. ibdev->dev->name );
  506. return 0;
  507. err_ipoib_probe:
  508. list_del ( &ibdev->list );
  509. ibdev_put ( ibdev );
  510. return rc;
  511. }
  512. /**
  513. * Unregister Infiniband device
  514. *
  515. * @v ibdev Infiniband device
  516. */
  517. void unregister_ibdev ( struct ib_device *ibdev ) {
  518. /* Close device */
  519. ipoib_remove ( ibdev );
  520. /* Remove from device list */
  521. list_del ( &ibdev->list );
  522. ibdev_put ( ibdev );
  523. DBGC ( ibdev, "IBDEV %p unregistered\n", ibdev );
  524. }