You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

infiniband.c 26KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059
  1. /*
  2. * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License as
  6. * published by the Free Software Foundation; either version 2 of the
  7. * License, or any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful, but
  10. * WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write to the Free Software
  16. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  17. * 02110-1301, USA.
  18. *
  19. * You can also choose to distribute this program under the terms of
  20. * the Unmodified Binary Distribution Licence (as given in the file
  21. * COPYING.UBDL), provided that you have satisfied its requirements.
  22. */
  23. FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
  24. #include <stdint.h>
  25. #include <stdlib.h>
  26. #include <stdio.h>
  27. #include <string.h>
  28. #include <unistd.h>
  29. #include <byteswap.h>
  30. #include <errno.h>
  31. #include <assert.h>
  32. #include <ipxe/list.h>
  33. #include <ipxe/errortab.h>
  34. #include <ipxe/if_arp.h>
  35. #include <ipxe/netdevice.h>
  36. #include <ipxe/iobuf.h>
  37. #include <ipxe/process.h>
  38. #include <ipxe/profile.h>
  39. #include <ipxe/infiniband.h>
  40. #include <ipxe/ib_mi.h>
  41. #include <ipxe/ib_sma.h>
  42. /** @file
  43. *
  44. * Infiniband protocol
  45. *
  46. */
  47. /** List of Infiniband devices */
  48. struct list_head ib_devices = LIST_HEAD_INIT ( ib_devices );
  49. /** List of open Infiniband devices, in reverse order of opening */
  50. static struct list_head open_ib_devices = LIST_HEAD_INIT ( open_ib_devices );
  51. /** Infiniband device index */
  52. static unsigned int ibdev_index = 0;
  53. /** Post send work queue entry profiler */
  54. static struct profiler ib_post_send_profiler __profiler =
  55. { .name = "ib.post_send" };
  56. /** Post receive work queue entry profiler */
  57. static struct profiler ib_post_recv_profiler __profiler =
  58. { .name = "ib.post_recv" };
  59. /* Disambiguate the various possible EINPROGRESSes */
  60. #define EINPROGRESS_INIT __einfo_error ( EINFO_EINPROGRESS_INIT )
  61. #define EINFO_EINPROGRESS_INIT __einfo_uniqify \
  62. ( EINFO_EINPROGRESS, 0x01, "Initialising" )
  63. #define EINPROGRESS_ARMED __einfo_error ( EINFO_EINPROGRESS_ARMED )
  64. #define EINFO_EINPROGRESS_ARMED __einfo_uniqify \
  65. ( EINFO_EINPROGRESS, 0x02, "Armed" )
  66. /** Human-readable message for the link statuses */
  67. struct errortab infiniband_errors[] __errortab = {
  68. __einfo_errortab ( EINFO_EINPROGRESS_INIT ),
  69. __einfo_errortab ( EINFO_EINPROGRESS_ARMED ),
  70. };
  71. /***************************************************************************
  72. *
  73. * Completion queues
  74. *
  75. ***************************************************************************
  76. */
  77. /**
  78. * Create completion queue
  79. *
  80. * @v ibdev Infiniband device
  81. * @v num_cqes Number of completion queue entries
  82. * @v op Completion queue operations
  83. * @v new_cq New completion queue to fill in
  84. * @ret rc Return status code
  85. */
  86. int ib_create_cq ( struct ib_device *ibdev, unsigned int num_cqes,
  87. struct ib_completion_queue_operations *op,
  88. struct ib_completion_queue **new_cq ) {
  89. struct ib_completion_queue *cq;
  90. int rc;
  91. DBGC ( ibdev, "IBDEV %s creating completion queue\n", ibdev->name );
  92. /* Allocate and initialise data structure */
  93. cq = zalloc ( sizeof ( *cq ) );
  94. if ( ! cq ) {
  95. rc = -ENOMEM;
  96. goto err_alloc_cq;
  97. }
  98. cq->ibdev = ibdev;
  99. list_add_tail ( &cq->list, &ibdev->cqs );
  100. cq->num_cqes = num_cqes;
  101. INIT_LIST_HEAD ( &cq->work_queues );
  102. cq->op = op;
  103. /* Perform device-specific initialisation and get CQN */
  104. if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) {
  105. DBGC ( ibdev, "IBDEV %s could not initialise completion "
  106. "queue: %s\n", ibdev->name, strerror ( rc ) );
  107. goto err_dev_create_cq;
  108. }
  109. DBGC ( ibdev, "IBDEV %s created %d-entry completion queue %p (%p) "
  110. "with CQN %#lx\n", ibdev->name, num_cqes, cq,
  111. ib_cq_get_drvdata ( cq ), cq->cqn );
  112. *new_cq = cq;
  113. return 0;
  114. ibdev->op->destroy_cq ( ibdev, cq );
  115. err_dev_create_cq:
  116. list_del ( &cq->list );
  117. free ( cq );
  118. err_alloc_cq:
  119. return rc;
  120. }
  121. /**
  122. * Destroy completion queue
  123. *
  124. * @v ibdev Infiniband device
  125. * @v cq Completion queue
  126. */
  127. void ib_destroy_cq ( struct ib_device *ibdev,
  128. struct ib_completion_queue *cq ) {
  129. DBGC ( ibdev, "IBDEV %s destroying completion queue %#lx\n",
  130. ibdev->name, cq->cqn );
  131. assert ( list_empty ( &cq->work_queues ) );
  132. ibdev->op->destroy_cq ( ibdev, cq );
  133. list_del ( &cq->list );
  134. free ( cq );
  135. }
  136. /**
  137. * Poll completion queue
  138. *
  139. * @v ibdev Infiniband device
  140. * @v cq Completion queue
  141. */
  142. void ib_poll_cq ( struct ib_device *ibdev,
  143. struct ib_completion_queue *cq ) {
  144. struct ib_work_queue *wq;
  145. /* Poll completion queue */
  146. ibdev->op->poll_cq ( ibdev, cq );
  147. /* Refill receive work queues */
  148. list_for_each_entry ( wq, &cq->work_queues, list ) {
  149. if ( ! wq->is_send )
  150. ib_refill_recv ( ibdev, wq->qp );
  151. }
  152. }
  153. /***************************************************************************
  154. *
  155. * Work queues
  156. *
  157. ***************************************************************************
  158. */
  159. /**
  160. * Create queue pair
  161. *
  162. * @v ibdev Infiniband device
  163. * @v type Queue pair type
  164. * @v num_send_wqes Number of send work queue entries
  165. * @v send_cq Send completion queue
  166. * @v num_recv_wqes Number of receive work queue entries
  167. * @v recv_cq Receive completion queue
  168. * @v op Queue pair operations
  169. * @v name Queue pair name
  170. * @v new_qp New queue pair to fill in
  171. * @ret rc Return status code
  172. *
  173. * The queue pair will be left in the INIT state; you must call
  174. * ib_modify_qp() before it is ready to use for sending and receiving.
  175. */
  176. int ib_create_qp ( struct ib_device *ibdev, enum ib_queue_pair_type type,
  177. unsigned int num_send_wqes,
  178. struct ib_completion_queue *send_cq,
  179. unsigned int num_recv_wqes,
  180. struct ib_completion_queue *recv_cq,
  181. struct ib_queue_pair_operations *op, const char *name,
  182. struct ib_queue_pair **new_qp ) {
  183. struct ib_queue_pair *qp;
  184. size_t total_size;
  185. int rc;
  186. DBGC ( ibdev, "IBDEV %s creating queue pair\n", ibdev->name );
  187. /* Allocate and initialise data structure */
  188. total_size = ( sizeof ( *qp ) +
  189. ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) +
  190. ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) );
  191. qp = zalloc ( total_size );
  192. if ( ! qp ) {
  193. rc = -ENOMEM;
  194. goto err_alloc_qp;
  195. }
  196. qp->ibdev = ibdev;
  197. list_add_tail ( &qp->list, &ibdev->qps );
  198. qp->type = type;
  199. qp->send.qp = qp;
  200. qp->send.is_send = 1;
  201. qp->send.cq = send_cq;
  202. list_add_tail ( &qp->send.list, &send_cq->work_queues );
  203. qp->send.psn = ( random() & 0xffffffUL );
  204. qp->send.num_wqes = num_send_wqes;
  205. qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) );
  206. qp->recv.qp = qp;
  207. qp->recv.cq = recv_cq;
  208. list_add_tail ( &qp->recv.list, &recv_cq->work_queues );
  209. qp->recv.psn = ( random() & 0xffffffUL );
  210. qp->recv.num_wqes = num_recv_wqes;
  211. qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) +
  212. ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ));
  213. INIT_LIST_HEAD ( &qp->mgids );
  214. qp->op = op;
  215. qp->name = name;
  216. /* Perform device-specific initialisation and get QPN */
  217. if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) {
  218. DBGC ( ibdev, "IBDEV %s could not initialise queue pair: "
  219. "%s\n", ibdev->name, strerror ( rc ) );
  220. goto err_dev_create_qp;
  221. }
  222. DBGC ( ibdev, "IBDEV %s created queue pair %p (%p) with QPN %#lx\n",
  223. ibdev->name, qp, ib_qp_get_drvdata ( qp ), qp->qpn );
  224. DBGC ( ibdev, "IBDEV %s QPN %#lx has %d send entries at [%p,%p)\n",
  225. ibdev->name, qp->qpn, num_send_wqes, qp->send.iobufs,
  226. qp->recv.iobufs );
  227. DBGC ( ibdev, "IBDEV %s QPN %#lx has %d receive entries at [%p,%p)\n",
  228. ibdev->name, qp->qpn, num_recv_wqes, qp->recv.iobufs,
  229. ( ( ( void * ) qp ) + total_size ) );
  230. /* Calculate externally-visible QPN */
  231. switch ( type ) {
  232. case IB_QPT_SMI:
  233. qp->ext_qpn = IB_QPN_SMI;
  234. break;
  235. case IB_QPT_GSI:
  236. qp->ext_qpn = IB_QPN_GSI;
  237. break;
  238. default:
  239. qp->ext_qpn = qp->qpn;
  240. break;
  241. }
  242. if ( qp->ext_qpn != qp->qpn ) {
  243. DBGC ( ibdev, "IBDEV %s QPN %#lx has external QPN %#lx\n",
  244. ibdev->name, qp->qpn, qp->ext_qpn );
  245. }
  246. *new_qp = qp;
  247. return 0;
  248. ibdev->op->destroy_qp ( ibdev, qp );
  249. err_dev_create_qp:
  250. list_del ( &qp->send.list );
  251. list_del ( &qp->recv.list );
  252. list_del ( &qp->list );
  253. free ( qp );
  254. err_alloc_qp:
  255. return rc;
  256. }
  257. /**
  258. * Modify queue pair
  259. *
  260. * @v ibdev Infiniband device
  261. * @v qp Queue pair
  262. * @ret rc Return status code
  263. */
  264. int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  265. int rc;
  266. DBGC ( ibdev, "IBDEV %s modifying QPN %#lx\n", ibdev->name, qp->qpn );
  267. if ( ( rc = ibdev->op->modify_qp ( ibdev, qp ) ) != 0 ) {
  268. DBGC ( ibdev, "IBDEV %s could not modify QPN %#lx: %s\n",
  269. ibdev->name, qp->qpn, strerror ( rc ) );
  270. return rc;
  271. }
  272. return 0;
  273. }
  274. /**
  275. * Destroy queue pair
  276. *
  277. * @v ibdev Infiniband device
  278. * @v qp Queue pair
  279. */
  280. void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  281. struct io_buffer *iobuf;
  282. unsigned int i;
  283. DBGC ( ibdev, "IBDEV %s destroying QPN %#lx\n",
  284. ibdev->name, qp->qpn );
  285. assert ( list_empty ( &qp->mgids ) );
  286. /* Perform device-specific destruction */
  287. ibdev->op->destroy_qp ( ibdev, qp );
  288. /* Complete any remaining I/O buffers with errors */
  289. for ( i = 0 ; i < qp->send.num_wqes ; i++ ) {
  290. if ( ( iobuf = qp->send.iobufs[i] ) != NULL )
  291. ib_complete_send ( ibdev, qp, iobuf, -ECANCELED );
  292. }
  293. for ( i = 0 ; i < qp->recv.num_wqes ; i++ ) {
  294. if ( ( iobuf = qp->recv.iobufs[i] ) != NULL ) {
  295. ib_complete_recv ( ibdev, qp, NULL, NULL, iobuf,
  296. -ECANCELED );
  297. }
  298. }
  299. /* Remove work queues from completion queue */
  300. list_del ( &qp->send.list );
  301. list_del ( &qp->recv.list );
  302. /* Free QP */
  303. list_del ( &qp->list );
  304. free ( qp );
  305. }
  306. /**
  307. * Find queue pair by QPN
  308. *
  309. * @v ibdev Infiniband device
  310. * @v qpn Queue pair number
  311. * @ret qp Queue pair, or NULL
  312. */
  313. struct ib_queue_pair * ib_find_qp_qpn ( struct ib_device *ibdev,
  314. unsigned long qpn ) {
  315. struct ib_queue_pair *qp;
  316. list_for_each_entry ( qp, &ibdev->qps, list ) {
  317. if ( ( qpn == qp->qpn ) || ( qpn == qp->ext_qpn ) )
  318. return qp;
  319. }
  320. return NULL;
  321. }
  322. /**
  323. * Find queue pair by multicast GID
  324. *
  325. * @v ibdev Infiniband device
  326. * @v gid Multicast GID
  327. * @ret qp Queue pair, or NULL
  328. */
  329. struct ib_queue_pair * ib_find_qp_mgid ( struct ib_device *ibdev,
  330. union ib_gid *gid ) {
  331. struct ib_queue_pair *qp;
  332. struct ib_multicast_gid *mgid;
  333. list_for_each_entry ( qp, &ibdev->qps, list ) {
  334. list_for_each_entry ( mgid, &qp->mgids, list ) {
  335. if ( memcmp ( &mgid->gid, gid,
  336. sizeof ( mgid->gid ) ) == 0 ) {
  337. return qp;
  338. }
  339. }
  340. }
  341. return NULL;
  342. }
  343. /**
  344. * Find work queue belonging to completion queue
  345. *
  346. * @v cq Completion queue
  347. * @v qpn Queue pair number
  348. * @v is_send Find send work queue (rather than receive)
  349. * @ret wq Work queue, or NULL if not found
  350. */
  351. struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
  352. unsigned long qpn, int is_send ) {
  353. struct ib_work_queue *wq;
  354. list_for_each_entry ( wq, &cq->work_queues, list ) {
  355. if ( ( wq->qp->qpn == qpn ) && ( wq->is_send == is_send ) )
  356. return wq;
  357. }
  358. return NULL;
  359. }
  360. /**
  361. * Post send work queue entry
  362. *
  363. * @v ibdev Infiniband device
  364. * @v qp Queue pair
  365. * @v dest Destination address vector
  366. * @v iobuf I/O buffer
  367. * @ret rc Return status code
  368. */
  369. int ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  370. struct ib_address_vector *dest,
  371. struct io_buffer *iobuf ) {
  372. struct ib_address_vector dest_copy;
  373. int rc;
  374. /* Start profiling */
  375. profile_start ( &ib_post_send_profiler );
  376. /* Check queue fill level */
  377. if ( qp->send.fill >= qp->send.num_wqes ) {
  378. DBGC ( ibdev, "IBDEV %s QPN %#lx send queue full\n",
  379. ibdev->name, qp->qpn );
  380. return -ENOBUFS;
  381. }
  382. /* Use default address vector if none specified */
  383. if ( ! dest )
  384. dest = &qp->av;
  385. /* Make modifiable copy of address vector */
  386. memcpy ( &dest_copy, dest, sizeof ( dest_copy ) );
  387. dest = &dest_copy;
  388. /* Fill in optional parameters in address vector */
  389. if ( ! dest->qkey )
  390. dest->qkey = qp->qkey;
  391. if ( ! dest->rate )
  392. dest->rate = IB_RATE_2_5;
  393. /* Post to hardware */
  394. if ( ( rc = ibdev->op->post_send ( ibdev, qp, dest, iobuf ) ) != 0 ) {
  395. DBGC ( ibdev, "IBDEV %s QPN %#lx could not post send WQE: "
  396. "%s\n", ibdev->name, qp->qpn, strerror ( rc ) );
  397. return rc;
  398. }
  399. /* Increase fill level */
  400. qp->send.fill++;
  401. /* Stop profiling */
  402. profile_stop ( &ib_post_send_profiler );
  403. return 0;
  404. }
  405. /**
  406. * Post receive work queue entry
  407. *
  408. * @v ibdev Infiniband device
  409. * @v qp Queue pair
  410. * @v iobuf I/O buffer
  411. * @ret rc Return status code
  412. */
  413. int ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  414. struct io_buffer *iobuf ) {
  415. int rc;
  416. /* Start profiling */
  417. profile_start ( &ib_post_recv_profiler );
  418. /* Check packet length */
  419. if ( iob_tailroom ( iobuf ) < IB_MAX_PAYLOAD_SIZE ) {
  420. DBGC ( ibdev, "IBDEV %s QPN %#lx wrong RX buffer size (%zd)\n",
  421. ibdev->name, qp->qpn, iob_tailroom ( iobuf ) );
  422. return -EINVAL;
  423. }
  424. /* Check queue fill level */
  425. if ( qp->recv.fill >= qp->recv.num_wqes ) {
  426. DBGC ( ibdev, "IBDEV %s QPN %#lx receive queue full\n",
  427. ibdev->name, qp->qpn );
  428. return -ENOBUFS;
  429. }
  430. /* Post to hardware */
  431. if ( ( rc = ibdev->op->post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
  432. DBGC ( ibdev, "IBDEV %s QPN %#lx could not post receive WQE: "
  433. "%s\n", ibdev->name, qp->qpn, strerror ( rc ) );
  434. return rc;
  435. }
  436. /* Increase fill level */
  437. qp->recv.fill++;
  438. /* Stop profiling */
  439. profile_stop ( &ib_post_recv_profiler );
  440. return 0;
  441. }
  442. /**
  443. * Complete send work queue entry
  444. *
  445. * @v ibdev Infiniband device
  446. * @v qp Queue pair
  447. * @v iobuf I/O buffer
  448. * @v rc Completion status code
  449. */
  450. void ib_complete_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  451. struct io_buffer *iobuf, int rc ) {
  452. if ( qp->send.cq->op->complete_send ) {
  453. qp->send.cq->op->complete_send ( ibdev, qp, iobuf, rc );
  454. } else {
  455. free_iob ( iobuf );
  456. }
  457. qp->send.fill--;
  458. }
  459. /**
  460. * Complete receive work queue entry
  461. *
  462. * @v ibdev Infiniband device
  463. * @v qp Queue pair
  464. * @v dest Destination address vector, or NULL
  465. * @v source Source address vector, or NULL
  466. * @v iobuf I/O buffer
  467. * @v rc Completion status code
  468. */
  469. void ib_complete_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  470. struct ib_address_vector *dest,
  471. struct ib_address_vector *source,
  472. struct io_buffer *iobuf, int rc ) {
  473. if ( qp->recv.cq->op->complete_recv ) {
  474. qp->recv.cq->op->complete_recv ( ibdev, qp, dest, source,
  475. iobuf, rc );
  476. } else {
  477. free_iob ( iobuf );
  478. }
  479. qp->recv.fill--;
  480. }
  481. /**
  482. * Refill receive work queue
  483. *
  484. * @v ibdev Infiniband device
  485. * @v qp Queue pair
  486. */
  487. void ib_refill_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  488. struct io_buffer *iobuf;
  489. int rc;
  490. /* Keep filling while unfilled entries remain */
  491. while ( qp->recv.fill < qp->recv.num_wqes ) {
  492. /* Allocate I/O buffer */
  493. iobuf = qp->op->alloc_iob ( IB_MAX_PAYLOAD_SIZE );
  494. if ( ! iobuf ) {
  495. /* Non-fatal; we will refill on next attempt */
  496. return;
  497. }
  498. /* Post I/O buffer */
  499. if ( ( rc = ib_post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
  500. DBGC ( ibdev, "IBDEV %s could not refill: %s\n",
  501. ibdev->name, strerror ( rc ) );
  502. free_iob ( iobuf );
  503. /* Give up */
  504. return;
  505. }
  506. }
  507. }
  508. /***************************************************************************
  509. *
  510. * Link control
  511. *
  512. ***************************************************************************
  513. */
  514. /**
  515. * Get link state
  516. *
  517. * @v ibdev Infiniband device
  518. * @ret rc Link status code
  519. */
  520. int ib_link_rc ( struct ib_device *ibdev ) {
  521. switch ( ibdev->port_state ) {
  522. case IB_PORT_STATE_DOWN: return -ENOTCONN;
  523. case IB_PORT_STATE_INIT: return -EINPROGRESS_INIT;
  524. case IB_PORT_STATE_ARMED: return -EINPROGRESS_ARMED;
  525. case IB_PORT_STATE_ACTIVE: return 0;
  526. default: return -EINVAL;
  527. }
  528. }
  529. /**
  530. * Textual representation of Infiniband link state
  531. *
  532. * @v ibdev Infiniband device
  533. * @ret link_text Link state text
  534. */
  535. static const char * ib_link_state_text ( struct ib_device *ibdev ) {
  536. switch ( ibdev->port_state ) {
  537. case IB_PORT_STATE_DOWN: return "DOWN";
  538. case IB_PORT_STATE_INIT: return "INIT";
  539. case IB_PORT_STATE_ARMED: return "ARMED";
  540. case IB_PORT_STATE_ACTIVE: return "ACTIVE";
  541. default: return "UNKNOWN";
  542. }
  543. }
  544. /**
  545. * Notify drivers of Infiniband device or link state change
  546. *
  547. * @v ibdev Infiniband device
  548. */
  549. static void ib_notify ( struct ib_device *ibdev ) {
  550. struct ib_driver *driver;
  551. for_each_table_entry ( driver, IB_DRIVERS )
  552. driver->notify ( ibdev );
  553. }
  554. /**
  555. * Notify of Infiniband link state change
  556. *
  557. * @v ibdev Infiniband device
  558. */
  559. void ib_link_state_changed ( struct ib_device *ibdev ) {
  560. DBGC ( ibdev, "IBDEV %s link state is %s\n",
  561. ibdev->name, ib_link_state_text ( ibdev ) );
  562. /* Notify drivers of link state change */
  563. ib_notify ( ibdev );
  564. }
  565. /**
  566. * Open port
  567. *
  568. * @v ibdev Infiniband device
  569. * @ret rc Return status code
  570. */
  571. int ib_open ( struct ib_device *ibdev ) {
  572. int rc;
  573. /* Increment device open request counter */
  574. if ( ibdev->open_count++ > 0 ) {
  575. /* Device was already open; do nothing */
  576. return 0;
  577. }
  578. /* Open device */
  579. if ( ( rc = ibdev->op->open ( ibdev ) ) != 0 ) {
  580. DBGC ( ibdev, "IBDEV %s could not open: %s\n",
  581. ibdev->name, strerror ( rc ) );
  582. goto err_open;
  583. }
  584. /* Create subnet management interface */
  585. if ( ( rc = ib_create_mi ( ibdev, IB_QPT_SMI, &ibdev->smi ) ) != 0 ) {
  586. DBGC ( ibdev, "IBDEV %s could not create SMI: %s\n",
  587. ibdev->name, strerror ( rc ) );
  588. goto err_create_smi;
  589. }
  590. /* Create subnet management agent */
  591. if ( ( rc = ib_create_sma ( ibdev, ibdev->smi ) ) != 0 ) {
  592. DBGC ( ibdev, "IBDEV %s could not create SMA: %s\n",
  593. ibdev->name, strerror ( rc ) );
  594. goto err_create_sma;
  595. }
  596. /* Create general services interface */
  597. if ( ( rc = ib_create_mi ( ibdev, IB_QPT_GSI, &ibdev->gsi ) ) != 0 ) {
  598. DBGC ( ibdev, "IBDEV %s could not create GSI: %s\n",
  599. ibdev->name, strerror ( rc ) );
  600. goto err_create_gsi;
  601. }
  602. /* Add to head of open devices list */
  603. list_add ( &ibdev->open_list, &open_ib_devices );
  604. /* Notify drivers of device state change */
  605. ib_notify ( ibdev );
  606. assert ( ibdev->open_count == 1 );
  607. return 0;
  608. ib_destroy_mi ( ibdev, ibdev->gsi );
  609. err_create_gsi:
  610. ib_destroy_sma ( ibdev, ibdev->smi );
  611. err_create_sma:
  612. ib_destroy_mi ( ibdev, ibdev->smi );
  613. err_create_smi:
  614. ibdev->op->close ( ibdev );
  615. err_open:
  616. assert ( ibdev->open_count == 1 );
  617. ibdev->open_count = 0;
  618. return rc;
  619. }
  620. /**
  621. * Close port
  622. *
  623. * @v ibdev Infiniband device
  624. */
  625. void ib_close ( struct ib_device *ibdev ) {
  626. /* Decrement device open request counter */
  627. ibdev->open_count--;
  628. /* Close device if this was the last remaining requested opening */
  629. if ( ibdev->open_count == 0 ) {
  630. ib_notify ( ibdev );
  631. list_del ( &ibdev->open_list );
  632. ib_destroy_mi ( ibdev, ibdev->gsi );
  633. ib_destroy_sma ( ibdev, ibdev->smi );
  634. ib_destroy_mi ( ibdev, ibdev->smi );
  635. ibdev->op->close ( ibdev );
  636. ibdev->port_state = IB_PORT_STATE_DOWN;
  637. }
  638. }
  639. /***************************************************************************
  640. *
  641. * Multicast
  642. *
  643. ***************************************************************************
  644. */
  645. /**
  646. * Attach to multicast group
  647. *
  648. * @v ibdev Infiniband device
  649. * @v qp Queue pair
  650. * @v gid Multicast GID
  651. * @ret rc Return status code
  652. *
  653. * Note that this function handles only the local device's attachment
  654. * to the multicast GID; it does not issue the relevant MADs to join
  655. * the multicast group on the subnet.
  656. */
  657. int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  658. union ib_gid *gid ) {
  659. struct ib_multicast_gid *mgid;
  660. int rc;
  661. /* Sanity check */
  662. assert ( qp != NULL );
  663. /* Add to software multicast GID list */
  664. mgid = zalloc ( sizeof ( *mgid ) );
  665. if ( ! mgid ) {
  666. rc = -ENOMEM;
  667. goto err_alloc_mgid;
  668. }
  669. memcpy ( &mgid->gid, gid, sizeof ( mgid->gid ) );
  670. list_add_tail ( &mgid->list, &qp->mgids );
  671. /* Add to hardware multicast GID list */
  672. if ( ( rc = ibdev->op->mcast_attach ( ibdev, qp, gid ) ) != 0 )
  673. goto err_dev_mcast_attach;
  674. return 0;
  675. err_dev_mcast_attach:
  676. list_del ( &mgid->list );
  677. free ( mgid );
  678. err_alloc_mgid:
  679. return rc;
  680. }
  681. /**
  682. * Detach from multicast group
  683. *
  684. * @v ibdev Infiniband device
  685. * @v qp Queue pair
  686. * @v gid Multicast GID
  687. */
  688. void ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  689. union ib_gid *gid ) {
  690. struct ib_multicast_gid *mgid;
  691. /* Sanity check */
  692. assert ( qp != NULL );
  693. /* Remove from hardware multicast GID list */
  694. ibdev->op->mcast_detach ( ibdev, qp, gid );
  695. /* Remove from software multicast GID list */
  696. list_for_each_entry ( mgid, &qp->mgids, list ) {
  697. if ( memcmp ( &mgid->gid, gid, sizeof ( mgid->gid ) ) == 0 ) {
  698. list_del ( &mgid->list );
  699. free ( mgid );
  700. break;
  701. }
  702. }
  703. }
  704. /***************************************************************************
  705. *
  706. * Miscellaneous
  707. *
  708. ***************************************************************************
  709. */
  710. /**
  711. * Count Infiniband HCA ports
  712. *
  713. * @v ibdev Infiniband device
  714. * @ret num_ports Number of ports
  715. */
  716. int ib_count_ports ( struct ib_device *ibdev ) {
  717. struct ib_device *tmp;
  718. int num_ports = 0;
  719. /* Search for IB devices with the same physical device to
  720. * identify port count.
  721. */
  722. for_each_ibdev ( tmp ) {
  723. if ( tmp->dev == ibdev->dev )
  724. num_ports++;
  725. }
  726. return num_ports;
  727. }
  728. /**
  729. * Set port information
  730. *
  731. * @v ibdev Infiniband device
  732. * @v mad Set port information MAD
  733. */
  734. int ib_set_port_info ( struct ib_device *ibdev, union ib_mad *mad ) {
  735. int rc;
  736. /* Adapters with embedded SMAs do not need to support this method */
  737. if ( ! ibdev->op->set_port_info ) {
  738. DBGC ( ibdev, "IBDEV %s does not support setting port "
  739. "information\n", ibdev->name );
  740. return -ENOTSUP;
  741. }
  742. if ( ( rc = ibdev->op->set_port_info ( ibdev, mad ) ) != 0 ) {
  743. DBGC ( ibdev, "IBDEV %s could not set port information: %s\n",
  744. ibdev->name, strerror ( rc ) );
  745. return rc;
  746. }
  747. return 0;
  748. };
  749. /**
  750. * Set partition key table
  751. *
  752. * @v ibdev Infiniband device
  753. * @v mad Set partition key table MAD
  754. */
  755. int ib_set_pkey_table ( struct ib_device *ibdev, union ib_mad *mad ) {
  756. int rc;
  757. /* Adapters with embedded SMAs do not need to support this method */
  758. if ( ! ibdev->op->set_pkey_table ) {
  759. DBGC ( ibdev, "IBDEV %s does not support setting partition "
  760. "key table\n", ibdev->name );
  761. return -ENOTSUP;
  762. }
  763. if ( ( rc = ibdev->op->set_pkey_table ( ibdev, mad ) ) != 0 ) {
  764. DBGC ( ibdev, "IBDEV %s could not set partition key table: "
  765. "%s\n", ibdev->name, strerror ( rc ) );
  766. return rc;
  767. }
  768. return 0;
  769. };
  770. /***************************************************************************
  771. *
  772. * Event queues
  773. *
  774. ***************************************************************************
  775. */
  776. /**
  777. * Poll event queue
  778. *
  779. * @v ibdev Infiniband device
  780. */
  781. void ib_poll_eq ( struct ib_device *ibdev ) {
  782. struct ib_completion_queue *cq;
  783. /* Poll device's event queue */
  784. ibdev->op->poll_eq ( ibdev );
  785. /* Poll all completion queues */
  786. list_for_each_entry ( cq, &ibdev->cqs, list )
  787. ib_poll_cq ( ibdev, cq );
  788. }
  789. /**
  790. * Single-step the Infiniband event queue
  791. *
  792. * @v process Infiniband event queue process
  793. */
  794. static void ib_step ( struct process *process __unused ) {
  795. struct ib_device *ibdev;
  796. list_for_each_entry ( ibdev, &open_ib_devices, open_list )
  797. ib_poll_eq ( ibdev );
  798. }
  799. /** Infiniband event queue process */
  800. PERMANENT_PROCESS ( ib_process, ib_step );
  801. /***************************************************************************
  802. *
  803. * Infiniband device creation/destruction
  804. *
  805. ***************************************************************************
  806. */
  807. /**
  808. * Allocate Infiniband device
  809. *
  810. * @v priv_size Size of driver private data area
  811. * @ret ibdev Infiniband device, or NULL
  812. */
  813. struct ib_device * alloc_ibdev ( size_t priv_size ) {
  814. struct ib_device *ibdev;
  815. void *drv_priv;
  816. size_t total_len;
  817. total_len = ( sizeof ( *ibdev ) + priv_size );
  818. ibdev = zalloc ( total_len );
  819. if ( ibdev ) {
  820. drv_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) );
  821. ib_set_drvdata ( ibdev, drv_priv );
  822. INIT_LIST_HEAD ( &ibdev->list );
  823. INIT_LIST_HEAD ( &ibdev->open_list );
  824. INIT_LIST_HEAD ( &ibdev->cqs );
  825. INIT_LIST_HEAD ( &ibdev->qps );
  826. ibdev->port_state = IB_PORT_STATE_DOWN;
  827. ibdev->lid = IB_LID_NONE;
  828. ibdev->pkey = IB_PKEY_DEFAULT;
  829. }
  830. return ibdev;
  831. }
  832. /**
  833. * Register Infiniband device
  834. *
  835. * @v ibdev Infiniband device
  836. * @ret rc Return status code
  837. */
  838. int register_ibdev ( struct ib_device *ibdev ) {
  839. struct ib_driver *driver;
  840. int rc;
  841. /* Record device index and create device name */
  842. if ( ibdev->name[0] == '\0' ) {
  843. snprintf ( ibdev->name, sizeof ( ibdev->name ), "inf%d",
  844. ibdev_index );
  845. }
  846. ibdev->index = ++ibdev_index;
  847. /* Add to device list */
  848. ibdev_get ( ibdev );
  849. list_add_tail ( &ibdev->list, &ib_devices );
  850. DBGC ( ibdev, "IBDEV %s registered (phys %s)\n", ibdev->name,
  851. ibdev->dev->name );
  852. /* Probe device */
  853. for_each_table_entry ( driver, IB_DRIVERS ) {
  854. if ( ( rc = driver->probe ( ibdev ) ) != 0 ) {
  855. DBGC ( ibdev, "IBDEV %s could not add %s device: %s\n",
  856. ibdev->name, driver->name, strerror ( rc ) );
  857. goto err_probe;
  858. }
  859. }
  860. return 0;
  861. err_probe:
  862. for_each_table_entry_continue_reverse ( driver, IB_DRIVERS )
  863. driver->remove ( ibdev );
  864. list_del ( &ibdev->list );
  865. ibdev_put ( ibdev );
  866. return rc;
  867. }
  868. /**
  869. * Unregister Infiniband device
  870. *
  871. * @v ibdev Infiniband device
  872. */
  873. void unregister_ibdev ( struct ib_device *ibdev ) {
  874. struct ib_driver *driver;
  875. /* Remove device */
  876. for_each_table_entry_reverse ( driver, IB_DRIVERS )
  877. driver->remove ( ibdev );
  878. /* Remove from device list */
  879. list_del ( &ibdev->list );
  880. ibdev_put ( ibdev );
  881. DBGC ( ibdev, "IBDEV %s unregistered\n", ibdev->name );
  882. /* Reset device index if no devices remain */
  883. if ( list_empty ( &ib_devices ) )
  884. ibdev_index = 0;
  885. }
  886. /**
  887. * Find Infiniband device by GID
  888. *
  889. * @v gid GID
  890. * @ret ibdev Infiniband device, or NULL
  891. */
  892. struct ib_device * find_ibdev ( union ib_gid *gid ) {
  893. struct ib_device *ibdev;
  894. for_each_ibdev ( ibdev ) {
  895. if ( memcmp ( gid, &ibdev->gid, sizeof ( *gid ) ) == 0 )
  896. return ibdev;
  897. }
  898. return NULL;
  899. }
  900. /**
  901. * Get most recently opened Infiniband device
  902. *
  903. * @ret ibdev Most recently opened Infiniband device, or NULL
  904. */
  905. struct ib_device * last_opened_ibdev ( void ) {
  906. struct ib_device *ibdev;
  907. ibdev = list_first_entry ( &open_ib_devices, struct ib_device,
  908. open_list );
  909. if ( ! ibdev )
  910. return NULL;
  911. assert ( ibdev->open_count != 0 );
  912. return ibdev;
  913. }
  /* Objects required below are pulled in whenever register_ibdev() is */
  REQUIRING_SYMBOL ( register_ibdev );

  /* Pull in the Infiniband configuration object */
  REQUIRE_OBJECT ( config_infiniband );