You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

infiniband.c 26KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054
  1. /*
  2. * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License as
  6. * published by the Free Software Foundation; either version 2 of the
  7. * License, or any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful, but
  10. * WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write to the Free Software
  16. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  17. * 02110-1301, USA.
  18. *
  19. * You can also choose to distribute this program under the terms of
  20. * the Unmodified Binary Distribution Licence (as given in the file
  21. * COPYING.UBDL), provided that you have satisfied its requirements.
  22. */
  23. FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
  24. #include <stdint.h>
  25. #include <stdlib.h>
  26. #include <stdio.h>
  27. #include <string.h>
  28. #include <unistd.h>
  29. #include <byteswap.h>
  30. #include <errno.h>
  31. #include <assert.h>
  32. #include <ipxe/list.h>
  33. #include <ipxe/errortab.h>
  34. #include <ipxe/if_arp.h>
  35. #include <ipxe/netdevice.h>
  36. #include <ipxe/iobuf.h>
  37. #include <ipxe/process.h>
  38. #include <ipxe/profile.h>
  39. #include <ipxe/infiniband.h>
  40. #include <ipxe/ib_mi.h>
  41. #include <ipxe/ib_sma.h>
  42. /** @file
  43. *
  44. * Infiniband protocol
  45. *
  46. */
  47. /** List of Infiniband devices */
  48. struct list_head ib_devices = LIST_HEAD_INIT ( ib_devices );
  49. /** List of open Infiniband devices, in reverse order of opening */
  50. static struct list_head open_ib_devices = LIST_HEAD_INIT ( open_ib_devices );
  51. /** Infiniband device index */
  52. static unsigned int ibdev_index = 0;
  53. /** Post send work queue entry profiler */
  54. static struct profiler ib_post_send_profiler __profiler =
  55. { .name = "ib.post_send" };
  56. /** Post receive work queue entry profiler */
  57. static struct profiler ib_post_recv_profiler __profiler =
  58. { .name = "ib.post_recv" };
  59. /* Disambiguate the various possible EINPROGRESSes */
  60. #define EINPROGRESS_INIT __einfo_error ( EINFO_EINPROGRESS_INIT )
  61. #define EINFO_EINPROGRESS_INIT __einfo_uniqify \
  62. ( EINFO_EINPROGRESS, 0x01, "Initialising" )
  63. #define EINPROGRESS_ARMED __einfo_error ( EINFO_EINPROGRESS_ARMED )
  64. #define EINFO_EINPROGRESS_ARMED __einfo_uniqify \
  65. ( EINFO_EINPROGRESS, 0x02, "Armed" )
  66. /** Human-readable message for the link statuses */
  67. struct errortab infiniband_errors[] __errortab = {
  68. __einfo_errortab ( EINFO_EINPROGRESS_INIT ),
  69. __einfo_errortab ( EINFO_EINPROGRESS_ARMED ),
  70. };
  71. /***************************************************************************
  72. *
  73. * Completion queues
  74. *
  75. ***************************************************************************
  76. */
  77. /**
  78. * Create completion queue
  79. *
  80. * @v ibdev Infiniband device
  81. * @v num_cqes Number of completion queue entries
  82. * @v op Completion queue operations
  83. * @ret cq New completion queue
  84. */
  85. struct ib_completion_queue *
  86. ib_create_cq ( struct ib_device *ibdev, unsigned int num_cqes,
  87. struct ib_completion_queue_operations *op ) {
  88. struct ib_completion_queue *cq;
  89. int rc;
  90. DBGC ( ibdev, "IBDEV %s creating completion queue\n", ibdev->name );
  91. /* Allocate and initialise data structure */
  92. cq = zalloc ( sizeof ( *cq ) );
  93. if ( ! cq )
  94. goto err_alloc_cq;
  95. cq->ibdev = ibdev;
  96. list_add_tail ( &cq->list, &ibdev->cqs );
  97. cq->num_cqes = num_cqes;
  98. INIT_LIST_HEAD ( &cq->work_queues );
  99. cq->op = op;
  100. /* Perform device-specific initialisation and get CQN */
  101. if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) {
  102. DBGC ( ibdev, "IBDEV %s could not initialise completion "
  103. "queue: %s\n", ibdev->name, strerror ( rc ) );
  104. goto err_dev_create_cq;
  105. }
  106. DBGC ( ibdev, "IBDEV %s created %d-entry completion queue %p (%p) "
  107. "with CQN %#lx\n", ibdev->name, num_cqes, cq,
  108. ib_cq_get_drvdata ( cq ), cq->cqn );
  109. return cq;
  110. ibdev->op->destroy_cq ( ibdev, cq );
  111. err_dev_create_cq:
  112. list_del ( &cq->list );
  113. free ( cq );
  114. err_alloc_cq:
  115. return NULL;
  116. }
  117. /**
  118. * Destroy completion queue
  119. *
  120. * @v ibdev Infiniband device
  121. * @v cq Completion queue
  122. */
  123. void ib_destroy_cq ( struct ib_device *ibdev,
  124. struct ib_completion_queue *cq ) {
  125. DBGC ( ibdev, "IBDEV %s destroying completion queue %#lx\n",
  126. ibdev->name, cq->cqn );
  127. assert ( list_empty ( &cq->work_queues ) );
  128. ibdev->op->destroy_cq ( ibdev, cq );
  129. list_del ( &cq->list );
  130. free ( cq );
  131. }
  132. /**
  133. * Poll completion queue
  134. *
  135. * @v ibdev Infiniband device
  136. * @v cq Completion queue
  137. */
  138. void ib_poll_cq ( struct ib_device *ibdev,
  139. struct ib_completion_queue *cq ) {
  140. struct ib_work_queue *wq;
  141. /* Poll completion queue */
  142. ibdev->op->poll_cq ( ibdev, cq );
  143. /* Refill receive work queues */
  144. list_for_each_entry ( wq, &cq->work_queues, list ) {
  145. if ( ! wq->is_send )
  146. ib_refill_recv ( ibdev, wq->qp );
  147. }
  148. }
  149. /***************************************************************************
  150. *
  151. * Work queues
  152. *
  153. ***************************************************************************
  154. */
  155. /**
  156. * Create queue pair
  157. *
  158. * @v ibdev Infiniband device
  159. * @v type Queue pair type
  160. * @v num_send_wqes Number of send work queue entries
  161. * @v send_cq Send completion queue
  162. * @v num_recv_wqes Number of receive work queue entries
  163. * @v recv_cq Receive completion queue
  164. * @v op Queue pair operations
  165. * @v name Queue pair name
  166. * @ret qp Queue pair
  167. *
  168. * The queue pair will be left in the INIT state; you must call
  169. * ib_modify_qp() before it is ready to use for sending and receiving.
  170. */
  171. struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev,
  172. enum ib_queue_pair_type type,
  173. unsigned int num_send_wqes,
  174. struct ib_completion_queue *send_cq,
  175. unsigned int num_recv_wqes,
  176. struct ib_completion_queue *recv_cq,
  177. struct ib_queue_pair_operations *op,
  178. const char *name ) {
  179. struct ib_queue_pair *qp;
  180. size_t total_size;
  181. int rc;
  182. DBGC ( ibdev, "IBDEV %s creating queue pair\n", ibdev->name );
  183. /* Allocate and initialise data structure */
  184. total_size = ( sizeof ( *qp ) +
  185. ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) +
  186. ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) );
  187. qp = zalloc ( total_size );
  188. if ( ! qp )
  189. goto err_alloc_qp;
  190. qp->ibdev = ibdev;
  191. list_add_tail ( &qp->list, &ibdev->qps );
  192. qp->type = type;
  193. qp->send.qp = qp;
  194. qp->send.is_send = 1;
  195. qp->send.cq = send_cq;
  196. list_add_tail ( &qp->send.list, &send_cq->work_queues );
  197. qp->send.psn = ( random() & 0xffffffUL );
  198. qp->send.num_wqes = num_send_wqes;
  199. qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) );
  200. qp->recv.qp = qp;
  201. qp->recv.cq = recv_cq;
  202. list_add_tail ( &qp->recv.list, &recv_cq->work_queues );
  203. qp->recv.psn = ( random() & 0xffffffUL );
  204. qp->recv.num_wqes = num_recv_wqes;
  205. qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) +
  206. ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ));
  207. INIT_LIST_HEAD ( &qp->mgids );
  208. qp->op = op;
  209. qp->name = name;
  210. /* Perform device-specific initialisation and get QPN */
  211. if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) {
  212. DBGC ( ibdev, "IBDEV %s could not initialise queue pair: "
  213. "%s\n", ibdev->name, strerror ( rc ) );
  214. goto err_dev_create_qp;
  215. }
  216. DBGC ( ibdev, "IBDEV %s created queue pair %p (%p) with QPN %#lx\n",
  217. ibdev->name, qp, ib_qp_get_drvdata ( qp ), qp->qpn );
  218. DBGC ( ibdev, "IBDEV %s QPN %#lx has %d send entries at [%p,%p)\n",
  219. ibdev->name, qp->qpn, num_send_wqes, qp->send.iobufs,
  220. qp->recv.iobufs );
  221. DBGC ( ibdev, "IBDEV %s QPN %#lx has %d receive entries at [%p,%p)\n",
  222. ibdev->name, qp->qpn, num_recv_wqes, qp->recv.iobufs,
  223. ( ( ( void * ) qp ) + total_size ) );
  224. /* Calculate externally-visible QPN */
  225. switch ( type ) {
  226. case IB_QPT_SMI:
  227. qp->ext_qpn = IB_QPN_SMI;
  228. break;
  229. case IB_QPT_GSI:
  230. qp->ext_qpn = IB_QPN_GSI;
  231. break;
  232. default:
  233. qp->ext_qpn = qp->qpn;
  234. break;
  235. }
  236. if ( qp->ext_qpn != qp->qpn ) {
  237. DBGC ( ibdev, "IBDEV %s QPN %#lx has external QPN %#lx\n",
  238. ibdev->name, qp->qpn, qp->ext_qpn );
  239. }
  240. return qp;
  241. ibdev->op->destroy_qp ( ibdev, qp );
  242. err_dev_create_qp:
  243. list_del ( &qp->send.list );
  244. list_del ( &qp->recv.list );
  245. list_del ( &qp->list );
  246. free ( qp );
  247. err_alloc_qp:
  248. return NULL;
  249. }
  250. /**
  251. * Modify queue pair
  252. *
  253. * @v ibdev Infiniband device
  254. * @v qp Queue pair
  255. * @ret rc Return status code
  256. */
  257. int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  258. int rc;
  259. DBGC ( ibdev, "IBDEV %s modifying QPN %#lx\n", ibdev->name, qp->qpn );
  260. if ( ( rc = ibdev->op->modify_qp ( ibdev, qp ) ) != 0 ) {
  261. DBGC ( ibdev, "IBDEV %s could not modify QPN %#lx: %s\n",
  262. ibdev->name, qp->qpn, strerror ( rc ) );
  263. return rc;
  264. }
  265. return 0;
  266. }
  267. /**
  268. * Destroy queue pair
  269. *
  270. * @v ibdev Infiniband device
  271. * @v qp Queue pair
  272. */
  273. void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  274. struct io_buffer *iobuf;
  275. unsigned int i;
  276. DBGC ( ibdev, "IBDEV %s destroying QPN %#lx\n",
  277. ibdev->name, qp->qpn );
  278. assert ( list_empty ( &qp->mgids ) );
  279. /* Perform device-specific destruction */
  280. ibdev->op->destroy_qp ( ibdev, qp );
  281. /* Complete any remaining I/O buffers with errors */
  282. for ( i = 0 ; i < qp->send.num_wqes ; i++ ) {
  283. if ( ( iobuf = qp->send.iobufs[i] ) != NULL )
  284. ib_complete_send ( ibdev, qp, iobuf, -ECANCELED );
  285. }
  286. for ( i = 0 ; i < qp->recv.num_wqes ; i++ ) {
  287. if ( ( iobuf = qp->recv.iobufs[i] ) != NULL ) {
  288. ib_complete_recv ( ibdev, qp, NULL, NULL, iobuf,
  289. -ECANCELED );
  290. }
  291. }
  292. /* Remove work queues from completion queue */
  293. list_del ( &qp->send.list );
  294. list_del ( &qp->recv.list );
  295. /* Free QP */
  296. list_del ( &qp->list );
  297. free ( qp );
  298. }
  299. /**
  300. * Find queue pair by QPN
  301. *
  302. * @v ibdev Infiniband device
  303. * @v qpn Queue pair number
  304. * @ret qp Queue pair, or NULL
  305. */
  306. struct ib_queue_pair * ib_find_qp_qpn ( struct ib_device *ibdev,
  307. unsigned long qpn ) {
  308. struct ib_queue_pair *qp;
  309. list_for_each_entry ( qp, &ibdev->qps, list ) {
  310. if ( ( qpn == qp->qpn ) || ( qpn == qp->ext_qpn ) )
  311. return qp;
  312. }
  313. return NULL;
  314. }
  315. /**
  316. * Find queue pair by multicast GID
  317. *
  318. * @v ibdev Infiniband device
  319. * @v gid Multicast GID
  320. * @ret qp Queue pair, or NULL
  321. */
  322. struct ib_queue_pair * ib_find_qp_mgid ( struct ib_device *ibdev,
  323. union ib_gid *gid ) {
  324. struct ib_queue_pair *qp;
  325. struct ib_multicast_gid *mgid;
  326. list_for_each_entry ( qp, &ibdev->qps, list ) {
  327. list_for_each_entry ( mgid, &qp->mgids, list ) {
  328. if ( memcmp ( &mgid->gid, gid,
  329. sizeof ( mgid->gid ) ) == 0 ) {
  330. return qp;
  331. }
  332. }
  333. }
  334. return NULL;
  335. }
  336. /**
  337. * Find work queue belonging to completion queue
  338. *
  339. * @v cq Completion queue
  340. * @v qpn Queue pair number
  341. * @v is_send Find send work queue (rather than receive)
  342. * @ret wq Work queue, or NULL if not found
  343. */
  344. struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
  345. unsigned long qpn, int is_send ) {
  346. struct ib_work_queue *wq;
  347. list_for_each_entry ( wq, &cq->work_queues, list ) {
  348. if ( ( wq->qp->qpn == qpn ) && ( wq->is_send == is_send ) )
  349. return wq;
  350. }
  351. return NULL;
  352. }
  353. /**
  354. * Post send work queue entry
  355. *
  356. * @v ibdev Infiniband device
  357. * @v qp Queue pair
  358. * @v dest Destination address vector
  359. * @v iobuf I/O buffer
  360. * @ret rc Return status code
  361. */
  362. int ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  363. struct ib_address_vector *dest,
  364. struct io_buffer *iobuf ) {
  365. struct ib_address_vector dest_copy;
  366. int rc;
  367. /* Start profiling */
  368. profile_start ( &ib_post_send_profiler );
  369. /* Check queue fill level */
  370. if ( qp->send.fill >= qp->send.num_wqes ) {
  371. DBGC ( ibdev, "IBDEV %s QPN %#lx send queue full\n",
  372. ibdev->name, qp->qpn );
  373. return -ENOBUFS;
  374. }
  375. /* Use default address vector if none specified */
  376. if ( ! dest )
  377. dest = &qp->av;
  378. /* Make modifiable copy of address vector */
  379. memcpy ( &dest_copy, dest, sizeof ( dest_copy ) );
  380. dest = &dest_copy;
  381. /* Fill in optional parameters in address vector */
  382. if ( ! dest->qkey )
  383. dest->qkey = qp->qkey;
  384. if ( ! dest->rate )
  385. dest->rate = IB_RATE_2_5;
  386. /* Post to hardware */
  387. if ( ( rc = ibdev->op->post_send ( ibdev, qp, dest, iobuf ) ) != 0 ) {
  388. DBGC ( ibdev, "IBDEV %s QPN %#lx could not post send WQE: "
  389. "%s\n", ibdev->name, qp->qpn, strerror ( rc ) );
  390. return rc;
  391. }
  392. /* Increase fill level */
  393. qp->send.fill++;
  394. /* Stop profiling */
  395. profile_stop ( &ib_post_send_profiler );
  396. return 0;
  397. }
  398. /**
  399. * Post receive work queue entry
  400. *
  401. * @v ibdev Infiniband device
  402. * @v qp Queue pair
  403. * @v iobuf I/O buffer
  404. * @ret rc Return status code
  405. */
  406. int ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  407. struct io_buffer *iobuf ) {
  408. int rc;
  409. /* Start profiling */
  410. profile_start ( &ib_post_recv_profiler );
  411. /* Check packet length */
  412. if ( iob_tailroom ( iobuf ) < IB_MAX_PAYLOAD_SIZE ) {
  413. DBGC ( ibdev, "IBDEV %s QPN %#lx wrong RX buffer size (%zd)\n",
  414. ibdev->name, qp->qpn, iob_tailroom ( iobuf ) );
  415. return -EINVAL;
  416. }
  417. /* Check queue fill level */
  418. if ( qp->recv.fill >= qp->recv.num_wqes ) {
  419. DBGC ( ibdev, "IBDEV %s QPN %#lx receive queue full\n",
  420. ibdev->name, qp->qpn );
  421. return -ENOBUFS;
  422. }
  423. /* Post to hardware */
  424. if ( ( rc = ibdev->op->post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
  425. DBGC ( ibdev, "IBDEV %s QPN %#lx could not post receive WQE: "
  426. "%s\n", ibdev->name, qp->qpn, strerror ( rc ) );
  427. return rc;
  428. }
  429. /* Increase fill level */
  430. qp->recv.fill++;
  431. /* Stop profiling */
  432. profile_stop ( &ib_post_recv_profiler );
  433. return 0;
  434. }
  435. /**
  436. * Complete send work queue entry
  437. *
  438. * @v ibdev Infiniband device
  439. * @v qp Queue pair
  440. * @v iobuf I/O buffer
  441. * @v rc Completion status code
  442. */
  443. void ib_complete_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  444. struct io_buffer *iobuf, int rc ) {
  445. if ( qp->send.cq->op->complete_send ) {
  446. qp->send.cq->op->complete_send ( ibdev, qp, iobuf, rc );
  447. } else {
  448. free_iob ( iobuf );
  449. }
  450. qp->send.fill--;
  451. }
  452. /**
  453. * Complete receive work queue entry
  454. *
  455. * @v ibdev Infiniband device
  456. * @v qp Queue pair
  457. * @v dest Destination address vector, or NULL
  458. * @v source Source address vector, or NULL
  459. * @v iobuf I/O buffer
  460. * @v rc Completion status code
  461. */
  462. void ib_complete_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  463. struct ib_address_vector *dest,
  464. struct ib_address_vector *source,
  465. struct io_buffer *iobuf, int rc ) {
  466. if ( qp->recv.cq->op->complete_recv ) {
  467. qp->recv.cq->op->complete_recv ( ibdev, qp, dest, source,
  468. iobuf, rc );
  469. } else {
  470. free_iob ( iobuf );
  471. }
  472. qp->recv.fill--;
  473. }
  474. /**
  475. * Refill receive work queue
  476. *
  477. * @v ibdev Infiniband device
  478. * @v qp Queue pair
  479. */
  480. void ib_refill_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  481. struct io_buffer *iobuf;
  482. int rc;
  483. /* Keep filling while unfilled entries remain */
  484. while ( qp->recv.fill < qp->recv.num_wqes ) {
  485. /* Allocate I/O buffer */
  486. iobuf = qp->op->alloc_iob ( IB_MAX_PAYLOAD_SIZE );
  487. if ( ! iobuf ) {
  488. /* Non-fatal; we will refill on next attempt */
  489. return;
  490. }
  491. /* Post I/O buffer */
  492. if ( ( rc = ib_post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
  493. DBGC ( ibdev, "IBDEV %s could not refill: %s\n",
  494. ibdev->name, strerror ( rc ) );
  495. free_iob ( iobuf );
  496. /* Give up */
  497. return;
  498. }
  499. }
  500. }
  501. /***************************************************************************
  502. *
  503. * Link control
  504. *
  505. ***************************************************************************
  506. */
  507. /**
  508. * Get link state
  509. *
  510. * @v ibdev Infiniband device
  511. * @ret rc Link status code
  512. */
  513. int ib_link_rc ( struct ib_device *ibdev ) {
  514. switch ( ibdev->port_state ) {
  515. case IB_PORT_STATE_DOWN: return -ENOTCONN;
  516. case IB_PORT_STATE_INIT: return -EINPROGRESS_INIT;
  517. case IB_PORT_STATE_ARMED: return -EINPROGRESS_ARMED;
  518. case IB_PORT_STATE_ACTIVE: return 0;
  519. default: return -EINVAL;
  520. }
  521. }
  522. /**
  523. * Textual representation of Infiniband link state
  524. *
  525. * @v ibdev Infiniband device
  526. * @ret link_text Link state text
  527. */
  528. static const char * ib_link_state_text ( struct ib_device *ibdev ) {
  529. switch ( ibdev->port_state ) {
  530. case IB_PORT_STATE_DOWN: return "DOWN";
  531. case IB_PORT_STATE_INIT: return "INIT";
  532. case IB_PORT_STATE_ARMED: return "ARMED";
  533. case IB_PORT_STATE_ACTIVE: return "ACTIVE";
  534. default: return "UNKNOWN";
  535. }
  536. }
  537. /**
  538. * Notify drivers of Infiniband device or link state change
  539. *
  540. * @v ibdev Infiniband device
  541. */
  542. static void ib_notify ( struct ib_device *ibdev ) {
  543. struct ib_driver *driver;
  544. for_each_table_entry ( driver, IB_DRIVERS )
  545. driver->notify ( ibdev );
  546. }
  547. /**
  548. * Notify of Infiniband link state change
  549. *
  550. * @v ibdev Infiniband device
  551. */
  552. void ib_link_state_changed ( struct ib_device *ibdev ) {
  553. DBGC ( ibdev, "IBDEV %s link state is %s\n",
  554. ibdev->name, ib_link_state_text ( ibdev ) );
  555. /* Notify drivers of link state change */
  556. ib_notify ( ibdev );
  557. }
  558. /**
  559. * Open port
  560. *
  561. * @v ibdev Infiniband device
  562. * @ret rc Return status code
  563. */
  564. int ib_open ( struct ib_device *ibdev ) {
  565. int rc;
  566. /* Increment device open request counter */
  567. if ( ibdev->open_count++ > 0 ) {
  568. /* Device was already open; do nothing */
  569. return 0;
  570. }
  571. /* Open device */
  572. if ( ( rc = ibdev->op->open ( ibdev ) ) != 0 ) {
  573. DBGC ( ibdev, "IBDEV %s could not open: %s\n",
  574. ibdev->name, strerror ( rc ) );
  575. goto err_open;
  576. }
  577. /* Create subnet management interface */
  578. ibdev->smi = ib_create_mi ( ibdev, IB_QPT_SMI );
  579. if ( ! ibdev->smi ) {
  580. DBGC ( ibdev, "IBDEV %s could not create SMI\n", ibdev->name );
  581. rc = -ENOMEM;
  582. goto err_create_smi;
  583. }
  584. /* Create subnet management agent */
  585. if ( ( rc = ib_create_sma ( ibdev, ibdev->smi ) ) != 0 ) {
  586. DBGC ( ibdev, "IBDEV %s could not create SMA: %s\n",
  587. ibdev->name, strerror ( rc ) );
  588. goto err_create_sma;
  589. }
  590. /* Create general services interface */
  591. ibdev->gsi = ib_create_mi ( ibdev, IB_QPT_GSI );
  592. if ( ! ibdev->gsi ) {
  593. DBGC ( ibdev, "IBDEV %s could not create GSI\n", ibdev->name );
  594. rc = -ENOMEM;
  595. goto err_create_gsi;
  596. }
  597. /* Add to head of open devices list */
  598. list_add ( &ibdev->open_list, &open_ib_devices );
  599. /* Notify drivers of device state change */
  600. ib_notify ( ibdev );
  601. assert ( ibdev->open_count == 1 );
  602. return 0;
  603. ib_destroy_mi ( ibdev, ibdev->gsi );
  604. err_create_gsi:
  605. ib_destroy_sma ( ibdev, ibdev->smi );
  606. err_create_sma:
  607. ib_destroy_mi ( ibdev, ibdev->smi );
  608. err_create_smi:
  609. ibdev->op->close ( ibdev );
  610. err_open:
  611. assert ( ibdev->open_count == 1 );
  612. ibdev->open_count = 0;
  613. return rc;
  614. }
  615. /**
  616. * Close port
  617. *
  618. * @v ibdev Infiniband device
  619. */
  620. void ib_close ( struct ib_device *ibdev ) {
  621. /* Decrement device open request counter */
  622. ibdev->open_count--;
  623. /* Close device if this was the last remaining requested opening */
  624. if ( ibdev->open_count == 0 ) {
  625. ib_notify ( ibdev );
  626. list_del ( &ibdev->open_list );
  627. ib_destroy_mi ( ibdev, ibdev->gsi );
  628. ib_destroy_sma ( ibdev, ibdev->smi );
  629. ib_destroy_mi ( ibdev, ibdev->smi );
  630. ibdev->op->close ( ibdev );
  631. ibdev->port_state = IB_PORT_STATE_DOWN;
  632. }
  633. }
  634. /***************************************************************************
  635. *
  636. * Multicast
  637. *
  638. ***************************************************************************
  639. */
  640. /**
  641. * Attach to multicast group
  642. *
  643. * @v ibdev Infiniband device
  644. * @v qp Queue pair
  645. * @v gid Multicast GID
  646. * @ret rc Return status code
  647. *
  648. * Note that this function handles only the local device's attachment
  649. * to the multicast GID; it does not issue the relevant MADs to join
  650. * the multicast group on the subnet.
  651. */
  652. int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  653. union ib_gid *gid ) {
  654. struct ib_multicast_gid *mgid;
  655. int rc;
  656. /* Sanity check */
  657. assert ( qp != NULL );
  658. /* Add to software multicast GID list */
  659. mgid = zalloc ( sizeof ( *mgid ) );
  660. if ( ! mgid ) {
  661. rc = -ENOMEM;
  662. goto err_alloc_mgid;
  663. }
  664. memcpy ( &mgid->gid, gid, sizeof ( mgid->gid ) );
  665. list_add_tail ( &mgid->list, &qp->mgids );
  666. /* Add to hardware multicast GID list */
  667. if ( ( rc = ibdev->op->mcast_attach ( ibdev, qp, gid ) ) != 0 )
  668. goto err_dev_mcast_attach;
  669. return 0;
  670. err_dev_mcast_attach:
  671. list_del ( &mgid->list );
  672. free ( mgid );
  673. err_alloc_mgid:
  674. return rc;
  675. }
  676. /**
  677. * Detach from multicast group
  678. *
  679. * @v ibdev Infiniband device
  680. * @v qp Queue pair
  681. * @v gid Multicast GID
  682. */
  683. void ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  684. union ib_gid *gid ) {
  685. struct ib_multicast_gid *mgid;
  686. /* Sanity check */
  687. assert ( qp != NULL );
  688. /* Remove from hardware multicast GID list */
  689. ibdev->op->mcast_detach ( ibdev, qp, gid );
  690. /* Remove from software multicast GID list */
  691. list_for_each_entry ( mgid, &qp->mgids, list ) {
  692. if ( memcmp ( &mgid->gid, gid, sizeof ( mgid->gid ) ) == 0 ) {
  693. list_del ( &mgid->list );
  694. free ( mgid );
  695. break;
  696. }
  697. }
  698. }
  699. /***************************************************************************
  700. *
  701. * Miscellaneous
  702. *
  703. ***************************************************************************
  704. */
  705. /**
  706. * Count Infiniband HCA ports
  707. *
  708. * @v ibdev Infiniband device
  709. * @ret num_ports Number of ports
  710. */
  711. int ib_count_ports ( struct ib_device *ibdev ) {
  712. struct ib_device *tmp;
  713. int num_ports = 0;
  714. /* Search for IB devices with the same physical device to
  715. * identify port count.
  716. */
  717. for_each_ibdev ( tmp ) {
  718. if ( tmp->dev == ibdev->dev )
  719. num_ports++;
  720. }
  721. return num_ports;
  722. }
  723. /**
  724. * Set port information
  725. *
  726. * @v ibdev Infiniband device
  727. * @v mad Set port information MAD
  728. */
  729. int ib_set_port_info ( struct ib_device *ibdev, union ib_mad *mad ) {
  730. int rc;
  731. /* Adapters with embedded SMAs do not need to support this method */
  732. if ( ! ibdev->op->set_port_info ) {
  733. DBGC ( ibdev, "IBDEV %s does not support setting port "
  734. "information\n", ibdev->name );
  735. return -ENOTSUP;
  736. }
  737. if ( ( rc = ibdev->op->set_port_info ( ibdev, mad ) ) != 0 ) {
  738. DBGC ( ibdev, "IBDEV %s could not set port information: %s\n",
  739. ibdev->name, strerror ( rc ) );
  740. return rc;
  741. }
  742. return 0;
  743. };
  744. /**
  745. * Set partition key table
  746. *
  747. * @v ibdev Infiniband device
  748. * @v mad Set partition key table MAD
  749. */
  750. int ib_set_pkey_table ( struct ib_device *ibdev, union ib_mad *mad ) {
  751. int rc;
  752. /* Adapters with embedded SMAs do not need to support this method */
  753. if ( ! ibdev->op->set_pkey_table ) {
  754. DBGC ( ibdev, "IBDEV %s does not support setting partition "
  755. "key table\n", ibdev->name );
  756. return -ENOTSUP;
  757. }
  758. if ( ( rc = ibdev->op->set_pkey_table ( ibdev, mad ) ) != 0 ) {
  759. DBGC ( ibdev, "IBDEV %s could not set partition key table: "
  760. "%s\n", ibdev->name, strerror ( rc ) );
  761. return rc;
  762. }
  763. return 0;
  764. };
  765. /***************************************************************************
  766. *
  767. * Event queues
  768. *
  769. ***************************************************************************
  770. */
  771. /**
  772. * Poll event queue
  773. *
  774. * @v ibdev Infiniband device
  775. */
  776. void ib_poll_eq ( struct ib_device *ibdev ) {
  777. struct ib_completion_queue *cq;
  778. /* Poll device's event queue */
  779. ibdev->op->poll_eq ( ibdev );
  780. /* Poll all completion queues */
  781. list_for_each_entry ( cq, &ibdev->cqs, list )
  782. ib_poll_cq ( ibdev, cq );
  783. }
  784. /**
  785. * Single-step the Infiniband event queue
  786. *
  787. * @v process Infiniband event queue process
  788. */
  789. static void ib_step ( struct process *process __unused ) {
  790. struct ib_device *ibdev;
  791. list_for_each_entry ( ibdev, &open_ib_devices, open_list )
  792. ib_poll_eq ( ibdev );
  793. }
  794. /** Infiniband event queue process */
  795. PERMANENT_PROCESS ( ib_process, ib_step );
  796. /***************************************************************************
  797. *
  798. * Infiniband device creation/destruction
  799. *
  800. ***************************************************************************
  801. */
  802. /**
  803. * Allocate Infiniband device
  804. *
  805. * @v priv_size Size of driver private data area
  806. * @ret ibdev Infiniband device, or NULL
  807. */
  808. struct ib_device * alloc_ibdev ( size_t priv_size ) {
  809. struct ib_device *ibdev;
  810. void *drv_priv;
  811. size_t total_len;
  812. total_len = ( sizeof ( *ibdev ) + priv_size );
  813. ibdev = zalloc ( total_len );
  814. if ( ibdev ) {
  815. drv_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) );
  816. ib_set_drvdata ( ibdev, drv_priv );
  817. INIT_LIST_HEAD ( &ibdev->list );
  818. INIT_LIST_HEAD ( &ibdev->open_list );
  819. INIT_LIST_HEAD ( &ibdev->cqs );
  820. INIT_LIST_HEAD ( &ibdev->qps );
  821. ibdev->port_state = IB_PORT_STATE_DOWN;
  822. ibdev->lid = IB_LID_NONE;
  823. ibdev->pkey = IB_PKEY_DEFAULT;
  824. }
  825. return ibdev;
  826. }
  827. /**
  828. * Register Infiniband device
  829. *
  830. * @v ibdev Infiniband device
  831. * @ret rc Return status code
  832. */
  833. int register_ibdev ( struct ib_device *ibdev ) {
  834. struct ib_driver *driver;
  835. int rc;
  836. /* Record device index and create device name */
  837. if ( ibdev->name[0] == '\0' ) {
  838. snprintf ( ibdev->name, sizeof ( ibdev->name ), "inf%d",
  839. ibdev_index );
  840. }
  841. ibdev->index = ++ibdev_index;
  842. /* Add to device list */
  843. ibdev_get ( ibdev );
  844. list_add_tail ( &ibdev->list, &ib_devices );
  845. DBGC ( ibdev, "IBDEV %s registered (phys %s)\n", ibdev->name,
  846. ibdev->dev->name );
  847. /* Probe device */
  848. for_each_table_entry ( driver, IB_DRIVERS ) {
  849. if ( ( rc = driver->probe ( ibdev ) ) != 0 ) {
  850. DBGC ( ibdev, "IBDEV %s could not add %s device: %s\n",
  851. ibdev->name, driver->name, strerror ( rc ) );
  852. goto err_probe;
  853. }
  854. }
  855. return 0;
  856. err_probe:
  857. for_each_table_entry_continue_reverse ( driver, IB_DRIVERS )
  858. driver->remove ( ibdev );
  859. list_del ( &ibdev->list );
  860. ibdev_put ( ibdev );
  861. return rc;
  862. }
  863. /**
  864. * Unregister Infiniband device
  865. *
  866. * @v ibdev Infiniband device
  867. */
  868. void unregister_ibdev ( struct ib_device *ibdev ) {
  869. struct ib_driver *driver;
  870. /* Remove device */
  871. for_each_table_entry_reverse ( driver, IB_DRIVERS )
  872. driver->remove ( ibdev );
  873. /* Remove from device list */
  874. list_del ( &ibdev->list );
  875. ibdev_put ( ibdev );
  876. DBGC ( ibdev, "IBDEV %s unregistered\n", ibdev->name );
  877. /* Reset device index if no devices remain */
  878. if ( list_empty ( &ib_devices ) )
  879. ibdev_index = 0;
  880. }
  881. /**
  882. * Find Infiniband device by GID
  883. *
  884. * @v gid GID
  885. * @ret ibdev Infiniband device, or NULL
  886. */
  887. struct ib_device * find_ibdev ( union ib_gid *gid ) {
  888. struct ib_device *ibdev;
  889. for_each_ibdev ( ibdev ) {
  890. if ( memcmp ( gid, &ibdev->gid, sizeof ( *gid ) ) == 0 )
  891. return ibdev;
  892. }
  893. return NULL;
  894. }
  895. /**
  896. * Get most recently opened Infiniband device
  897. *
  898. * @ret ibdev Most recently opened Infiniband device, or NULL
  899. */
  900. struct ib_device * last_opened_ibdev ( void ) {
  901. struct ib_device *ibdev;
  902. ibdev = list_first_entry ( &open_ib_devices, struct ib_device,
  903. open_list );
  904. if ( ! ibdev )
  905. return NULL;
  906. assert ( ibdev->open_count != 0 );
  907. return ibdev;
  908. }
  /* The requirements below are dragged in via register_ibdev() */
  REQUIRING_SYMBOL ( register_ibdev );

  /* Drag in the Infiniband configuration object */
  REQUIRE_OBJECT ( config_infiniband );