You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

infiniband.c 25KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009
  1. /*
  2. * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License as
  6. * published by the Free Software Foundation; either version 2 of the
  7. * License, or any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful, but
  10. * WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write to the Free Software
  16. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  17. * 02110-1301, USA.
  18. *
  19. * You can also choose to distribute this program under the terms of
  20. * the Unmodified Binary Distribution Licence (as given in the file
  21. * COPYING.UBDL), provided that you have satisfied its requirements.
  22. */
  23. FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
  24. #include <stdint.h>
  25. #include <stdlib.h>
  26. #include <stdio.h>
  27. #include <string.h>
  28. #include <unistd.h>
  29. #include <byteswap.h>
  30. #include <errno.h>
  31. #include <assert.h>
  32. #include <ipxe/list.h>
  33. #include <ipxe/errortab.h>
  34. #include <ipxe/if_arp.h>
  35. #include <ipxe/netdevice.h>
  36. #include <ipxe/iobuf.h>
  37. #include <ipxe/process.h>
  38. #include <ipxe/infiniband.h>
  39. #include <ipxe/ib_mi.h>
  40. #include <ipxe/ib_sma.h>
  41. /** @file
  42. *
  43. * Infiniband protocol
  44. *
  45. */
  46. /** List of Infiniband devices */
  47. struct list_head ib_devices = LIST_HEAD_INIT ( ib_devices );
  48. /** List of open Infiniband devices, in reverse order of opening */
  49. static struct list_head open_ib_devices = LIST_HEAD_INIT ( open_ib_devices );
  50. /* Disambiguate the various possible EINPROGRESSes */
  51. #define EINPROGRESS_INIT __einfo_error ( EINFO_EINPROGRESS_INIT )
  52. #define EINFO_EINPROGRESS_INIT __einfo_uniqify \
  53. ( EINFO_EINPROGRESS, 0x01, "Initialising" )
  54. #define EINPROGRESS_ARMED __einfo_error ( EINFO_EINPROGRESS_ARMED )
  55. #define EINFO_EINPROGRESS_ARMED __einfo_uniqify \
  56. ( EINFO_EINPROGRESS, 0x02, "Armed" )
  57. /** Human-readable message for the link statuses */
  58. struct errortab infiniband_errors[] __errortab = {
  59. __einfo_errortab ( EINFO_EINPROGRESS_INIT ),
  60. __einfo_errortab ( EINFO_EINPROGRESS_ARMED ),
  61. };
  62. /***************************************************************************
  63. *
  64. * Completion queues
  65. *
  66. ***************************************************************************
  67. */
  68. /**
  69. * Create completion queue
  70. *
  71. * @v ibdev Infiniband device
  72. * @v num_cqes Number of completion queue entries
  73. * @v op Completion queue operations
  74. * @ret cq New completion queue
  75. */
  76. struct ib_completion_queue *
  77. ib_create_cq ( struct ib_device *ibdev, unsigned int num_cqes,
  78. struct ib_completion_queue_operations *op ) {
  79. struct ib_completion_queue *cq;
  80. int rc;
  81. DBGC ( ibdev, "IBDEV %p creating completion queue\n", ibdev );
  82. /* Allocate and initialise data structure */
  83. cq = zalloc ( sizeof ( *cq ) );
  84. if ( ! cq )
  85. goto err_alloc_cq;
  86. cq->ibdev = ibdev;
  87. list_add ( &cq->list, &ibdev->cqs );
  88. cq->num_cqes = num_cqes;
  89. INIT_LIST_HEAD ( &cq->work_queues );
  90. cq->op = op;
  91. /* Perform device-specific initialisation and get CQN */
  92. if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) {
  93. DBGC ( ibdev, "IBDEV %p could not initialise completion "
  94. "queue: %s\n", ibdev, strerror ( rc ) );
  95. goto err_dev_create_cq;
  96. }
  97. DBGC ( ibdev, "IBDEV %p created %d-entry completion queue %p (%p) "
  98. "with CQN %#lx\n", ibdev, num_cqes, cq,
  99. ib_cq_get_drvdata ( cq ), cq->cqn );
  100. return cq;
  101. ibdev->op->destroy_cq ( ibdev, cq );
  102. err_dev_create_cq:
  103. list_del ( &cq->list );
  104. free ( cq );
  105. err_alloc_cq:
  106. return NULL;
  107. }
  108. /**
  109. * Destroy completion queue
  110. *
  111. * @v ibdev Infiniband device
  112. * @v cq Completion queue
  113. */
  114. void ib_destroy_cq ( struct ib_device *ibdev,
  115. struct ib_completion_queue *cq ) {
  116. DBGC ( ibdev, "IBDEV %p destroying completion queue %#lx\n",
  117. ibdev, cq->cqn );
  118. assert ( list_empty ( &cq->work_queues ) );
  119. ibdev->op->destroy_cq ( ibdev, cq );
  120. list_del ( &cq->list );
  121. free ( cq );
  122. }
  123. /**
  124. * Poll completion queue
  125. *
  126. * @v ibdev Infiniband device
  127. * @v cq Completion queue
  128. */
  129. void ib_poll_cq ( struct ib_device *ibdev,
  130. struct ib_completion_queue *cq ) {
  131. struct ib_work_queue *wq;
  132. /* Poll completion queue */
  133. ibdev->op->poll_cq ( ibdev, cq );
  134. /* Refill receive work queues */
  135. list_for_each_entry ( wq, &cq->work_queues, list ) {
  136. if ( ! wq->is_send )
  137. ib_refill_recv ( ibdev, wq->qp );
  138. }
  139. }
  140. /***************************************************************************
  141. *
  142. * Work queues
  143. *
  144. ***************************************************************************
  145. */
  146. /**
  147. * Create queue pair
  148. *
  149. * @v ibdev Infiniband device
  150. * @v type Queue pair type
  151. * @v num_send_wqes Number of send work queue entries
  152. * @v send_cq Send completion queue
  153. * @v num_recv_wqes Number of receive work queue entries
  154. * @v recv_cq Receive completion queue
  155. * @v op Queue pair operations
  156. * @ret qp Queue pair
  157. *
  158. * The queue pair will be left in the INIT state; you must call
  159. * ib_modify_qp() before it is ready to use for sending and receiving.
  160. */
  161. struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev,
  162. enum ib_queue_pair_type type,
  163. unsigned int num_send_wqes,
  164. struct ib_completion_queue *send_cq,
  165. unsigned int num_recv_wqes,
  166. struct ib_completion_queue *recv_cq,
  167. struct ib_queue_pair_operations *op ) {
  168. struct ib_queue_pair *qp;
  169. size_t total_size;
  170. int rc;
  171. DBGC ( ibdev, "IBDEV %p creating queue pair\n", ibdev );
  172. /* Allocate and initialise data structure */
  173. total_size = ( sizeof ( *qp ) +
  174. ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) +
  175. ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) );
  176. qp = zalloc ( total_size );
  177. if ( ! qp )
  178. goto err_alloc_qp;
  179. qp->ibdev = ibdev;
  180. list_add ( &qp->list, &ibdev->qps );
  181. qp->type = type;
  182. qp->send.qp = qp;
  183. qp->send.is_send = 1;
  184. qp->send.cq = send_cq;
  185. list_add ( &qp->send.list, &send_cq->work_queues );
  186. qp->send.psn = ( random() & 0xffffffUL );
  187. qp->send.num_wqes = num_send_wqes;
  188. qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) );
  189. qp->recv.qp = qp;
  190. qp->recv.cq = recv_cq;
  191. list_add ( &qp->recv.list, &recv_cq->work_queues );
  192. qp->recv.psn = ( random() & 0xffffffUL );
  193. qp->recv.num_wqes = num_recv_wqes;
  194. qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) +
  195. ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ));
  196. INIT_LIST_HEAD ( &qp->mgids );
  197. qp->op = op;
  198. /* Perform device-specific initialisation and get QPN */
  199. if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) {
  200. DBGC ( ibdev, "IBDEV %p could not initialise queue pair: "
  201. "%s\n", ibdev, strerror ( rc ) );
  202. goto err_dev_create_qp;
  203. }
  204. DBGC ( ibdev, "IBDEV %p created queue pair %p (%p) with QPN %#lx\n",
  205. ibdev, qp, ib_qp_get_drvdata ( qp ), qp->qpn );
  206. DBGC ( ibdev, "IBDEV %p QPN %#lx has %d send entries at [%p,%p)\n",
  207. ibdev, qp->qpn, num_send_wqes, qp->send.iobufs,
  208. qp->recv.iobufs );
  209. DBGC ( ibdev, "IBDEV %p QPN %#lx has %d receive entries at [%p,%p)\n",
  210. ibdev, qp->qpn, num_recv_wqes, qp->recv.iobufs,
  211. ( ( ( void * ) qp ) + total_size ) );
  212. /* Calculate externally-visible QPN */
  213. switch ( type ) {
  214. case IB_QPT_SMI:
  215. qp->ext_qpn = IB_QPN_SMI;
  216. break;
  217. case IB_QPT_GSI:
  218. qp->ext_qpn = IB_QPN_GSI;
  219. break;
  220. default:
  221. qp->ext_qpn = qp->qpn;
  222. break;
  223. }
  224. if ( qp->ext_qpn != qp->qpn ) {
  225. DBGC ( ibdev, "IBDEV %p QPN %#lx has external QPN %#lx\n",
  226. ibdev, qp->qpn, qp->ext_qpn );
  227. }
  228. return qp;
  229. ibdev->op->destroy_qp ( ibdev, qp );
  230. err_dev_create_qp:
  231. list_del ( &qp->send.list );
  232. list_del ( &qp->recv.list );
  233. list_del ( &qp->list );
  234. free ( qp );
  235. err_alloc_qp:
  236. return NULL;
  237. }
  238. /**
  239. * Modify queue pair
  240. *
  241. * @v ibdev Infiniband device
  242. * @v qp Queue pair
  243. * @ret rc Return status code
  244. */
  245. int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  246. int rc;
  247. DBGC ( ibdev, "IBDEV %p modifying QPN %#lx\n", ibdev, qp->qpn );
  248. if ( ( rc = ibdev->op->modify_qp ( ibdev, qp ) ) != 0 ) {
  249. DBGC ( ibdev, "IBDEV %p could not modify QPN %#lx: %s\n",
  250. ibdev, qp->qpn, strerror ( rc ) );
  251. return rc;
  252. }
  253. return 0;
  254. }
  255. /**
  256. * Destroy queue pair
  257. *
  258. * @v ibdev Infiniband device
  259. * @v qp Queue pair
  260. */
  261. void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  262. struct io_buffer *iobuf;
  263. unsigned int i;
  264. DBGC ( ibdev, "IBDEV %p destroying QPN %#lx\n",
  265. ibdev, qp->qpn );
  266. assert ( list_empty ( &qp->mgids ) );
  267. /* Perform device-specific destruction */
  268. ibdev->op->destroy_qp ( ibdev, qp );
  269. /* Complete any remaining I/O buffers with errors */
  270. for ( i = 0 ; i < qp->send.num_wqes ; i++ ) {
  271. if ( ( iobuf = qp->send.iobufs[i] ) != NULL )
  272. ib_complete_send ( ibdev, qp, iobuf, -ECANCELED );
  273. }
  274. for ( i = 0 ; i < qp->recv.num_wqes ; i++ ) {
  275. if ( ( iobuf = qp->recv.iobufs[i] ) != NULL ) {
  276. ib_complete_recv ( ibdev, qp, NULL, NULL, iobuf,
  277. -ECANCELED );
  278. }
  279. }
  280. /* Remove work queues from completion queue */
  281. list_del ( &qp->send.list );
  282. list_del ( &qp->recv.list );
  283. /* Free QP */
  284. list_del ( &qp->list );
  285. free ( qp );
  286. }
  287. /**
  288. * Find queue pair by QPN
  289. *
  290. * @v ibdev Infiniband device
  291. * @v qpn Queue pair number
  292. * @ret qp Queue pair, or NULL
  293. */
  294. struct ib_queue_pair * ib_find_qp_qpn ( struct ib_device *ibdev,
  295. unsigned long qpn ) {
  296. struct ib_queue_pair *qp;
  297. list_for_each_entry ( qp, &ibdev->qps, list ) {
  298. if ( ( qpn == qp->qpn ) || ( qpn == qp->ext_qpn ) )
  299. return qp;
  300. }
  301. return NULL;
  302. }
  303. /**
  304. * Find queue pair by multicast GID
  305. *
  306. * @v ibdev Infiniband device
  307. * @v gid Multicast GID
  308. * @ret qp Queue pair, or NULL
  309. */
  310. struct ib_queue_pair * ib_find_qp_mgid ( struct ib_device *ibdev,
  311. union ib_gid *gid ) {
  312. struct ib_queue_pair *qp;
  313. struct ib_multicast_gid *mgid;
  314. list_for_each_entry ( qp, &ibdev->qps, list ) {
  315. list_for_each_entry ( mgid, &qp->mgids, list ) {
  316. if ( memcmp ( &mgid->gid, gid,
  317. sizeof ( mgid->gid ) ) == 0 ) {
  318. return qp;
  319. }
  320. }
  321. }
  322. return NULL;
  323. }
  324. /**
  325. * Find work queue belonging to completion queue
  326. *
  327. * @v cq Completion queue
  328. * @v qpn Queue pair number
  329. * @v is_send Find send work queue (rather than receive)
  330. * @ret wq Work queue, or NULL if not found
  331. */
  332. struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
  333. unsigned long qpn, int is_send ) {
  334. struct ib_work_queue *wq;
  335. list_for_each_entry ( wq, &cq->work_queues, list ) {
  336. if ( ( wq->qp->qpn == qpn ) && ( wq->is_send == is_send ) )
  337. return wq;
  338. }
  339. return NULL;
  340. }
  341. /**
  342. * Post send work queue entry
  343. *
  344. * @v ibdev Infiniband device
  345. * @v qp Queue pair
  346. * @v dest Destination address vector
  347. * @v iobuf I/O buffer
  348. * @ret rc Return status code
  349. */
  350. int ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  351. struct ib_address_vector *dest,
  352. struct io_buffer *iobuf ) {
  353. struct ib_address_vector dest_copy;
  354. int rc;
  355. /* Check queue fill level */
  356. if ( qp->send.fill >= qp->send.num_wqes ) {
  357. DBGC ( ibdev, "IBDEV %p QPN %#lx send queue full\n",
  358. ibdev, qp->qpn );
  359. return -ENOBUFS;
  360. }
  361. /* Use default address vector if none specified */
  362. if ( ! dest )
  363. dest = &qp->av;
  364. /* Make modifiable copy of address vector */
  365. memcpy ( &dest_copy, dest, sizeof ( dest_copy ) );
  366. dest = &dest_copy;
  367. /* Fill in optional parameters in address vector */
  368. if ( ! dest->qkey )
  369. dest->qkey = qp->qkey;
  370. if ( ! dest->rate )
  371. dest->rate = IB_RATE_2_5;
  372. /* Post to hardware */
  373. if ( ( rc = ibdev->op->post_send ( ibdev, qp, dest, iobuf ) ) != 0 ) {
  374. DBGC ( ibdev, "IBDEV %p QPN %#lx could not post send WQE: "
  375. "%s\n", ibdev, qp->qpn, strerror ( rc ) );
  376. return rc;
  377. }
  378. qp->send.fill++;
  379. return 0;
  380. }
  381. /**
  382. * Post receive work queue entry
  383. *
  384. * @v ibdev Infiniband device
  385. * @v qp Queue pair
  386. * @v iobuf I/O buffer
  387. * @ret rc Return status code
  388. */
  389. int ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  390. struct io_buffer *iobuf ) {
  391. int rc;
  392. /* Check packet length */
  393. if ( iob_tailroom ( iobuf ) < IB_MAX_PAYLOAD_SIZE ) {
  394. DBGC ( ibdev, "IBDEV %p QPN %#lx wrong RX buffer size (%zd)\n",
  395. ibdev, qp->qpn, iob_tailroom ( iobuf ) );
  396. return -EINVAL;
  397. }
  398. /* Check queue fill level */
  399. if ( qp->recv.fill >= qp->recv.num_wqes ) {
  400. DBGC ( ibdev, "IBDEV %p QPN %#lx receive queue full\n",
  401. ibdev, qp->qpn );
  402. return -ENOBUFS;
  403. }
  404. /* Post to hardware */
  405. if ( ( rc = ibdev->op->post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
  406. DBGC ( ibdev, "IBDEV %p QPN %#lx could not post receive WQE: "
  407. "%s\n", ibdev, qp->qpn, strerror ( rc ) );
  408. return rc;
  409. }
  410. qp->recv.fill++;
  411. return 0;
  412. }
  413. /**
  414. * Complete send work queue entry
  415. *
  416. * @v ibdev Infiniband device
  417. * @v qp Queue pair
  418. * @v iobuf I/O buffer
  419. * @v rc Completion status code
  420. */
  421. void ib_complete_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  422. struct io_buffer *iobuf, int rc ) {
  423. if ( qp->send.cq->op->complete_send ) {
  424. qp->send.cq->op->complete_send ( ibdev, qp, iobuf, rc );
  425. } else {
  426. free_iob ( iobuf );
  427. }
  428. qp->send.fill--;
  429. }
  430. /**
  431. * Complete receive work queue entry
  432. *
  433. * @v ibdev Infiniband device
  434. * @v qp Queue pair
  435. * @v dest Destination address vector, or NULL
  436. * @v source Source address vector, or NULL
  437. * @v iobuf I/O buffer
  438. * @v rc Completion status code
  439. */
  440. void ib_complete_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  441. struct ib_address_vector *dest,
  442. struct ib_address_vector *source,
  443. struct io_buffer *iobuf, int rc ) {
  444. if ( qp->recv.cq->op->complete_recv ) {
  445. qp->recv.cq->op->complete_recv ( ibdev, qp, dest, source,
  446. iobuf, rc );
  447. } else {
  448. free_iob ( iobuf );
  449. }
  450. qp->recv.fill--;
  451. }
  452. /**
  453. * Refill receive work queue
  454. *
  455. * @v ibdev Infiniband device
  456. * @v qp Queue pair
  457. */
  458. void ib_refill_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  459. struct io_buffer *iobuf;
  460. int rc;
  461. /* Keep filling while unfilled entries remain */
  462. while ( qp->recv.fill < qp->recv.num_wqes ) {
  463. /* Allocate I/O buffer */
  464. iobuf = qp->op->alloc_iob ( IB_MAX_PAYLOAD_SIZE );
  465. if ( ! iobuf ) {
  466. /* Non-fatal; we will refill on next attempt */
  467. return;
  468. }
  469. /* Post I/O buffer */
  470. if ( ( rc = ib_post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
  471. DBGC ( ibdev, "IBDEV %p could not refill: %s\n",
  472. ibdev, strerror ( rc ) );
  473. free_iob ( iobuf );
  474. /* Give up */
  475. return;
  476. }
  477. }
  478. }
  479. /***************************************************************************
  480. *
  481. * Link control
  482. *
  483. ***************************************************************************
  484. */
  485. /**
  486. * Get link state
  487. *
  488. * @v ibdev Infiniband device
  489. * @ret rc Link status code
  490. */
  491. int ib_link_rc ( struct ib_device *ibdev ) {
  492. switch ( ibdev->port_state ) {
  493. case IB_PORT_STATE_DOWN: return -ENOTCONN;
  494. case IB_PORT_STATE_INIT: return -EINPROGRESS_INIT;
  495. case IB_PORT_STATE_ARMED: return -EINPROGRESS_ARMED;
  496. case IB_PORT_STATE_ACTIVE: return 0;
  497. default: return -EINVAL;
  498. }
  499. }
  500. /**
  501. * Textual representation of Infiniband link state
  502. *
  503. * @v ibdev Infiniband device
  504. * @ret link_text Link state text
  505. */
  506. static const char * ib_link_state_text ( struct ib_device *ibdev ) {
  507. switch ( ibdev->port_state ) {
  508. case IB_PORT_STATE_DOWN: return "DOWN";
  509. case IB_PORT_STATE_INIT: return "INIT";
  510. case IB_PORT_STATE_ARMED: return "ARMED";
  511. case IB_PORT_STATE_ACTIVE: return "ACTIVE";
  512. default: return "UNKNOWN";
  513. }
  514. }
  515. /**
  516. * Notify drivers of Infiniband device or link state change
  517. *
  518. * @v ibdev Infiniband device
  519. */
  520. static void ib_notify ( struct ib_device *ibdev ) {
  521. struct ib_driver *driver;
  522. for_each_table_entry ( driver, IB_DRIVERS )
  523. driver->notify ( ibdev );
  524. }
  525. /**
  526. * Notify of Infiniband link state change
  527. *
  528. * @v ibdev Infiniband device
  529. */
  530. void ib_link_state_changed ( struct ib_device *ibdev ) {
  531. DBGC ( ibdev, "IBDEV %p link state is %s\n",
  532. ibdev, ib_link_state_text ( ibdev ) );
  533. /* Notify drivers of link state change */
  534. ib_notify ( ibdev );
  535. }
  536. /**
  537. * Open port
  538. *
  539. * @v ibdev Infiniband device
  540. * @ret rc Return status code
  541. */
  542. int ib_open ( struct ib_device *ibdev ) {
  543. int rc;
  544. /* Increment device open request counter */
  545. if ( ibdev->open_count++ > 0 ) {
  546. /* Device was already open; do nothing */
  547. return 0;
  548. }
  549. /* Open device */
  550. if ( ( rc = ibdev->op->open ( ibdev ) ) != 0 ) {
  551. DBGC ( ibdev, "IBDEV %p could not open: %s\n",
  552. ibdev, strerror ( rc ) );
  553. goto err_open;
  554. }
  555. /* Create subnet management interface */
  556. ibdev->smi = ib_create_mi ( ibdev, IB_QPT_SMI );
  557. if ( ! ibdev->smi ) {
  558. DBGC ( ibdev, "IBDEV %p could not create SMI\n", ibdev );
  559. rc = -ENOMEM;
  560. goto err_create_smi;
  561. }
  562. /* Create subnet management agent */
  563. if ( ( rc = ib_create_sma ( ibdev, ibdev->smi ) ) != 0 ) {
  564. DBGC ( ibdev, "IBDEV %p could not create SMA: %s\n",
  565. ibdev, strerror ( rc ) );
  566. goto err_create_sma;
  567. }
  568. /* Create general services interface */
  569. ibdev->gsi = ib_create_mi ( ibdev, IB_QPT_GSI );
  570. if ( ! ibdev->gsi ) {
  571. DBGC ( ibdev, "IBDEV %p could not create GSI\n", ibdev );
  572. rc = -ENOMEM;
  573. goto err_create_gsi;
  574. }
  575. /* Add to head of open devices list */
  576. list_add ( &ibdev->open_list, &open_ib_devices );
  577. /* Notify drivers of device state change */
  578. ib_notify ( ibdev );
  579. assert ( ibdev->open_count == 1 );
  580. return 0;
  581. ib_destroy_mi ( ibdev, ibdev->gsi );
  582. err_create_gsi:
  583. ib_destroy_sma ( ibdev, ibdev->smi );
  584. err_create_sma:
  585. ib_destroy_mi ( ibdev, ibdev->smi );
  586. err_create_smi:
  587. ibdev->op->close ( ibdev );
  588. err_open:
  589. assert ( ibdev->open_count == 1 );
  590. ibdev->open_count = 0;
  591. return rc;
  592. }
  593. /**
  594. * Close port
  595. *
  596. * @v ibdev Infiniband device
  597. */
  598. void ib_close ( struct ib_device *ibdev ) {
  599. /* Decrement device open request counter */
  600. ibdev->open_count--;
  601. /* Close device if this was the last remaining requested opening */
  602. if ( ibdev->open_count == 0 ) {
  603. ib_notify ( ibdev );
  604. list_del ( &ibdev->open_list );
  605. ib_destroy_mi ( ibdev, ibdev->gsi );
  606. ib_destroy_sma ( ibdev, ibdev->smi );
  607. ib_destroy_mi ( ibdev, ibdev->smi );
  608. ibdev->op->close ( ibdev );
  609. ibdev->port_state = IB_PORT_STATE_DOWN;
  610. }
  611. }
  612. /***************************************************************************
  613. *
  614. * Multicast
  615. *
  616. ***************************************************************************
  617. */
  618. /**
  619. * Attach to multicast group
  620. *
  621. * @v ibdev Infiniband device
  622. * @v qp Queue pair
  623. * @v gid Multicast GID
  624. * @ret rc Return status code
  625. *
  626. * Note that this function handles only the local device's attachment
  627. * to the multicast GID; it does not issue the relevant MADs to join
  628. * the multicast group on the subnet.
  629. */
  630. int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  631. union ib_gid *gid ) {
  632. struct ib_multicast_gid *mgid;
  633. int rc;
  634. /* Add to software multicast GID list */
  635. mgid = zalloc ( sizeof ( *mgid ) );
  636. if ( ! mgid ) {
  637. rc = -ENOMEM;
  638. goto err_alloc_mgid;
  639. }
  640. memcpy ( &mgid->gid, gid, sizeof ( mgid->gid ) );
  641. list_add ( &mgid->list, &qp->mgids );
  642. /* Add to hardware multicast GID list */
  643. if ( ( rc = ibdev->op->mcast_attach ( ibdev, qp, gid ) ) != 0 )
  644. goto err_dev_mcast_attach;
  645. return 0;
  646. err_dev_mcast_attach:
  647. list_del ( &mgid->list );
  648. free ( mgid );
  649. err_alloc_mgid:
  650. return rc;
  651. }
  652. /**
  653. * Detach from multicast group
  654. *
  655. * @v ibdev Infiniband device
  656. * @v qp Queue pair
  657. * @v gid Multicast GID
  658. */
  659. void ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  660. union ib_gid *gid ) {
  661. struct ib_multicast_gid *mgid;
  662. /* Remove from hardware multicast GID list */
  663. ibdev->op->mcast_detach ( ibdev, qp, gid );
  664. /* Remove from software multicast GID list */
  665. list_for_each_entry ( mgid, &qp->mgids, list ) {
  666. if ( memcmp ( &mgid->gid, gid, sizeof ( mgid->gid ) ) == 0 ) {
  667. list_del ( &mgid->list );
  668. free ( mgid );
  669. break;
  670. }
  671. }
  672. }
  673. /***************************************************************************
  674. *
  675. * Miscellaneous
  676. *
  677. ***************************************************************************
  678. */
  679. /**
  680. * Count Infiniband HCA ports
  681. *
  682. * @v ibdev Infiniband device
  683. * @ret num_ports Number of ports
  684. */
  685. int ib_count_ports ( struct ib_device *ibdev ) {
  686. struct ib_device *tmp;
  687. int num_ports = 0;
  688. /* Search for IB devices with the same physical device to
  689. * identify port count.
  690. */
  691. for_each_ibdev ( tmp ) {
  692. if ( tmp->dev == ibdev->dev )
  693. num_ports++;
  694. }
  695. return num_ports;
  696. }
  697. /**
  698. * Set port information
  699. *
  700. * @v ibdev Infiniband device
  701. * @v mad Set port information MAD
  702. */
  703. int ib_set_port_info ( struct ib_device *ibdev, union ib_mad *mad ) {
  704. int rc;
  705. /* Adapters with embedded SMAs do not need to support this method */
  706. if ( ! ibdev->op->set_port_info ) {
  707. DBGC ( ibdev, "IBDEV %p does not support setting port "
  708. "information\n", ibdev );
  709. return -ENOTSUP;
  710. }
  711. if ( ( rc = ibdev->op->set_port_info ( ibdev, mad ) ) != 0 ) {
  712. DBGC ( ibdev, "IBDEV %p could not set port information: %s\n",
  713. ibdev, strerror ( rc ) );
  714. return rc;
  715. }
  716. return 0;
  717. };
  718. /**
  719. * Set partition key table
  720. *
  721. * @v ibdev Infiniband device
  722. * @v mad Set partition key table MAD
  723. */
  724. int ib_set_pkey_table ( struct ib_device *ibdev, union ib_mad *mad ) {
  725. int rc;
  726. /* Adapters with embedded SMAs do not need to support this method */
  727. if ( ! ibdev->op->set_pkey_table ) {
  728. DBGC ( ibdev, "IBDEV %p does not support setting partition "
  729. "key table\n", ibdev );
  730. return -ENOTSUP;
  731. }
  732. if ( ( rc = ibdev->op->set_pkey_table ( ibdev, mad ) ) != 0 ) {
  733. DBGC ( ibdev, "IBDEV %p could not set partition key table: "
  734. "%s\n", ibdev, strerror ( rc ) );
  735. return rc;
  736. }
  737. return 0;
  738. };
  739. /***************************************************************************
  740. *
  741. * Event queues
  742. *
  743. ***************************************************************************
  744. */
  745. /**
  746. * Poll event queue
  747. *
  748. * @v ibdev Infiniband device
  749. */
  750. void ib_poll_eq ( struct ib_device *ibdev ) {
  751. struct ib_completion_queue *cq;
  752. /* Poll device's event queue */
  753. ibdev->op->poll_eq ( ibdev );
  754. /* Poll all completion queues */
  755. list_for_each_entry ( cq, &ibdev->cqs, list )
  756. ib_poll_cq ( ibdev, cq );
  757. }
  758. /**
  759. * Single-step the Infiniband event queue
  760. *
  761. * @v process Infiniband event queue process
  762. */
  763. static void ib_step ( struct process *process __unused ) {
  764. struct ib_device *ibdev;
  765. list_for_each_entry ( ibdev, &open_ib_devices, open_list )
  766. ib_poll_eq ( ibdev );
  767. }
  768. /** Infiniband event queue process */
  769. PERMANENT_PROCESS ( ib_process, ib_step );
  770. /***************************************************************************
  771. *
  772. * Infiniband device creation/destruction
  773. *
  774. ***************************************************************************
  775. */
  776. /**
  777. * Allocate Infiniband device
  778. *
  779. * @v priv_size Size of driver private data area
  780. * @ret ibdev Infiniband device, or NULL
  781. */
  782. struct ib_device * alloc_ibdev ( size_t priv_size ) {
  783. struct ib_device *ibdev;
  784. void *drv_priv;
  785. size_t total_len;
  786. total_len = ( sizeof ( *ibdev ) + priv_size );
  787. ibdev = zalloc ( total_len );
  788. if ( ibdev ) {
  789. drv_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) );
  790. ib_set_drvdata ( ibdev, drv_priv );
  791. INIT_LIST_HEAD ( &ibdev->list );
  792. INIT_LIST_HEAD ( &ibdev->open_list );
  793. INIT_LIST_HEAD ( &ibdev->cqs );
  794. INIT_LIST_HEAD ( &ibdev->qps );
  795. ibdev->port_state = IB_PORT_STATE_DOWN;
  796. ibdev->lid = IB_LID_NONE;
  797. ibdev->pkey = IB_PKEY_DEFAULT;
  798. }
  799. return ibdev;
  800. }
  801. /**
  802. * Register Infiniband device
  803. *
  804. * @v ibdev Infiniband device
  805. * @ret rc Return status code
  806. */
  807. int register_ibdev ( struct ib_device *ibdev ) {
  808. struct ib_driver *driver;
  809. int rc;
  810. /* Add to device list */
  811. ibdev_get ( ibdev );
  812. list_add_tail ( &ibdev->list, &ib_devices );
  813. DBGC ( ibdev, "IBDEV %p registered (phys %s)\n", ibdev,
  814. ibdev->dev->name );
  815. /* Probe device */
  816. for_each_table_entry ( driver, IB_DRIVERS ) {
  817. if ( ( rc = driver->probe ( ibdev ) ) != 0 ) {
  818. DBGC ( ibdev, "IBDEV %p could not add %s device: %s\n",
  819. ibdev, driver->name, strerror ( rc ) );
  820. goto err_probe;
  821. }
  822. }
  823. return 0;
  824. err_probe:
  825. for_each_table_entry_continue_reverse ( driver, IB_DRIVERS )
  826. driver->remove ( ibdev );
  827. list_del ( &ibdev->list );
  828. ibdev_put ( ibdev );
  829. return rc;
  830. }
  831. /**
  832. * Unregister Infiniband device
  833. *
  834. * @v ibdev Infiniband device
  835. */
  836. void unregister_ibdev ( struct ib_device *ibdev ) {
  837. struct ib_driver *driver;
  838. /* Remove device */
  839. for_each_table_entry_reverse ( driver, IB_DRIVERS )
  840. driver->remove ( ibdev );
  841. /* Remove from device list */
  842. list_del ( &ibdev->list );
  843. ibdev_put ( ibdev );
  844. DBGC ( ibdev, "IBDEV %p unregistered\n", ibdev );
  845. }
  846. /**
  847. * Find Infiniband device by GID
  848. *
  849. * @v gid GID
  850. * @ret ibdev Infiniband device, or NULL
  851. */
  852. struct ib_device * find_ibdev ( union ib_gid *gid ) {
  853. struct ib_device *ibdev;
  854. for_each_ibdev ( ibdev ) {
  855. if ( memcmp ( gid, &ibdev->gid, sizeof ( *gid ) ) == 0 )
  856. return ibdev;
  857. }
  858. return NULL;
  859. }
  860. /**
  861. * Get most recently opened Infiniband device
  862. *
  863. * @ret ibdev Most recently opened Infiniband device, or NULL
  864. */
  865. struct ib_device * last_opened_ibdev ( void ) {
  866. struct ib_device *ibdev;
  867. ibdev = list_first_entry ( &open_ib_devices, struct ib_device,
  868. open_list );
  869. if ( ! ibdev )
  870. return NULL;
  871. assert ( ibdev->open_count != 0 );
  872. return ibdev;
  873. }
  874. /* Drag in objects via register_ibdev() */
  875. REQUIRING_SYMBOL ( register_ibdev );
  876. /* Drag in Infiniband configuration */
  877. REQUIRE_OBJECT ( config_infiniband );
  878. /* Drag in IPoIB */
  879. REQUIRE_OBJECT ( ipoib );