Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

infiniband.c 26KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040
  1. /*
  2. * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License as
  6. * published by the Free Software Foundation; either version 2 of the
  7. * License, or any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful, but
  10. * WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write to the Free Software
  16. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  17. * 02110-1301, USA.
  18. *
  19. * You can also choose to distribute this program under the terms of
  20. * the Unmodified Binary Distribution Licence (as given in the file
  21. * COPYING.UBDL), provided that you have satisfied its requirements.
  22. */
  23. FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
  24. #include <stdint.h>
  25. #include <stdlib.h>
  26. #include <stdio.h>
  27. #include <string.h>
  28. #include <unistd.h>
  29. #include <byteswap.h>
  30. #include <errno.h>
  31. #include <assert.h>
  32. #include <ipxe/list.h>
  33. #include <ipxe/errortab.h>
  34. #include <ipxe/if_arp.h>
  35. #include <ipxe/netdevice.h>
  36. #include <ipxe/iobuf.h>
  37. #include <ipxe/process.h>
  38. #include <ipxe/profile.h>
  39. #include <ipxe/infiniband.h>
  40. #include <ipxe/ib_mi.h>
  41. #include <ipxe/ib_sma.h>
  42. /** @file
  43. *
  44. * Infiniband protocol
  45. *
  46. */
  47. /** List of Infiniband devices */
  48. struct list_head ib_devices = LIST_HEAD_INIT ( ib_devices );
  49. /** List of open Infiniband devices, in reverse order of opening */
  50. static struct list_head open_ib_devices = LIST_HEAD_INIT ( open_ib_devices );
  51. /** Post send work queue entry profiler */
  52. static struct profiler ib_post_send_profiler __profiler =
  53. { .name = "ib.post_send" };
  54. /** Post receive work queue entry profiler */
  55. static struct profiler ib_post_recv_profiler __profiler =
  56. { .name = "ib.post_recv" };
  57. /* Disambiguate the various possible EINPROGRESSes */
  58. #define EINPROGRESS_INIT __einfo_error ( EINFO_EINPROGRESS_INIT )
  59. #define EINFO_EINPROGRESS_INIT __einfo_uniqify \
  60. ( EINFO_EINPROGRESS, 0x01, "Initialising" )
  61. #define EINPROGRESS_ARMED __einfo_error ( EINFO_EINPROGRESS_ARMED )
  62. #define EINFO_EINPROGRESS_ARMED __einfo_uniqify \
  63. ( EINFO_EINPROGRESS, 0x02, "Armed" )
  64. /** Human-readable message for the link statuses */
  65. struct errortab infiniband_errors[] __errortab = {
  66. __einfo_errortab ( EINFO_EINPROGRESS_INIT ),
  67. __einfo_errortab ( EINFO_EINPROGRESS_ARMED ),
  68. };
  69. /***************************************************************************
  70. *
  71. * Completion queues
  72. *
  73. ***************************************************************************
  74. */
  75. /**
  76. * Create completion queue
  77. *
  78. * @v ibdev Infiniband device
  79. * @v num_cqes Number of completion queue entries
  80. * @v op Completion queue operations
  81. * @ret cq New completion queue
  82. */
  83. struct ib_completion_queue *
  84. ib_create_cq ( struct ib_device *ibdev, unsigned int num_cqes,
  85. struct ib_completion_queue_operations *op ) {
  86. struct ib_completion_queue *cq;
  87. int rc;
  88. DBGC ( ibdev, "IBDEV %p creating completion queue\n", ibdev );
  89. /* Allocate and initialise data structure */
  90. cq = zalloc ( sizeof ( *cq ) );
  91. if ( ! cq )
  92. goto err_alloc_cq;
  93. cq->ibdev = ibdev;
  94. list_add ( &cq->list, &ibdev->cqs );
  95. cq->num_cqes = num_cqes;
  96. INIT_LIST_HEAD ( &cq->work_queues );
  97. cq->op = op;
  98. /* Perform device-specific initialisation and get CQN */
  99. if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) {
  100. DBGC ( ibdev, "IBDEV %p could not initialise completion "
  101. "queue: %s\n", ibdev, strerror ( rc ) );
  102. goto err_dev_create_cq;
  103. }
  104. DBGC ( ibdev, "IBDEV %p created %d-entry completion queue %p (%p) "
  105. "with CQN %#lx\n", ibdev, num_cqes, cq,
  106. ib_cq_get_drvdata ( cq ), cq->cqn );
  107. return cq;
  108. ibdev->op->destroy_cq ( ibdev, cq );
  109. err_dev_create_cq:
  110. list_del ( &cq->list );
  111. free ( cq );
  112. err_alloc_cq:
  113. return NULL;
  114. }
  115. /**
  116. * Destroy completion queue
  117. *
  118. * @v ibdev Infiniband device
  119. * @v cq Completion queue
  120. */
  121. void ib_destroy_cq ( struct ib_device *ibdev,
  122. struct ib_completion_queue *cq ) {
  123. DBGC ( ibdev, "IBDEV %p destroying completion queue %#lx\n",
  124. ibdev, cq->cqn );
  125. assert ( list_empty ( &cq->work_queues ) );
  126. ibdev->op->destroy_cq ( ibdev, cq );
  127. list_del ( &cq->list );
  128. free ( cq );
  129. }
  130. /**
  131. * Poll completion queue
  132. *
  133. * @v ibdev Infiniband device
  134. * @v cq Completion queue
  135. */
  136. void ib_poll_cq ( struct ib_device *ibdev,
  137. struct ib_completion_queue *cq ) {
  138. struct ib_work_queue *wq;
  139. /* Poll completion queue */
  140. ibdev->op->poll_cq ( ibdev, cq );
  141. /* Refill receive work queues */
  142. list_for_each_entry ( wq, &cq->work_queues, list ) {
  143. if ( ! wq->is_send )
  144. ib_refill_recv ( ibdev, wq->qp );
  145. }
  146. }
  147. /***************************************************************************
  148. *
  149. * Work queues
  150. *
  151. ***************************************************************************
  152. */
  153. /**
  154. * Create queue pair
  155. *
  156. * @v ibdev Infiniband device
  157. * @v type Queue pair type
  158. * @v num_send_wqes Number of send work queue entries
  159. * @v send_cq Send completion queue
  160. * @v num_recv_wqes Number of receive work queue entries
  161. * @v recv_cq Receive completion queue
  162. * @v op Queue pair operations
  163. * @ret qp Queue pair
  164. *
  165. * The queue pair will be left in the INIT state; you must call
  166. * ib_modify_qp() before it is ready to use for sending and receiving.
  167. */
  168. struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev,
  169. enum ib_queue_pair_type type,
  170. unsigned int num_send_wqes,
  171. struct ib_completion_queue *send_cq,
  172. unsigned int num_recv_wqes,
  173. struct ib_completion_queue *recv_cq,
  174. struct ib_queue_pair_operations *op ) {
  175. struct ib_queue_pair *qp;
  176. size_t total_size;
  177. int rc;
  178. DBGC ( ibdev, "IBDEV %p creating queue pair\n", ibdev );
  179. /* Allocate and initialise data structure */
  180. total_size = ( sizeof ( *qp ) +
  181. ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) +
  182. ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) );
  183. qp = zalloc ( total_size );
  184. if ( ! qp )
  185. goto err_alloc_qp;
  186. qp->ibdev = ibdev;
  187. list_add ( &qp->list, &ibdev->qps );
  188. qp->type = type;
  189. qp->send.qp = qp;
  190. qp->send.is_send = 1;
  191. qp->send.cq = send_cq;
  192. list_add ( &qp->send.list, &send_cq->work_queues );
  193. qp->send.psn = ( random() & 0xffffffUL );
  194. qp->send.num_wqes = num_send_wqes;
  195. qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) );
  196. qp->recv.qp = qp;
  197. qp->recv.cq = recv_cq;
  198. list_add ( &qp->recv.list, &recv_cq->work_queues );
  199. qp->recv.psn = ( random() & 0xffffffUL );
  200. qp->recv.num_wqes = num_recv_wqes;
  201. qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) +
  202. ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ));
  203. INIT_LIST_HEAD ( &qp->mgids );
  204. qp->op = op;
  205. /* Perform device-specific initialisation and get QPN */
  206. if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) {
  207. DBGC ( ibdev, "IBDEV %p could not initialise queue pair: "
  208. "%s\n", ibdev, strerror ( rc ) );
  209. goto err_dev_create_qp;
  210. }
  211. DBGC ( ibdev, "IBDEV %p created queue pair %p (%p) with QPN %#lx\n",
  212. ibdev, qp, ib_qp_get_drvdata ( qp ), qp->qpn );
  213. DBGC ( ibdev, "IBDEV %p QPN %#lx has %d send entries at [%p,%p)\n",
  214. ibdev, qp->qpn, num_send_wqes, qp->send.iobufs,
  215. qp->recv.iobufs );
  216. DBGC ( ibdev, "IBDEV %p QPN %#lx has %d receive entries at [%p,%p)\n",
  217. ibdev, qp->qpn, num_recv_wqes, qp->recv.iobufs,
  218. ( ( ( void * ) qp ) + total_size ) );
  219. /* Calculate externally-visible QPN */
  220. switch ( type ) {
  221. case IB_QPT_SMI:
  222. qp->ext_qpn = IB_QPN_SMI;
  223. break;
  224. case IB_QPT_GSI:
  225. qp->ext_qpn = IB_QPN_GSI;
  226. break;
  227. default:
  228. qp->ext_qpn = qp->qpn;
  229. break;
  230. }
  231. if ( qp->ext_qpn != qp->qpn ) {
  232. DBGC ( ibdev, "IBDEV %p QPN %#lx has external QPN %#lx\n",
  233. ibdev, qp->qpn, qp->ext_qpn );
  234. }
  235. return qp;
  236. ibdev->op->destroy_qp ( ibdev, qp );
  237. err_dev_create_qp:
  238. list_del ( &qp->send.list );
  239. list_del ( &qp->recv.list );
  240. list_del ( &qp->list );
  241. free ( qp );
  242. err_alloc_qp:
  243. return NULL;
  244. }
  245. /**
  246. * Modify queue pair
  247. *
  248. * @v ibdev Infiniband device
  249. * @v qp Queue pair
  250. * @ret rc Return status code
  251. */
  252. int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  253. int rc;
  254. DBGC ( ibdev, "IBDEV %p modifying QPN %#lx\n", ibdev, qp->qpn );
  255. if ( ( rc = ibdev->op->modify_qp ( ibdev, qp ) ) != 0 ) {
  256. DBGC ( ibdev, "IBDEV %p could not modify QPN %#lx: %s\n",
  257. ibdev, qp->qpn, strerror ( rc ) );
  258. return rc;
  259. }
  260. return 0;
  261. }
  262. /**
  263. * Destroy queue pair
  264. *
  265. * @v ibdev Infiniband device
  266. * @v qp Queue pair
  267. */
  268. void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  269. struct io_buffer *iobuf;
  270. unsigned int i;
  271. DBGC ( ibdev, "IBDEV %p destroying QPN %#lx\n",
  272. ibdev, qp->qpn );
  273. assert ( list_empty ( &qp->mgids ) );
  274. /* Perform device-specific destruction */
  275. ibdev->op->destroy_qp ( ibdev, qp );
  276. /* Complete any remaining I/O buffers with errors */
  277. for ( i = 0 ; i < qp->send.num_wqes ; i++ ) {
  278. if ( ( iobuf = qp->send.iobufs[i] ) != NULL )
  279. ib_complete_send ( ibdev, qp, iobuf, -ECANCELED );
  280. }
  281. for ( i = 0 ; i < qp->recv.num_wqes ; i++ ) {
  282. if ( ( iobuf = qp->recv.iobufs[i] ) != NULL ) {
  283. ib_complete_recv ( ibdev, qp, NULL, NULL, iobuf,
  284. -ECANCELED );
  285. }
  286. }
  287. /* Remove work queues from completion queue */
  288. list_del ( &qp->send.list );
  289. list_del ( &qp->recv.list );
  290. /* Free QP */
  291. list_del ( &qp->list );
  292. free ( qp );
  293. }
  294. /**
  295. * Find queue pair by QPN
  296. *
  297. * @v ibdev Infiniband device
  298. * @v qpn Queue pair number
  299. * @ret qp Queue pair, or NULL
  300. */
  301. struct ib_queue_pair * ib_find_qp_qpn ( struct ib_device *ibdev,
  302. unsigned long qpn ) {
  303. struct ib_queue_pair *qp;
  304. list_for_each_entry ( qp, &ibdev->qps, list ) {
  305. if ( ( qpn == qp->qpn ) || ( qpn == qp->ext_qpn ) )
  306. return qp;
  307. }
  308. return NULL;
  309. }
  310. /**
  311. * Find queue pair by multicast GID
  312. *
  313. * @v ibdev Infiniband device
  314. * @v gid Multicast GID
  315. * @ret qp Queue pair, or NULL
  316. */
  317. struct ib_queue_pair * ib_find_qp_mgid ( struct ib_device *ibdev,
  318. union ib_gid *gid ) {
  319. struct ib_queue_pair *qp;
  320. struct ib_multicast_gid *mgid;
  321. list_for_each_entry ( qp, &ibdev->qps, list ) {
  322. list_for_each_entry ( mgid, &qp->mgids, list ) {
  323. if ( memcmp ( &mgid->gid, gid,
  324. sizeof ( mgid->gid ) ) == 0 ) {
  325. return qp;
  326. }
  327. }
  328. }
  329. return NULL;
  330. }
  331. /**
  332. * Find work queue belonging to completion queue
  333. *
  334. * @v cq Completion queue
  335. * @v qpn Queue pair number
  336. * @v is_send Find send work queue (rather than receive)
  337. * @ret wq Work queue, or NULL if not found
  338. */
  339. struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
  340. unsigned long qpn, int is_send ) {
  341. struct ib_work_queue *wq;
  342. list_for_each_entry ( wq, &cq->work_queues, list ) {
  343. if ( ( wq->qp->qpn == qpn ) && ( wq->is_send == is_send ) )
  344. return wq;
  345. }
  346. return NULL;
  347. }
  348. /**
  349. * Post send work queue entry
  350. *
  351. * @v ibdev Infiniband device
  352. * @v qp Queue pair
  353. * @v dest Destination address vector
  354. * @v iobuf I/O buffer
  355. * @ret rc Return status code
  356. */
  357. int ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  358. struct ib_address_vector *dest,
  359. struct io_buffer *iobuf ) {
  360. struct ib_address_vector dest_copy;
  361. int rc;
  362. /* Start profiling */
  363. profile_start ( &ib_post_send_profiler );
  364. /* Check queue fill level */
  365. if ( qp->send.fill >= qp->send.num_wqes ) {
  366. DBGC ( ibdev, "IBDEV %p QPN %#lx send queue full\n",
  367. ibdev, qp->qpn );
  368. return -ENOBUFS;
  369. }
  370. /* Use default address vector if none specified */
  371. if ( ! dest )
  372. dest = &qp->av;
  373. /* Make modifiable copy of address vector */
  374. memcpy ( &dest_copy, dest, sizeof ( dest_copy ) );
  375. dest = &dest_copy;
  376. /* Fill in optional parameters in address vector */
  377. if ( ! dest->qkey )
  378. dest->qkey = qp->qkey;
  379. if ( ! dest->rate )
  380. dest->rate = IB_RATE_2_5;
  381. /* Post to hardware */
  382. if ( ( rc = ibdev->op->post_send ( ibdev, qp, dest, iobuf ) ) != 0 ) {
  383. DBGC ( ibdev, "IBDEV %p QPN %#lx could not post send WQE: "
  384. "%s\n", ibdev, qp->qpn, strerror ( rc ) );
  385. return rc;
  386. }
  387. /* Increase fill level */
  388. qp->send.fill++;
  389. /* Stop profiling */
  390. profile_stop ( &ib_post_send_profiler );
  391. return 0;
  392. }
  393. /**
  394. * Post receive work queue entry
  395. *
  396. * @v ibdev Infiniband device
  397. * @v qp Queue pair
  398. * @v iobuf I/O buffer
  399. * @ret rc Return status code
  400. */
  401. int ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  402. struct io_buffer *iobuf ) {
  403. int rc;
  404. /* Start profiling */
  405. profile_start ( &ib_post_recv_profiler );
  406. /* Check packet length */
  407. if ( iob_tailroom ( iobuf ) < IB_MAX_PAYLOAD_SIZE ) {
  408. DBGC ( ibdev, "IBDEV %p QPN %#lx wrong RX buffer size (%zd)\n",
  409. ibdev, qp->qpn, iob_tailroom ( iobuf ) );
  410. return -EINVAL;
  411. }
  412. /* Check queue fill level */
  413. if ( qp->recv.fill >= qp->recv.num_wqes ) {
  414. DBGC ( ibdev, "IBDEV %p QPN %#lx receive queue full\n",
  415. ibdev, qp->qpn );
  416. return -ENOBUFS;
  417. }
  418. /* Post to hardware */
  419. if ( ( rc = ibdev->op->post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
  420. DBGC ( ibdev, "IBDEV %p QPN %#lx could not post receive WQE: "
  421. "%s\n", ibdev, qp->qpn, strerror ( rc ) );
  422. return rc;
  423. }
  424. /* Increase fill level */
  425. qp->recv.fill++;
  426. /* Stop profiling */
  427. profile_stop ( &ib_post_recv_profiler );
  428. return 0;
  429. }
  430. /**
  431. * Complete send work queue entry
  432. *
  433. * @v ibdev Infiniband device
  434. * @v qp Queue pair
  435. * @v iobuf I/O buffer
  436. * @v rc Completion status code
  437. */
  438. void ib_complete_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  439. struct io_buffer *iobuf, int rc ) {
  440. if ( qp->send.cq->op->complete_send ) {
  441. qp->send.cq->op->complete_send ( ibdev, qp, iobuf, rc );
  442. } else {
  443. free_iob ( iobuf );
  444. }
  445. qp->send.fill--;
  446. }
  447. /**
  448. * Complete receive work queue entry
  449. *
  450. * @v ibdev Infiniband device
  451. * @v qp Queue pair
  452. * @v dest Destination address vector, or NULL
  453. * @v source Source address vector, or NULL
  454. * @v iobuf I/O buffer
  455. * @v rc Completion status code
  456. */
  457. void ib_complete_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  458. struct ib_address_vector *dest,
  459. struct ib_address_vector *source,
  460. struct io_buffer *iobuf, int rc ) {
  461. if ( qp->recv.cq->op->complete_recv ) {
  462. qp->recv.cq->op->complete_recv ( ibdev, qp, dest, source,
  463. iobuf, rc );
  464. } else {
  465. free_iob ( iobuf );
  466. }
  467. qp->recv.fill--;
  468. }
  469. /**
  470. * Refill receive work queue
  471. *
  472. * @v ibdev Infiniband device
  473. * @v qp Queue pair
  474. */
  475. void ib_refill_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
  476. struct io_buffer *iobuf;
  477. int rc;
  478. /* Keep filling while unfilled entries remain */
  479. while ( qp->recv.fill < qp->recv.num_wqes ) {
  480. /* Allocate I/O buffer */
  481. iobuf = qp->op->alloc_iob ( IB_MAX_PAYLOAD_SIZE );
  482. if ( ! iobuf ) {
  483. /* Non-fatal; we will refill on next attempt */
  484. return;
  485. }
  486. /* Post I/O buffer */
  487. if ( ( rc = ib_post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
  488. DBGC ( ibdev, "IBDEV %p could not refill: %s\n",
  489. ibdev, strerror ( rc ) );
  490. free_iob ( iobuf );
  491. /* Give up */
  492. return;
  493. }
  494. }
  495. }
  496. /***************************************************************************
  497. *
  498. * Link control
  499. *
  500. ***************************************************************************
  501. */
  502. /**
  503. * Get link state
  504. *
  505. * @v ibdev Infiniband device
  506. * @ret rc Link status code
  507. */
  508. int ib_link_rc ( struct ib_device *ibdev ) {
  509. switch ( ibdev->port_state ) {
  510. case IB_PORT_STATE_DOWN: return -ENOTCONN;
  511. case IB_PORT_STATE_INIT: return -EINPROGRESS_INIT;
  512. case IB_PORT_STATE_ARMED: return -EINPROGRESS_ARMED;
  513. case IB_PORT_STATE_ACTIVE: return 0;
  514. default: return -EINVAL;
  515. }
  516. }
  517. /**
  518. * Textual representation of Infiniband link state
  519. *
  520. * @v ibdev Infiniband device
  521. * @ret link_text Link state text
  522. */
  523. static const char * ib_link_state_text ( struct ib_device *ibdev ) {
  524. switch ( ibdev->port_state ) {
  525. case IB_PORT_STATE_DOWN: return "DOWN";
  526. case IB_PORT_STATE_INIT: return "INIT";
  527. case IB_PORT_STATE_ARMED: return "ARMED";
  528. case IB_PORT_STATE_ACTIVE: return "ACTIVE";
  529. default: return "UNKNOWN";
  530. }
  531. }
  532. /**
  533. * Notify drivers of Infiniband device or link state change
  534. *
  535. * @v ibdev Infiniband device
  536. */
  537. static void ib_notify ( struct ib_device *ibdev ) {
  538. struct ib_driver *driver;
  539. for_each_table_entry ( driver, IB_DRIVERS )
  540. driver->notify ( ibdev );
  541. }
  /**
   * Notify of Infiniband link state change
   *
   * @v ibdev		Infiniband device
   *
   * Logs the new link state and forwards the notification to all
   * Infiniband drivers.
   */
  void ib_link_state_changed ( struct ib_device *ibdev ) {

      DBGC ( ibdev, "IBDEV %p link state is %s\n",
             ibdev, ib_link_state_text ( ibdev ) );

      /* Pass the news on to the drivers */
      ib_notify ( ibdev );
  }
  553. /**
  554. * Open port
  555. *
  556. * @v ibdev Infiniband device
  557. * @ret rc Return status code
  558. */
  559. int ib_open ( struct ib_device *ibdev ) {
  560. int rc;
  561. /* Increment device open request counter */
  562. if ( ibdev->open_count++ > 0 ) {
  563. /* Device was already open; do nothing */
  564. return 0;
  565. }
  566. /* Open device */
  567. if ( ( rc = ibdev->op->open ( ibdev ) ) != 0 ) {
  568. DBGC ( ibdev, "IBDEV %p could not open: %s\n",
  569. ibdev, strerror ( rc ) );
  570. goto err_open;
  571. }
  572. /* Create subnet management interface */
  573. ibdev->smi = ib_create_mi ( ibdev, IB_QPT_SMI );
  574. if ( ! ibdev->smi ) {
  575. DBGC ( ibdev, "IBDEV %p could not create SMI\n", ibdev );
  576. rc = -ENOMEM;
  577. goto err_create_smi;
  578. }
  579. /* Create subnet management agent */
  580. if ( ( rc = ib_create_sma ( ibdev, ibdev->smi ) ) != 0 ) {
  581. DBGC ( ibdev, "IBDEV %p could not create SMA: %s\n",
  582. ibdev, strerror ( rc ) );
  583. goto err_create_sma;
  584. }
  585. /* Create general services interface */
  586. ibdev->gsi = ib_create_mi ( ibdev, IB_QPT_GSI );
  587. if ( ! ibdev->gsi ) {
  588. DBGC ( ibdev, "IBDEV %p could not create GSI\n", ibdev );
  589. rc = -ENOMEM;
  590. goto err_create_gsi;
  591. }
  592. /* Add to head of open devices list */
  593. list_add ( &ibdev->open_list, &open_ib_devices );
  594. /* Notify drivers of device state change */
  595. ib_notify ( ibdev );
  596. assert ( ibdev->open_count == 1 );
  597. return 0;
  598. ib_destroy_mi ( ibdev, ibdev->gsi );
  599. err_create_gsi:
  600. ib_destroy_sma ( ibdev, ibdev->smi );
  601. err_create_sma:
  602. ib_destroy_mi ( ibdev, ibdev->smi );
  603. err_create_smi:
  604. ibdev->op->close ( ibdev );
  605. err_open:
  606. assert ( ibdev->open_count == 1 );
  607. ibdev->open_count = 0;
  608. return rc;
  609. }
  610. /**
  611. * Close port
  612. *
  613. * @v ibdev Infiniband device
  614. */
  615. void ib_close ( struct ib_device *ibdev ) {
  616. /* Decrement device open request counter */
  617. ibdev->open_count--;
  618. /* Close device if this was the last remaining requested opening */
  619. if ( ibdev->open_count == 0 ) {
  620. ib_notify ( ibdev );
  621. list_del ( &ibdev->open_list );
  622. ib_destroy_mi ( ibdev, ibdev->gsi );
  623. ib_destroy_sma ( ibdev, ibdev->smi );
  624. ib_destroy_mi ( ibdev, ibdev->smi );
  625. ibdev->op->close ( ibdev );
  626. ibdev->port_state = IB_PORT_STATE_DOWN;
  627. }
  628. }
  629. /***************************************************************************
  630. *
  631. * Multicast
  632. *
  633. ***************************************************************************
  634. */
  635. /**
  636. * Attach to multicast group
  637. *
  638. * @v ibdev Infiniband device
  639. * @v qp Queue pair
  640. * @v gid Multicast GID
  641. * @ret rc Return status code
  642. *
  643. * Note that this function handles only the local device's attachment
  644. * to the multicast GID; it does not issue the relevant MADs to join
  645. * the multicast group on the subnet.
  646. */
  647. int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  648. union ib_gid *gid ) {
  649. struct ib_multicast_gid *mgid;
  650. int rc;
  651. /* Sanity check */
  652. assert ( qp != NULL );
  653. /* Add to software multicast GID list */
  654. mgid = zalloc ( sizeof ( *mgid ) );
  655. if ( ! mgid ) {
  656. rc = -ENOMEM;
  657. goto err_alloc_mgid;
  658. }
  659. memcpy ( &mgid->gid, gid, sizeof ( mgid->gid ) );
  660. list_add ( &mgid->list, &qp->mgids );
  661. /* Add to hardware multicast GID list */
  662. if ( ( rc = ibdev->op->mcast_attach ( ibdev, qp, gid ) ) != 0 )
  663. goto err_dev_mcast_attach;
  664. return 0;
  665. err_dev_mcast_attach:
  666. list_del ( &mgid->list );
  667. free ( mgid );
  668. err_alloc_mgid:
  669. return rc;
  670. }
  671. /**
  672. * Detach from multicast group
  673. *
  674. * @v ibdev Infiniband device
  675. * @v qp Queue pair
  676. * @v gid Multicast GID
  677. */
  678. void ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  679. union ib_gid *gid ) {
  680. struct ib_multicast_gid *mgid;
  681. /* Sanity check */
  682. assert ( qp != NULL );
  683. /* Remove from hardware multicast GID list */
  684. ibdev->op->mcast_detach ( ibdev, qp, gid );
  685. /* Remove from software multicast GID list */
  686. list_for_each_entry ( mgid, &qp->mgids, list ) {
  687. if ( memcmp ( &mgid->gid, gid, sizeof ( mgid->gid ) ) == 0 ) {
  688. list_del ( &mgid->list );
  689. free ( mgid );
  690. break;
  691. }
  692. }
  693. }
  694. /***************************************************************************
  695. *
  696. * Miscellaneous
  697. *
  698. ***************************************************************************
  699. */
  700. /**
  701. * Count Infiniband HCA ports
  702. *
  703. * @v ibdev Infiniband device
  704. * @ret num_ports Number of ports
  705. */
  706. int ib_count_ports ( struct ib_device *ibdev ) {
  707. struct ib_device *tmp;
  708. int num_ports = 0;
  709. /* Search for IB devices with the same physical device to
  710. * identify port count.
  711. */
  712. for_each_ibdev ( tmp ) {
  713. if ( tmp->dev == ibdev->dev )
  714. num_ports++;
  715. }
  716. return num_ports;
  717. }
  718. /**
  719. * Set port information
  720. *
  721. * @v ibdev Infiniband device
  722. * @v mad Set port information MAD
  723. */
  724. int ib_set_port_info ( struct ib_device *ibdev, union ib_mad *mad ) {
  725. int rc;
  726. /* Adapters with embedded SMAs do not need to support this method */
  727. if ( ! ibdev->op->set_port_info ) {
  728. DBGC ( ibdev, "IBDEV %p does not support setting port "
  729. "information\n", ibdev );
  730. return -ENOTSUP;
  731. }
  732. if ( ( rc = ibdev->op->set_port_info ( ibdev, mad ) ) != 0 ) {
  733. DBGC ( ibdev, "IBDEV %p could not set port information: %s\n",
  734. ibdev, strerror ( rc ) );
  735. return rc;
  736. }
  737. return 0;
  738. };
  739. /**
  740. * Set partition key table
  741. *
  742. * @v ibdev Infiniband device
  743. * @v mad Set partition key table MAD
  744. */
  745. int ib_set_pkey_table ( struct ib_device *ibdev, union ib_mad *mad ) {
  746. int rc;
  747. /* Adapters with embedded SMAs do not need to support this method */
  748. if ( ! ibdev->op->set_pkey_table ) {
  749. DBGC ( ibdev, "IBDEV %p does not support setting partition "
  750. "key table\n", ibdev );
  751. return -ENOTSUP;
  752. }
  753. if ( ( rc = ibdev->op->set_pkey_table ( ibdev, mad ) ) != 0 ) {
  754. DBGC ( ibdev, "IBDEV %p could not set partition key table: "
  755. "%s\n", ibdev, strerror ( rc ) );
  756. return rc;
  757. }
  758. return 0;
  759. };
  760. /***************************************************************************
  761. *
  762. * Event queues
  763. *
  764. ***************************************************************************
  765. */
  766. /**
  767. * Poll event queue
  768. *
  769. * @v ibdev Infiniband device
  770. */
  771. void ib_poll_eq ( struct ib_device *ibdev ) {
  772. struct ib_completion_queue *cq;
  773. /* Poll device's event queue */
  774. ibdev->op->poll_eq ( ibdev );
  775. /* Poll all completion queues */
  776. list_for_each_entry ( cq, &ibdev->cqs, list )
  777. ib_poll_cq ( ibdev, cq );
  778. }
  779. /**
  780. * Single-step the Infiniband event queue
  781. *
  782. * @v process Infiniband event queue process
  783. */
  784. static void ib_step ( struct process *process __unused ) {
  785. struct ib_device *ibdev;
  786. list_for_each_entry ( ibdev, &open_ib_devices, open_list )
  787. ib_poll_eq ( ibdev );
  788. }
  789. /** Infiniband event queue process */
  790. PERMANENT_PROCESS ( ib_process, ib_step );
  791. /***************************************************************************
  792. *
  793. * Infiniband device creation/destruction
  794. *
  795. ***************************************************************************
  796. */
  797. /**
  798. * Allocate Infiniband device
  799. *
  800. * @v priv_size Size of driver private data area
  801. * @ret ibdev Infiniband device, or NULL
  802. */
  803. struct ib_device * alloc_ibdev ( size_t priv_size ) {
  804. struct ib_device *ibdev;
  805. void *drv_priv;
  806. size_t total_len;
  807. total_len = ( sizeof ( *ibdev ) + priv_size );
  808. ibdev = zalloc ( total_len );
  809. if ( ibdev ) {
  810. drv_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) );
  811. ib_set_drvdata ( ibdev, drv_priv );
  812. INIT_LIST_HEAD ( &ibdev->list );
  813. INIT_LIST_HEAD ( &ibdev->open_list );
  814. INIT_LIST_HEAD ( &ibdev->cqs );
  815. INIT_LIST_HEAD ( &ibdev->qps );
  816. ibdev->port_state = IB_PORT_STATE_DOWN;
  817. ibdev->lid = IB_LID_NONE;
  818. ibdev->pkey = IB_PKEY_DEFAULT;
  819. }
  820. return ibdev;
  821. }
  822. /**
  823. * Register Infiniband device
  824. *
  825. * @v ibdev Infiniband device
  826. * @ret rc Return status code
  827. */
  828. int register_ibdev ( struct ib_device *ibdev ) {
  829. struct ib_driver *driver;
  830. int rc;
  831. /* Add to device list */
  832. ibdev_get ( ibdev );
  833. list_add_tail ( &ibdev->list, &ib_devices );
  834. DBGC ( ibdev, "IBDEV %p registered (phys %s)\n", ibdev,
  835. ibdev->dev->name );
  836. /* Probe device */
  837. for_each_table_entry ( driver, IB_DRIVERS ) {
  838. if ( ( rc = driver->probe ( ibdev ) ) != 0 ) {
  839. DBGC ( ibdev, "IBDEV %p could not add %s device: %s\n",
  840. ibdev, driver->name, strerror ( rc ) );
  841. goto err_probe;
  842. }
  843. }
  844. return 0;
  845. err_probe:
  846. for_each_table_entry_continue_reverse ( driver, IB_DRIVERS )
  847. driver->remove ( ibdev );
  848. list_del ( &ibdev->list );
  849. ibdev_put ( ibdev );
  850. return rc;
  851. }
  852. /**
  853. * Unregister Infiniband device
  854. *
  855. * @v ibdev Infiniband device
  856. */
  857. void unregister_ibdev ( struct ib_device *ibdev ) {
  858. struct ib_driver *driver;
  859. /* Remove device */
  860. for_each_table_entry_reverse ( driver, IB_DRIVERS )
  861. driver->remove ( ibdev );
  862. /* Remove from device list */
  863. list_del ( &ibdev->list );
  864. ibdev_put ( ibdev );
  865. DBGC ( ibdev, "IBDEV %p unregistered\n", ibdev );
  866. }
  867. /**
  868. * Find Infiniband device by GID
  869. *
  870. * @v gid GID
  871. * @ret ibdev Infiniband device, or NULL
  872. */
  873. struct ib_device * find_ibdev ( union ib_gid *gid ) {
  874. struct ib_device *ibdev;
  875. for_each_ibdev ( ibdev ) {
  876. if ( memcmp ( gid, &ibdev->gid, sizeof ( *gid ) ) == 0 )
  877. return ibdev;
  878. }
  879. return NULL;
  880. }
  881. /**
  882. * Get most recently opened Infiniband device
  883. *
  884. * @ret ibdev Most recently opened Infiniband device, or NULL
  885. */
  886. struct ib_device * last_opened_ibdev ( void ) {
  887. struct ib_device *ibdev;
  888. ibdev = list_first_entry ( &open_ib_devices, struct ib_device,
  889. open_list );
  890. if ( ! ibdev )
  891. return NULL;
  892. assert ( ibdev->open_count != 0 );
  893. return ibdev;
  894. }
  /* Objects required below are dragged in via register_ibdev() */
  REQUIRING_SYMBOL ( register_ibdev );

  /* Required object: Infiniband configuration */
  REQUIRE_OBJECT ( config_infiniband );

  /* Required object: IPoIB */
  REQUIRE_OBJECT ( ipoib );