You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

ib_cm.c 15KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514
  1. /*
  2. * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License as
  6. * published by the Free Software Foundation; either version 2 of the
  7. * License, or any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful, but
  10. * WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write to the Free Software
  16. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  17. * 02110-1301, USA.
  18. *
  19. * You can also choose to distribute this program under the terms of
  20. * the Unmodified Binary Distribution Licence (as given in the file
  21. * COPYING.UBDL), provided that you have satisfied its requirements.
  22. */
  23. FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
  24. #include <stdint.h>
  25. #include <stdlib.h>
  26. #include <string.h>
  27. #include <byteswap.h>
  28. #include <errno.h>
  29. #include <assert.h>
  30. #include <ipxe/infiniband.h>
  31. #include <ipxe/ib_mi.h>
  32. #include <ipxe/ib_pathrec.h>
  33. #include <ipxe/ib_cm.h>
  34. /**
  35. * @file
  36. *
  37. * Infiniband communication management
  38. *
  39. */
/**
 * List of connections
 *
 * Connections are added by ib_create_conn(), removed by
 * ib_destroy_conn(), and searched by ib_cm_find().
 */
static LIST_HEAD ( ib_cm_conns );
  42. /**
  43. * Find connection by local communication ID
  44. *
  45. * @v local_id Local communication ID
  46. * @ret conn Connection, or NULL
  47. */
  48. static struct ib_connection * ib_cm_find ( uint32_t local_id ) {
  49. struct ib_connection *conn;
  50. list_for_each_entry ( conn, &ib_cm_conns, list ) {
  51. if ( conn->local_id == local_id )
  52. return conn;
  53. }
  54. return NULL;
  55. }
  56. /**
  57. * Send "ready to use" response
  58. *
  59. * @v ibdev Infiniband device
  60. * @v mi Management interface
  61. * @v tid Transaction identifier
  62. * @v av Address vector
  63. * @v local_id Local communication ID
  64. * @v remote_id Remote communication ID
  65. * @ret rc Return status code
  66. */
  67. static int ib_cm_send_rtu ( struct ib_device *ibdev,
  68. struct ib_mad_interface *mi,
  69. struct ib_mad_tid *tid,
  70. struct ib_address_vector *av,
  71. uint32_t local_id, uint32_t remote_id ) {
  72. union ib_mad mad;
  73. struct ib_cm_ready_to_use *rtu = &mad.cm.cm_data.ready_to_use;
  74. int rc;
  75. /* Construct "ready to use" response */
  76. memset ( &mad, 0, sizeof ( mad ) );
  77. mad.hdr.mgmt_class = IB_MGMT_CLASS_CM;
  78. mad.hdr.class_version = IB_CM_CLASS_VERSION;
  79. mad.hdr.method = IB_MGMT_METHOD_SEND;
  80. memcpy ( &mad.hdr.tid, tid, sizeof ( mad.hdr.tid ) );
  81. mad.hdr.attr_id = htons ( IB_CM_ATTR_READY_TO_USE );
  82. rtu->local_id = htonl ( local_id );
  83. rtu->remote_id = htonl ( remote_id );
  84. if ( ( rc = ib_mi_send ( ibdev, mi, &mad, av ) ) != 0 ) {
  85. DBGC ( local_id, "CM %08x could not send RTU: %s\n",
  86. local_id, strerror ( rc ) );
  87. return rc;
  88. }
  89. return 0;
  90. }
  91. /**
  92. * Handle duplicate connection replies
  93. *
  94. * @v ibdev Infiniband device
  95. * @v mi Management interface
  96. * @v mad Received MAD
  97. * @v av Source address vector
  98. * @ret rc Return status code
  99. *
  100. * If a "ready to use" MAD is lost, the peer may resend the connection
  101. * reply. We have to respond to these with duplicate "ready to use"
  102. * MADs, otherwise the peer may time out and drop the connection.
  103. */
  104. static void ib_cm_recv_rep ( struct ib_device *ibdev,
  105. struct ib_mad_interface *mi,
  106. union ib_mad *mad,
  107. struct ib_address_vector *av ) {
  108. struct ib_cm_connect_reply *rep = &mad->cm.cm_data.connect_reply;
  109. struct ib_connection *conn;
  110. uint32_t local_id = ntohl ( rep->remote_id );
  111. int rc;
  112. /* Identify connection */
  113. conn = ib_cm_find ( local_id );
  114. if ( conn ) {
  115. /* Try to send "ready to use" reply */
  116. if ( ( rc = ib_cm_send_rtu ( ibdev, mi, &mad->hdr.tid, av,
  117. conn->local_id,
  118. conn->remote_id ) ) != 0 ) {
  119. /* Ignore errors; the remote end will retry */
  120. }
  121. } else {
  122. DBGC ( local_id, "CM %08x unexpected REP\n", local_id );
  123. }
  124. }
  125. /**
  126. * Send reply to disconnection request
  127. *
  128. * @v ibdev Infiniband device
  129. * @v mi Management interface
  130. * @v tid Transaction identifier
  131. * @v av Address vector
  132. * @v local_id Local communication ID
  133. * @v remote_id Remote communication ID
  134. * @ret rc Return status code
  135. */
  136. static int ib_cm_send_drep ( struct ib_device *ibdev,
  137. struct ib_mad_interface *mi,
  138. struct ib_mad_tid *tid,
  139. struct ib_address_vector *av,
  140. uint32_t local_id, uint32_t remote_id ) {
  141. union ib_mad mad;
  142. struct ib_cm_disconnect_reply *drep = &mad.cm.cm_data.disconnect_reply;
  143. int rc;
  144. /* Construct reply to disconnection request */
  145. memset ( &mad, 0, sizeof ( mad ) );
  146. mad.hdr.mgmt_class = IB_MGMT_CLASS_CM;
  147. mad.hdr.class_version = IB_CM_CLASS_VERSION;
  148. mad.hdr.method = IB_MGMT_METHOD_SEND;
  149. memcpy ( &mad.hdr.tid, tid, sizeof ( mad.hdr.tid ) );
  150. mad.hdr.attr_id = htons ( IB_CM_ATTR_DISCONNECT_REPLY );
  151. drep->local_id = htonl ( local_id );
  152. drep->remote_id = htonl ( remote_id );
  153. if ( ( rc = ib_mi_send ( ibdev, mi, &mad, av ) ) != 0 ) {
  154. DBGC ( local_id, "CM %08x could not send DREP: %s\n",
  155. local_id, strerror ( rc ) );
  156. return rc;
  157. }
  158. return 0;
  159. }
  160. /**
  161. * Handle disconnection requests
  162. *
  163. * @v ibdev Infiniband device
  164. * @v mi Management interface
  165. * @v mad Received MAD
  166. * @v av Source address vector
  167. * @ret rc Return status code
  168. */
  169. static void ib_cm_recv_dreq ( struct ib_device *ibdev,
  170. struct ib_mad_interface *mi,
  171. union ib_mad *mad,
  172. struct ib_address_vector *av ) {
  173. struct ib_cm_disconnect_request *dreq =
  174. &mad->cm.cm_data.disconnect_request;
  175. struct ib_connection *conn;
  176. uint32_t local_id = ntohl ( dreq->remote_id );
  177. uint32_t remote_id = ntohl ( dreq->local_id );
  178. int rc;
  179. /* Identify connection */
  180. conn = ib_cm_find ( local_id );
  181. if ( conn ) {
  182. /* Notify upper layer */
  183. conn->op->changed ( ibdev, conn->qp, conn, -ENOTCONN,
  184. &dreq->private_data,
  185. sizeof ( dreq->private_data ) );
  186. } else {
  187. DBGC ( local_id, "CM %08x unexpected DREQ\n", local_id );
  188. }
  189. /* Send reply */
  190. if ( ( rc = ib_cm_send_drep ( ibdev, mi, &mad->hdr.tid, av, local_id,
  191. remote_id ) ) != 0 ) {
  192. /* Ignore errors; the remote end will retry */
  193. }
  194. };
/**
 * Communication management agents
 *
 * Each entry pairs a management class, class version and attribute ID
 * (stored in network byte order) with the handler for received MADs
 * of that kind.
 */
struct ib_mad_agent ib_cm_agent[] __ib_mad_agent = {
	{
		/* Connection replies (including retransmitted ones) */
		.mgmt_class = IB_MGMT_CLASS_CM,
		.class_version = IB_CM_CLASS_VERSION,
		.attr_id = htons ( IB_CM_ATTR_CONNECT_REPLY ),
		.handle = ib_cm_recv_rep,
	},
	{
		/* Disconnection requests */
		.mgmt_class = IB_MGMT_CLASS_CM,
		.class_version = IB_CM_CLASS_VERSION,
		.attr_id = htons ( IB_CM_ATTR_DISCONNECT_REQUEST ),
		.handle = ib_cm_recv_dreq,
	},
};
  210. /**
  211. * Convert connection rejection reason to return status code
  212. *
  213. * @v reason Rejection reason (in network byte order)
  214. * @ret rc Return status code
  215. */
  216. static int ib_cm_rejection_reason_to_rc ( uint16_t reason ) {
  217. switch ( reason ) {
  218. case htons ( IB_CM_REJECT_BAD_SERVICE_ID ) :
  219. return -ENODEV;
  220. case htons ( IB_CM_REJECT_STALE_CONN ) :
  221. return -EALREADY;
  222. case htons ( IB_CM_REJECT_CONSUMER ) :
  223. return -ENOTTY;
  224. default:
  225. return -EPERM;
  226. }
  227. }
  228. /**
  229. * Handle connection request transaction completion
  230. *
  231. * @v ibdev Infiniband device
  232. * @v mi Management interface
  233. * @v madx Management transaction
  234. * @v rc Status code
  235. * @v mad Received MAD (or NULL on error)
  236. * @v av Source address vector (or NULL on error)
  237. */
  238. static void ib_cm_req_complete ( struct ib_device *ibdev,
  239. struct ib_mad_interface *mi,
  240. struct ib_mad_transaction *madx,
  241. int rc, union ib_mad *mad,
  242. struct ib_address_vector *av ) {
  243. struct ib_connection *conn = ib_madx_get_ownerdata ( madx );
  244. struct ib_queue_pair *qp = conn->qp;
  245. struct ib_cm_common *common = &mad->cm.cm_data.common;
  246. struct ib_cm_connect_reply *rep = &mad->cm.cm_data.connect_reply;
  247. struct ib_cm_connect_reject *rej = &mad->cm.cm_data.connect_reject;
  248. uint32_t local_id = conn->local_id;
  249. void *private_data = NULL;
  250. size_t private_data_len = 0;
  251. /* Report failures */
  252. if ( ( rc == 0 ) && ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) ))
  253. rc = -EIO;
  254. if ( rc != 0 ) {
  255. DBGC ( local_id, "CM %08x connection request failed: %s\n",
  256. local_id, strerror ( rc ) );
  257. goto out;
  258. }
  259. /* Record remote communication ID */
  260. conn->remote_id = ntohl ( common->local_id );
  261. /* Handle response */
  262. switch ( mad->hdr.attr_id ) {
  263. case htons ( IB_CM_ATTR_CONNECT_REPLY ) :
  264. /* Extract fields */
  265. qp->av.qpn = ( ntohl ( rep->local_qpn ) >> 8 );
  266. qp->send.psn = ( ntohl ( rep->starting_psn ) >> 8 );
  267. private_data = &rep->private_data;
  268. private_data_len = sizeof ( rep->private_data );
  269. DBGC ( local_id, "CM %08x connected to QPN %#lx PSN %#x\n",
  270. local_id, qp->av.qpn, qp->send.psn );
  271. /* Modify queue pair */
  272. if ( ( rc = ib_modify_qp ( ibdev, qp ) ) != 0 ) {
  273. DBGC ( local_id, "CM %08x could not modify queue "
  274. "pair: %s\n", local_id, strerror ( rc ) );
  275. goto out;
  276. }
  277. /* Send "ready to use" reply */
  278. if ( ( rc = ib_cm_send_rtu ( ibdev, mi, &mad->hdr.tid, av,
  279. conn->local_id,
  280. conn->remote_id ) ) != 0 ) {
  281. /* Treat as non-fatal */
  282. rc = 0;
  283. }
  284. break;
  285. case htons ( IB_CM_ATTR_CONNECT_REJECT ) :
  286. /* Extract fields */
  287. DBGC ( local_id, "CM %08x connection rejected (reason %d)\n",
  288. local_id, ntohs ( rej->reason ) );
  289. /* Private data is valid only for a Consumer Reject */
  290. if ( rej->reason == htons ( IB_CM_REJECT_CONSUMER ) ) {
  291. private_data = &rej->private_data;
  292. private_data_len = sizeof ( rej->private_data );
  293. }
  294. rc = ib_cm_rejection_reason_to_rc ( rej->reason );
  295. break;
  296. default:
  297. DBGC ( local_id, "CM %08x unexpected response (attribute "
  298. "%04x)\n", local_id, ntohs ( mad->hdr.attr_id ) );
  299. rc = -ENOTSUP;
  300. break;
  301. }
  302. out:
  303. /* Destroy the completed transaction */
  304. ib_destroy_madx ( ibdev, ibdev->gsi, madx );
  305. conn->madx = NULL;
  306. /* Hand off to the upper completion handler */
  307. conn->op->changed ( ibdev, qp, conn, rc, private_data,
  308. private_data_len );
  309. }
/**
 * Connection request operations
 *
 * Passed to ib_create_madx() when the connection request is created,
 * so that ib_cm_req_complete() handles the transaction's completion.
 */
static struct ib_mad_transaction_operations ib_cm_req_op = {
	.complete = ib_cm_req_complete,
};
  314. /**
  315. * Handle connection path transaction completion
  316. *
  317. * @v ibdev Infiniband device
  318. * @v path Path
  319. * @v rc Status code
  320. * @v av Address vector, or NULL on error
  321. */
  322. static void ib_cm_path_complete ( struct ib_device *ibdev,
  323. struct ib_path *path, int rc,
  324. struct ib_address_vector *av ) {
  325. struct ib_connection *conn = ib_path_get_ownerdata ( path );
  326. struct ib_queue_pair *qp = conn->qp;
  327. union ib_mad mad;
  328. struct ib_cm_connect_request *req = &mad.cm.cm_data.connect_request;
  329. uint32_t local_id = conn->local_id;
  330. size_t private_data_len;
  331. /* Report failures */
  332. if ( rc != 0 ) {
  333. DBGC ( local_id, "CM %08x path lookup failed: %s\n",
  334. local_id, strerror ( rc ) );
  335. conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 );
  336. goto out;
  337. }
  338. /* Update queue pair peer path */
  339. memcpy ( &qp->av, av, sizeof ( qp->av ) );
  340. /* Construct connection request */
  341. memset ( &mad, 0, sizeof ( mad ) );
  342. mad.hdr.mgmt_class = IB_MGMT_CLASS_CM;
  343. mad.hdr.class_version = IB_CM_CLASS_VERSION;
  344. mad.hdr.method = IB_MGMT_METHOD_SEND;
  345. mad.hdr.attr_id = htons ( IB_CM_ATTR_CONNECT_REQUEST );
  346. req->local_id = htonl ( conn->local_id );
  347. memcpy ( &req->service_id, &conn->service_id,
  348. sizeof ( req->service_id ) );
  349. memcpy ( &req->local_ca, &ibdev->node_guid, sizeof ( req->local_ca ) );
  350. req->local_qpn__responder_resources = htonl ( ( qp->qpn << 8 ) | 1 );
  351. req->local_eecn__initiator_depth = htonl ( ( 0 << 8 ) | 1 );
  352. req->remote_eecn__remote_timeout__service_type__ee_flow_ctrl =
  353. htonl ( ( 0x14 << 3 ) | ( IB_CM_TRANSPORT_RC << 1 ) |
  354. ( 0 << 0 ) );
  355. req->starting_psn__local_timeout__retry_count =
  356. htonl ( ( qp->recv.psn << 8 ) | ( 0x14 << 3 ) |
  357. ( 0x07 << 0 ) );
  358. req->pkey = htons ( ibdev->pkey );
  359. req->payload_mtu__rdc_exists__rnr_retry =
  360. ( ( IB_MTU_2048 << 4 ) | ( 1 << 3 ) | ( 0x07 << 0 ) );
  361. req->max_cm_retries__srq = ( ( 0x0f << 4 ) | ( 0 << 3 ) );
  362. req->primary.local_lid = htons ( ibdev->lid );
  363. req->primary.remote_lid = htons ( conn->qp->av.lid );
  364. memcpy ( &req->primary.local_gid, &ibdev->gid,
  365. sizeof ( req->primary.local_gid ) );
  366. memcpy ( &req->primary.remote_gid, &conn->qp->av.gid,
  367. sizeof ( req->primary.remote_gid ) );
  368. req->primary.flow_label__rate =
  369. htonl ( ( 0 << 12 ) | ( conn->qp->av.rate << 0 ) );
  370. req->primary.hop_limit = 0;
  371. req->primary.sl__subnet_local =
  372. ( ( conn->qp->av.sl << 4 ) | ( 1 << 3 ) );
  373. req->primary.local_ack_timeout = ( 0x13 << 3 );
  374. private_data_len = conn->private_data_len;
  375. if ( private_data_len > sizeof ( req->private_data ) )
  376. private_data_len = sizeof ( req->private_data );
  377. memcpy ( &req->private_data, &conn->private_data, private_data_len );
  378. /* Create connection request */
  379. av->qpn = IB_QPN_GSI;
  380. av->qkey = IB_QKEY_GSI;
  381. conn->madx = ib_create_madx ( ibdev, ibdev->gsi, &mad, av,
  382. &ib_cm_req_op );
  383. if ( ! conn->madx ) {
  384. DBGC ( local_id, "CM %08x could not create connection "
  385. "request\n", local_id );
  386. conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 );
  387. goto out;
  388. }
  389. ib_madx_set_ownerdata ( conn->madx, conn );
  390. out:
  391. /* Destroy the completed transaction */
  392. ib_destroy_path ( ibdev, path );
  393. conn->path = NULL;
  394. }
/**
 * Connection path operations
 *
 * Passed to ib_create_path() by ib_create_conn(), so that
 * ib_cm_path_complete() handles the path transaction's completion.
 */
static struct ib_path_operations ib_cm_path_op = {
	.complete = ib_cm_path_complete,
};
  399. /**
  400. * Create connection to remote QP
  401. *
  402. * @v ibdev Infiniband device
  403. * @v qp Queue pair
  404. * @v dgid Target GID
  405. * @v service_id Target service ID
  406. * @v private_data Connection request private data
  407. * @v private_data_len Length of connection request private data
  408. * @v op Connection operations
  409. * @ret conn Connection
  410. */
  411. struct ib_connection *
  412. ib_create_conn ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  413. union ib_gid *dgid, union ib_guid *service_id,
  414. void *private_data, size_t private_data_len,
  415. struct ib_connection_operations *op ) {
  416. struct ib_connection *conn;
  417. uint32_t local_id;
  418. /* Allocate and initialise request */
  419. conn = zalloc ( sizeof ( *conn ) + private_data_len );
  420. if ( ! conn )
  421. goto err_alloc_conn;
  422. conn->ibdev = ibdev;
  423. conn->qp = qp;
  424. memset ( &qp->av, 0, sizeof ( qp->av ) );
  425. qp->av.gid_present = 1;
  426. memcpy ( &qp->av.gid, dgid, sizeof ( qp->av.gid ) );
  427. conn->local_id = local_id = random();
  428. memcpy ( &conn->service_id, service_id, sizeof ( conn->service_id ) );
  429. conn->op = op;
  430. conn->private_data_len = private_data_len;
  431. memcpy ( &conn->private_data, private_data, private_data_len );
  432. /* Create path */
  433. conn->path = ib_create_path ( ibdev, &qp->av, &ib_cm_path_op );
  434. if ( ! conn->path )
  435. goto err_create_path;
  436. ib_path_set_ownerdata ( conn->path, conn );
  437. /* Add to list of connections */
  438. list_add ( &conn->list, &ib_cm_conns );
  439. DBGC ( local_id, "CM %08x created for IBDEV %s QPN %#lx\n",
  440. local_id, ibdev->name, qp->qpn );
  441. DBGC ( local_id, "CM %08x connecting to " IB_GID_FMT " "
  442. IB_GUID_FMT "\n", local_id, IB_GID_ARGS ( dgid ),
  443. IB_GUID_ARGS ( service_id ) );
  444. return conn;
  445. ib_destroy_path ( ibdev, conn->path );
  446. err_create_path:
  447. free ( conn );
  448. err_alloc_conn:
  449. return NULL;
  450. }
  451. /**
  452. * Destroy connection to remote QP
  453. *
  454. * @v ibdev Infiniband device
  455. * @v qp Queue pair
  456. * @v conn Connection
  457. */
  458. void ib_destroy_conn ( struct ib_device *ibdev,
  459. struct ib_queue_pair *qp __unused,
  460. struct ib_connection *conn ) {
  461. list_del ( &conn->list );
  462. if ( conn->madx )
  463. ib_destroy_madx ( ibdev, ibdev->gsi, conn->madx );
  464. if ( conn->path )
  465. ib_destroy_path ( ibdev, conn->path );
  466. free ( conn );
  467. }