You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

ib_cm.c 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413
  1. /*
  2. * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License as
  6. * published by the Free Software Foundation; either version 2 of the
  7. * License, or any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful, but
  10. * WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write to the Free Software
  16. * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17. */
  18. FILE_LICENCE ( GPL2_OR_LATER );
  19. #include <stdint.h>
  20. #include <stdlib.h>
  21. #include <string.h>
  22. #include <byteswap.h>
  23. #include <errno.h>
  24. #include <assert.h>
  25. #include <gpxe/infiniband.h>
  26. #include <gpxe/ib_mi.h>
  27. #include <gpxe/ib_pathrec.h>
  28. #include <gpxe/ib_cm.h>
  29. /**
  30. * @file
  31. *
  32. * Infiniband communication management
  33. *
  34. */
/**
 * List of connections
 *
 * Entries are added by ib_create_conn() and removed by
 * ib_destroy_conn(); ib_cm_connect_rep() walks this list to match an
 * incoming connection reply to a connection.
 */
static LIST_HEAD ( ib_cm_conns );
  37. /**
  38. * Send "ready to use" response
  39. *
  40. * @v ibdev Infiniband device
  41. * @v mi Management interface
  42. * @v conn Connection
  43. * @v av Address vector
  44. * @ret rc Return status code
  45. */
  46. static int ib_cm_send_rtu ( struct ib_device *ibdev,
  47. struct ib_mad_interface *mi,
  48. struct ib_connection *conn,
  49. struct ib_address_vector *av ) {
  50. union ib_mad mad;
  51. struct ib_cm_ready_to_use *ready =
  52. &mad.cm.cm_data.ready_to_use;
  53. int rc;
  54. /* Construct "ready to use" response */
  55. memset ( &mad, 0, sizeof ( mad ) );
  56. mad.hdr.mgmt_class = IB_MGMT_CLASS_CM;
  57. mad.hdr.class_version = IB_CM_CLASS_VERSION;
  58. mad.hdr.method = IB_MGMT_METHOD_SEND;
  59. mad.hdr.attr_id = htons ( IB_CM_ATTR_READY_TO_USE );
  60. ready->local_id = htonl ( conn->local_id );
  61. ready->remote_id = htonl ( conn->remote_id );
  62. if ( ( rc = ib_mi_send ( ibdev, mi, &mad, av ) ) != 0 ){
  63. DBGC ( conn, "CM %p could not send RTU: %s\n",
  64. conn, strerror ( rc ) );
  65. return rc;
  66. }
  67. return 0;
  68. }
  69. /**
  70. * Handle duplicate connection replies
  71. *
  72. * @v ibdev Infiniband device
  73. * @v mi Management interface
  74. * @v mad Received MAD
  75. * @v av Source address vector
  76. * @ret rc Return status code
  77. *
  78. * If a "ready to use" MAD is lost, the peer may resend the connection
  79. * reply. We have to respond to these with duplicate "ready to use"
  80. * MADs, otherwise the peer may time out and drop the connection.
  81. */
  82. static void ib_cm_connect_rep ( struct ib_device *ibdev,
  83. struct ib_mad_interface *mi,
  84. union ib_mad *mad,
  85. struct ib_address_vector *av ) {
  86. struct ib_cm_connect_reply *connect_rep =
  87. &mad->cm.cm_data.connect_reply;
  88. struct ib_connection *conn;
  89. int rc;
  90. /* Identify connection */
  91. list_for_each_entry ( conn, &ib_cm_conns, list ) {
  92. if ( ntohl ( connect_rep->remote_id ) != conn->local_id )
  93. continue;
  94. /* Try to send "ready to use" reply */
  95. if ( ( rc = ib_cm_send_rtu ( ibdev, mi, conn, av ) ) != 0 ) {
  96. /* Ignore errors */
  97. return;
  98. }
  99. return;
  100. }
  101. DBG ( "CM unidentified connection %08x\n",
  102. ntohl ( connect_rep->remote_id ) );
  103. }
  104. /** Communication management agents */
  105. struct ib_mad_agent ib_cm_agent[] __ib_mad_agent = {
  106. {
  107. .mgmt_class = IB_MGMT_CLASS_CM,
  108. .class_version = IB_CM_CLASS_VERSION,
  109. .attr_id = htons ( IB_CM_ATTR_CONNECT_REPLY ),
  110. .handle = ib_cm_connect_rep,
  111. },
  112. };
  113. /**
  114. * Convert connection rejection reason to return status code
  115. *
  116. * @v reason Rejection reason (in network byte order)
  117. * @ret rc Return status code
  118. */
  119. static int ib_cm_rejection_reason_to_rc ( uint16_t reason ) {
  120. switch ( reason ) {
  121. case htons ( IB_CM_REJECT_BAD_SERVICE_ID ) :
  122. return -ENODEV;
  123. case htons ( IB_CM_REJECT_STALE_CONN ) :
  124. return -EALREADY;
  125. case htons ( IB_CM_REJECT_CONSUMER ) :
  126. return -ENOTTY;
  127. default:
  128. return -EPERM;
  129. }
  130. }
  131. /**
  132. * Handle connection request transaction completion
  133. *
  134. * @v ibdev Infiniband device
  135. * @v mi Management interface
  136. * @v madx Management transaction
  137. * @v rc Status code
  138. * @v mad Received MAD (or NULL on error)
  139. * @v av Source address vector (or NULL on error)
  140. */
  141. static void ib_cm_req_complete ( struct ib_device *ibdev,
  142. struct ib_mad_interface *mi,
  143. struct ib_mad_transaction *madx,
  144. int rc, union ib_mad *mad,
  145. struct ib_address_vector *av ) {
  146. struct ib_connection *conn = ib_madx_get_ownerdata ( madx );
  147. struct ib_queue_pair *qp = conn->qp;
  148. struct ib_cm_common *common = &mad->cm.cm_data.common;
  149. struct ib_cm_connect_reply *connect_rep =
  150. &mad->cm.cm_data.connect_reply;
  151. struct ib_cm_connect_reject *connect_rej =
  152. &mad->cm.cm_data.connect_reject;
  153. void *private_data = NULL;
  154. size_t private_data_len = 0;
  155. /* Report failures */
  156. if ( ( rc == 0 ) && ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) ))
  157. rc = -EIO;
  158. if ( rc != 0 ) {
  159. DBGC ( conn, "CM %p connection request failed: %s\n",
  160. conn, strerror ( rc ) );
  161. goto out;
  162. }
  163. /* Record remote communication ID */
  164. conn->remote_id = ntohl ( common->local_id );
  165. /* Handle response */
  166. switch ( mad->hdr.attr_id ) {
  167. case htons ( IB_CM_ATTR_CONNECT_REPLY ) :
  168. /* Extract fields */
  169. qp->av.qpn = ( ntohl ( connect_rep->local_qpn ) >> 8 );
  170. qp->send.psn = ( ntohl ( connect_rep->starting_psn ) >> 8 );
  171. private_data = &connect_rep->private_data;
  172. private_data_len = sizeof ( connect_rep->private_data );
  173. DBGC ( conn, "CM %p connected to QPN %lx PSN %x\n",
  174. conn, qp->av.qpn, qp->send.psn );
  175. /* Modify queue pair */
  176. if ( ( rc = ib_modify_qp ( ibdev, qp ) ) != 0 ) {
  177. DBGC ( conn, "CM %p could not modify queue pair: %s\n",
  178. conn, strerror ( rc ) );
  179. goto out;
  180. }
  181. /* Send "ready to use" reply */
  182. if ( ( rc = ib_cm_send_rtu ( ibdev, mi, conn, av ) ) != 0 ) {
  183. /* Treat as non-fatal */
  184. rc = 0;
  185. }
  186. break;
  187. case htons ( IB_CM_ATTR_CONNECT_REJECT ) :
  188. /* Extract fields */
  189. DBGC ( conn, "CM %p connection rejected (reason %d)\n",
  190. conn, ntohs ( connect_rej->reason ) );
  191. /* Private data is valid only for a Consumer Reject */
  192. if ( connect_rej->reason == htons ( IB_CM_REJECT_CONSUMER ) ) {
  193. private_data = &connect_rej->private_data;
  194. private_data_len = sizeof (connect_rej->private_data);
  195. }
  196. rc = ib_cm_rejection_reason_to_rc ( connect_rej->reason );
  197. break;
  198. default:
  199. DBGC ( conn, "CM %p unexpected response (attribute %04x)\n",
  200. conn, ntohs ( mad->hdr.attr_id ) );
  201. rc = -ENOTSUP;
  202. break;
  203. }
  204. out:
  205. /* Destroy the completed transaction */
  206. ib_destroy_madx ( ibdev, ibdev->gsi, madx );
  207. conn->madx = NULL;
  208. /* Hand off to the upper completion handler */
  209. conn->op->changed ( ibdev, qp, conn, rc, private_data,
  210. private_data_len );
  211. }
  212. /** Connection request operations */
  213. static struct ib_mad_transaction_operations ib_cm_req_op = {
  214. .complete = ib_cm_req_complete,
  215. };
  216. /**
  217. * Handle connection path transaction completion
  218. *
  219. * @v ibdev Infiniband device
  220. * @v path Path
  221. * @v rc Status code
  222. * @v av Address vector, or NULL on error
  223. */
  224. static void ib_cm_path_complete ( struct ib_device *ibdev,
  225. struct ib_path *path, int rc,
  226. struct ib_address_vector *av ) {
  227. struct ib_connection *conn = ib_path_get_ownerdata ( path );
  228. struct ib_queue_pair *qp = conn->qp;
  229. union ib_mad mad;
  230. struct ib_cm_connect_request *connect_req =
  231. &mad.cm.cm_data.connect_request;
  232. size_t private_data_len;
  233. /* Report failures */
  234. if ( rc != 0 ) {
  235. DBGC ( conn, "CM %p path lookup failed: %s\n",
  236. conn, strerror ( rc ) );
  237. conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 );
  238. goto out;
  239. }
  240. /* Update queue pair peer path */
  241. memcpy ( &qp->av, av, sizeof ( qp->av ) );
  242. /* Construct connection request */
  243. memset ( &mad, 0, sizeof ( mad ) );
  244. mad.hdr.mgmt_class = IB_MGMT_CLASS_CM;
  245. mad.hdr.class_version = IB_CM_CLASS_VERSION;
  246. mad.hdr.method = IB_MGMT_METHOD_SEND;
  247. mad.hdr.attr_id = htons ( IB_CM_ATTR_CONNECT_REQUEST );
  248. connect_req->local_id = htonl ( conn->local_id );
  249. memcpy ( &connect_req->service_id, &conn->service_id,
  250. sizeof ( connect_req->service_id ) );
  251. ib_get_hca_info ( ibdev, &connect_req->local_ca );
  252. connect_req->local_qpn__responder_resources =
  253. htonl ( ( qp->qpn << 8 ) | 1 );
  254. connect_req->local_eecn__initiator_depth = htonl ( ( 0 << 8 ) | 1 );
  255. connect_req->remote_eecn__remote_timeout__service_type__ee_flow_ctrl =
  256. htonl ( ( 0x14 << 3 ) | ( IB_CM_TRANSPORT_RC << 1 ) |
  257. ( 0 << 0 ) );
  258. connect_req->starting_psn__local_timeout__retry_count =
  259. htonl ( ( qp->recv.psn << 8 ) | ( 0x14 << 3 ) |
  260. ( 0x07 << 0 ) );
  261. connect_req->pkey = htons ( ibdev->pkey );
  262. connect_req->payload_mtu__rdc_exists__rnr_retry =
  263. ( ( IB_MTU_2048 << 4 ) | ( 1 << 3 ) | ( 0x07 << 0 ) );
  264. connect_req->max_cm_retries__srq =
  265. ( ( 0x0f << 4 ) | ( 0 << 3 ) );
  266. connect_req->primary.local_lid = htons ( ibdev->lid );
  267. connect_req->primary.remote_lid = htons ( conn->qp->av.lid );
  268. memcpy ( &connect_req->primary.local_gid, &ibdev->gid,
  269. sizeof ( connect_req->primary.local_gid ) );
  270. memcpy ( &connect_req->primary.remote_gid, &conn->qp->av.gid,
  271. sizeof ( connect_req->primary.remote_gid ) );
  272. connect_req->primary.flow_label__rate =
  273. htonl ( ( 0 << 12 ) | ( conn->qp->av.rate << 0 ) );
  274. connect_req->primary.hop_limit = 0;
  275. connect_req->primary.sl__subnet_local =
  276. ( ( conn->qp->av.sl << 4 ) | ( 1 << 3 ) );
  277. connect_req->primary.local_ack_timeout = ( 0x13 << 3 );
  278. private_data_len = conn->private_data_len;
  279. if ( private_data_len > sizeof ( connect_req->private_data ) )
  280. private_data_len = sizeof ( connect_req->private_data );
  281. memcpy ( &connect_req->private_data, &conn->private_data,
  282. private_data_len );
  283. /* Create connection request */
  284. av->qpn = IB_QPN_GSI;
  285. av->qkey = IB_QKEY_GSI;
  286. conn->madx = ib_create_madx ( ibdev, ibdev->gsi, &mad, av,
  287. &ib_cm_req_op );
  288. if ( ! conn->madx ) {
  289. DBGC ( conn, "CM %p could not create connection request\n",
  290. conn );
  291. conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 );
  292. goto out;
  293. }
  294. ib_madx_set_ownerdata ( conn->madx, conn );
  295. out:
  296. /* Destroy the completed transaction */
  297. ib_destroy_path ( ibdev, path );
  298. conn->path = NULL;
  299. }
  300. /** Connection path operations */
  301. static struct ib_path_operations ib_cm_path_op = {
  302. .complete = ib_cm_path_complete,
  303. };
  304. /**
  305. * Create connection to remote QP
  306. *
  307. * @v ibdev Infiniband device
  308. * @v qp Queue pair
  309. * @v dgid Target GID
  310. * @v service_id Target service ID
  311. * @v private_data Connection request private data
  312. * @v private_data_len Length of connection request private data
  313. * @v op Connection operations
  314. * @ret conn Connection
  315. */
  316. struct ib_connection *
  317. ib_create_conn ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  318. struct ib_gid *dgid, struct ib_gid_half *service_id,
  319. void *private_data, size_t private_data_len,
  320. struct ib_connection_operations *op ) {
  321. struct ib_connection *conn;
  322. /* Allocate and initialise request */
  323. conn = zalloc ( sizeof ( *conn ) + private_data_len );
  324. if ( ! conn )
  325. goto err_alloc_conn;
  326. conn->ibdev = ibdev;
  327. conn->qp = qp;
  328. memset ( &qp->av, 0, sizeof ( qp->av ) );
  329. qp->av.gid_present = 1;
  330. memcpy ( &qp->av.gid, dgid, sizeof ( qp->av.gid ) );
  331. conn->local_id = random();
  332. memcpy ( &conn->service_id, service_id, sizeof ( conn->service_id ) );
  333. conn->op = op;
  334. conn->private_data_len = private_data_len;
  335. memcpy ( &conn->private_data, private_data, private_data_len );
  336. /* Create path */
  337. conn->path = ib_create_path ( ibdev, &qp->av, &ib_cm_path_op );
  338. if ( ! conn->path )
  339. goto err_create_path;
  340. ib_path_set_ownerdata ( conn->path, conn );
  341. /* Add to list of connections */
  342. list_add ( &conn->list, &ib_cm_conns );
  343. DBGC ( conn, "CM %p created for IBDEV %p QPN %lx\n",
  344. conn, ibdev, qp->qpn );
  345. DBGC ( conn, "CM %p connecting to %08x:%08x:%08x:%08x %08x:%08x\n",
  346. conn, ntohl ( dgid->u.dwords[0] ), ntohl ( dgid->u.dwords[1] ),
  347. ntohl ( dgid->u.dwords[2] ), ntohl ( dgid->u.dwords[3] ),
  348. ntohl ( service_id->u.dwords[0] ),
  349. ntohl ( service_id->u.dwords[1] ) );
  350. return conn;
  351. ib_destroy_path ( ibdev, conn->path );
  352. err_create_path:
  353. free ( conn );
  354. err_alloc_conn:
  355. return NULL;
  356. }
  357. /**
  358. * Destroy connection to remote QP
  359. *
  360. * @v ibdev Infiniband device
  361. * @v qp Queue pair
  362. * @v conn Connection
  363. */
  364. void ib_destroy_conn ( struct ib_device *ibdev,
  365. struct ib_queue_pair *qp __unused,
  366. struct ib_connection *conn ) {
  367. list_del ( &conn->list );
  368. if ( conn->madx )
  369. ib_destroy_madx ( ibdev, ibdev->gsi, conn->madx );
  370. if ( conn->path )
  371. ib_destroy_path ( ibdev, conn->path );
  372. free ( conn );
  373. }