Você não pode selecionar mais de 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

ib_cm.c 14KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495
  1. /*
  2. * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License as
  6. * published by the Free Software Foundation; either version 2 of the
  7. * License, or any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful, but
  10. * WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write to the Free Software
  16. * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17. */
  18. FILE_LICENCE ( GPL2_OR_LATER );
  19. #include <stdint.h>
  20. #include <stdlib.h>
  21. #include <string.h>
  22. #include <byteswap.h>
  23. #include <errno.h>
  24. #include <assert.h>
  25. #include <ipxe/infiniband.h>
  26. #include <ipxe/ib_mi.h>
  27. #include <ipxe/ib_pathrec.h>
  28. #include <ipxe/ib_cm.h>
  29. /**
  30. * @file
  31. *
  32. * Infiniband communication management
  33. *
  34. */
  35. /** List of connections: entries are added by ib_create_conn(), removed by ib_destroy_conn() and searched by ib_cm_find() */
  36. static LIST_HEAD ( ib_cm_conns );
  37. /**
  38. * Find connection by local communication ID
  39. *
  40. * @v local_id Local communication ID
  41. * @ret conn Connection, or NULL
  42. */
  43. static struct ib_connection * ib_cm_find ( uint32_t local_id ) {
  44. struct ib_connection *conn;
  45. list_for_each_entry ( conn, &ib_cm_conns, list ) {
  46. if ( conn->local_id == local_id )
  47. return conn;
  48. }
  49. return NULL;
  50. }
  51. /**
  52. * Send "ready to use" response
  53. *
  54. * @v ibdev Infiniband device
  55. * @v mi Management interface
  56. * @v av Address vector
  57. * @v local_id Local communication ID
  58. * @v remote_id Remote communication ID
  59. * @ret rc Return status code
  60. */
  61. static int ib_cm_send_rtu ( struct ib_device *ibdev,
  62. struct ib_mad_interface *mi,
  63. struct ib_address_vector *av,
  64. uint32_t local_id, uint32_t remote_id ) {
  65. union ib_mad mad;
  66. struct ib_cm_ready_to_use *rtu = &mad.cm.cm_data.ready_to_use;
  67. int rc;
  68. /* Construct "ready to use" response */
  69. memset ( &mad, 0, sizeof ( mad ) );
  70. mad.hdr.mgmt_class = IB_MGMT_CLASS_CM;
  71. mad.hdr.class_version = IB_CM_CLASS_VERSION;
  72. mad.hdr.method = IB_MGMT_METHOD_SEND;
  73. mad.hdr.attr_id = htons ( IB_CM_ATTR_READY_TO_USE );
  74. rtu->local_id = htonl ( local_id );
  75. rtu->remote_id = htonl ( remote_id );
  76. if ( ( rc = ib_mi_send ( ibdev, mi, &mad, av ) ) != 0 ){
  77. DBG ( "CM could not send RTU: %s\n", strerror ( rc ) );
  78. return rc;
  79. }
  80. return 0;
  81. }
  82. /**
  83. * Handle duplicate connection replies
  84. *
  85. * @v ibdev Infiniband device
  86. * @v mi Management interface
  87. * @v mad Received MAD
  88. * @v av Source address vector
  89. * @ret rc Return status code
  90. *
  91. * If a "ready to use" MAD is lost, the peer may resend the connection
  92. * reply. We have to respond to these with duplicate "ready to use"
  93. * MADs, otherwise the peer may time out and drop the connection.
  94. */
  95. static void ib_cm_recv_rep ( struct ib_device *ibdev,
  96. struct ib_mad_interface *mi,
  97. union ib_mad *mad,
  98. struct ib_address_vector *av ) {
  99. struct ib_cm_connect_reply *rep = &mad->cm.cm_data.connect_reply;
  100. struct ib_connection *conn;
  101. uint32_t local_id = ntohl ( rep->remote_id );
  102. int rc;
  103. /* Identify connection */
  104. conn = ib_cm_find ( local_id );
  105. if ( conn ) {
  106. /* Try to send "ready to use" reply */
  107. if ( ( rc = ib_cm_send_rtu ( ibdev, mi, av, conn->local_id,
  108. conn->remote_id ) ) != 0 ) {
  109. /* Ignore errors; the remote end will retry */
  110. }
  111. } else {
  112. DBG ( "CM unidentified connection %08x\n", local_id );
  113. }
  114. }
  115. /**
  116. * Send reply to disconnection request
  117. *
  118. * @v ibdev Infiniband device
  119. * @v mi Management interface
  120. * @v av Address vector
  121. * @v local_id Local communication ID
  122. * @v remote_id Remote communication ID
  123. * @ret rc Return status code
  124. */
  125. static int ib_cm_send_drep ( struct ib_device *ibdev,
  126. struct ib_mad_interface *mi,
  127. struct ib_address_vector *av,
  128. uint32_t local_id, uint32_t remote_id ) {
  129. union ib_mad mad;
  130. struct ib_cm_disconnect_reply *drep = &mad.cm.cm_data.disconnect_reply;
  131. int rc;
  132. /* Construct reply to disconnection request */
  133. memset ( &mad, 0, sizeof ( mad ) );
  134. mad.hdr.mgmt_class = IB_MGMT_CLASS_CM;
  135. mad.hdr.class_version = IB_CM_CLASS_VERSION;
  136. mad.hdr.method = IB_MGMT_METHOD_SEND;
  137. mad.hdr.attr_id = htons ( IB_CM_ATTR_DISCONNECT_REPLY );
  138. drep->local_id = htonl ( local_id );
  139. drep->remote_id = htonl ( remote_id );
  140. if ( ( rc = ib_mi_send ( ibdev, mi, &mad, av ) ) != 0 ){
  141. DBG ( "CM could not send DREP: %s\n", strerror ( rc ) );
  142. return rc;
  143. }
  144. return 0;
  145. }
  146. /**
  147. * Handle disconnection requests
  148. *
  149. * @v ibdev Infiniband device
  150. * @v mi Management interface
  151. * @v mad Received MAD
  152. * @v av Source address vector
  153. * @ret rc Return status code
  154. */
  155. static void ib_cm_recv_dreq ( struct ib_device *ibdev,
  156. struct ib_mad_interface *mi,
  157. union ib_mad *mad,
  158. struct ib_address_vector *av ) {
  159. struct ib_cm_disconnect_request *dreq =
  160. &mad->cm.cm_data.disconnect_request;
  161. struct ib_connection *conn;
  162. uint32_t local_id = ntohl ( dreq->remote_id );
  163. uint32_t remote_id = ntohl ( dreq->local_id );
  164. int rc;
  165. /* Identify connection */
  166. conn = ib_cm_find ( local_id );
  167. if ( conn ) {
  168. /* Notify upper layer */
  169. conn->op->changed ( ibdev, conn->qp, conn, -ENOTCONN,
  170. &dreq->private_data,
  171. sizeof ( dreq->private_data ) );
  172. } else {
  173. DBG ( "CM unidentified connection %08x\n", local_id );
  174. }
  175. /* Send reply */
  176. if ( ( rc = ib_cm_send_drep ( ibdev, mi, av, local_id,
  177. remote_id ) ) != 0 ) {
  178. /* Ignore errors; the remote end will retry */
  179. }
  180. };
  181. /**
 * Communication management agents
 *
 * Each entry pairs a CM attribute ID (stored in network byte order via
 * htons()) with the handler for MADs carrying that attribute:
 * connection replies are handled by ib_cm_recv_rep() and disconnection
 * requests by ib_cm_recv_dreq().
 *
 * NOTE(review): __ib_mad_agent presumably places these entries in a
 * MAD agent table defined elsewhere -- confirm against its definition.
 */
  182. struct ib_mad_agent ib_cm_agent[] __ib_mad_agent = {
  183. {
  184. .mgmt_class = IB_MGMT_CLASS_CM,
  185. .class_version = IB_CM_CLASS_VERSION,
  186. .attr_id = htons ( IB_CM_ATTR_CONNECT_REPLY ),
  187. .handle = ib_cm_recv_rep,
  188. },
  189. {
  190. .mgmt_class = IB_MGMT_CLASS_CM,
  191. .class_version = IB_CM_CLASS_VERSION,
  192. .attr_id = htons ( IB_CM_ATTR_DISCONNECT_REQUEST ),
  193. .handle = ib_cm_recv_dreq,
  194. },
  195. };
  196. /**
  197. * Convert connection rejection reason to return status code
  198. *
  199. * @v reason Rejection reason (in network byte order)
  200. * @ret rc Return status code
  201. */
  202. static int ib_cm_rejection_reason_to_rc ( uint16_t reason ) {
  203. switch ( reason ) {
  204. case htons ( IB_CM_REJECT_BAD_SERVICE_ID ) :
  205. return -ENODEV;
  206. case htons ( IB_CM_REJECT_STALE_CONN ) :
  207. return -EALREADY;
  208. case htons ( IB_CM_REJECT_CONSUMER ) :
  209. return -ENOTTY;
  210. default:
  211. return -EPERM;
  212. }
  213. }
  214. /**
  215. * Handle connection request transaction completion
  216. *
  217. * @v ibdev Infiniband device
  218. * @v mi Management interface
  219. * @v madx Management transaction
  220. * @v rc Status code
  221. * @v mad Received MAD (or NULL on error)
  222. * @v av Source address vector (or NULL on error)
  223. */
  224. static void ib_cm_req_complete ( struct ib_device *ibdev,
  225. struct ib_mad_interface *mi,
  226. struct ib_mad_transaction *madx,
  227. int rc, union ib_mad *mad,
  228. struct ib_address_vector *av ) {
  229. struct ib_connection *conn = ib_madx_get_ownerdata ( madx );
  230. struct ib_queue_pair *qp = conn->qp;
  231. struct ib_cm_common *common = &mad->cm.cm_data.common;
  232. struct ib_cm_connect_reply *rep = &mad->cm.cm_data.connect_reply;
  233. struct ib_cm_connect_reject *rej = &mad->cm.cm_data.connect_reject;
  234. void *private_data = NULL;
  235. size_t private_data_len = 0;
  236. /* Report failures */
  237. if ( ( rc == 0 ) && ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) ))
  238. rc = -EIO;
  239. if ( rc != 0 ) {
  240. DBGC ( conn, "CM %p connection request failed: %s\n",
  241. conn, strerror ( rc ) );
  242. goto out;
  243. }
  244. /* Record remote communication ID */
  245. conn->remote_id = ntohl ( common->local_id );
  246. /* Handle response */
  247. switch ( mad->hdr.attr_id ) {
  248. case htons ( IB_CM_ATTR_CONNECT_REPLY ) :
  249. /* Extract fields */
  250. qp->av.qpn = ( ntohl ( rep->local_qpn ) >> 8 );
  251. qp->send.psn = ( ntohl ( rep->starting_psn ) >> 8 );
  252. private_data = &rep->private_data;
  253. private_data_len = sizeof ( rep->private_data );
  254. DBGC ( conn, "CM %p connected to QPN %lx PSN %x\n",
  255. conn, qp->av.qpn, qp->send.psn );
  256. /* Modify queue pair */
  257. if ( ( rc = ib_modify_qp ( ibdev, qp ) ) != 0 ) {
  258. DBGC ( conn, "CM %p could not modify queue pair: %s\n",
  259. conn, strerror ( rc ) );
  260. goto out;
  261. }
  262. /* Send "ready to use" reply */
  263. if ( ( rc = ib_cm_send_rtu ( ibdev, mi, av, conn->local_id,
  264. conn->remote_id ) ) != 0 ) {
  265. /* Treat as non-fatal */
  266. rc = 0;
  267. }
  268. break;
  269. case htons ( IB_CM_ATTR_CONNECT_REJECT ) :
  270. /* Extract fields */
  271. DBGC ( conn, "CM %p connection rejected (reason %d)\n",
  272. conn, ntohs ( rej->reason ) );
  273. /* Private data is valid only for a Consumer Reject */
  274. if ( rej->reason == htons ( IB_CM_REJECT_CONSUMER ) ) {
  275. private_data = &rej->private_data;
  276. private_data_len = sizeof ( rej->private_data );
  277. }
  278. rc = ib_cm_rejection_reason_to_rc ( rej->reason );
  279. break;
  280. default:
  281. DBGC ( conn, "CM %p unexpected response (attribute %04x)\n",
  282. conn, ntohs ( mad->hdr.attr_id ) );
  283. rc = -ENOTSUP;
  284. break;
  285. }
  286. out:
  287. /* Destroy the completed transaction */
  288. ib_destroy_madx ( ibdev, ibdev->gsi, madx );
  289. conn->madx = NULL;
  290. /* Hand off to the upper completion handler */
  291. conn->op->changed ( ibdev, qp, conn, rc, private_data,
  292. private_data_len );
  293. }
  294. /**
 * Connection request operations
 *
 * Passed to ib_create_madx() by ib_cm_path_complete(); completion of
 * the connection request transaction is handled by
 * ib_cm_req_complete().
 */
  295. static struct ib_mad_transaction_operations ib_cm_req_op = {
  296. .complete = ib_cm_req_complete,
  297. };
  298. /**
  299. * Handle connection path transaction completion
  300. *
  301. * @v ibdev Infiniband device
  302. * @v path Path
  303. * @v rc Status code
  304. * @v av Address vector, or NULL on error
  305. */
  306. static void ib_cm_path_complete ( struct ib_device *ibdev,
  307. struct ib_path *path, int rc,
  308. struct ib_address_vector *av ) {
  309. struct ib_connection *conn = ib_path_get_ownerdata ( path );
  310. struct ib_queue_pair *qp = conn->qp;
  311. union ib_mad mad;
  312. struct ib_cm_connect_request *req = &mad.cm.cm_data.connect_request;
  313. size_t private_data_len;
  314. /* Report failures */
  315. if ( rc != 0 ) {
  316. DBGC ( conn, "CM %p path lookup failed: %s\n",
  317. conn, strerror ( rc ) );
  318. conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 );
  319. goto out;
  320. }
  321. /* Update queue pair peer path */
  322. memcpy ( &qp->av, av, sizeof ( qp->av ) );
  323. /* Construct connection request */
  324. memset ( &mad, 0, sizeof ( mad ) );
  325. mad.hdr.mgmt_class = IB_MGMT_CLASS_CM;
  326. mad.hdr.class_version = IB_CM_CLASS_VERSION;
  327. mad.hdr.method = IB_MGMT_METHOD_SEND;
  328. mad.hdr.attr_id = htons ( IB_CM_ATTR_CONNECT_REQUEST );
  329. req->local_id = htonl ( conn->local_id );
  330. memcpy ( &req->service_id, &conn->service_id,
  331. sizeof ( req->service_id ) );
  332. memcpy ( &req->local_ca, &ibdev->node_guid, sizeof ( req->local_ca ) );
  333. req->local_qpn__responder_resources = htonl ( ( qp->qpn << 8 ) | 1 );
  334. req->local_eecn__initiator_depth = htonl ( ( 0 << 8 ) | 1 );
  335. req->remote_eecn__remote_timeout__service_type__ee_flow_ctrl =
  336. htonl ( ( 0x14 << 3 ) | ( IB_CM_TRANSPORT_RC << 1 ) |
  337. ( 0 << 0 ) );
  338. req->starting_psn__local_timeout__retry_count =
  339. htonl ( ( qp->recv.psn << 8 ) | ( 0x14 << 3 ) |
  340. ( 0x07 << 0 ) );
  341. req->pkey = htons ( ibdev->pkey );
  342. req->payload_mtu__rdc_exists__rnr_retry =
  343. ( ( IB_MTU_2048 << 4 ) | ( 1 << 3 ) | ( 0x07 << 0 ) );
  344. req->max_cm_retries__srq = ( ( 0x0f << 4 ) | ( 0 << 3 ) );
  345. req->primary.local_lid = htons ( ibdev->lid );
  346. req->primary.remote_lid = htons ( conn->qp->av.lid );
  347. memcpy ( &req->primary.local_gid, &ibdev->gid,
  348. sizeof ( req->primary.local_gid ) );
  349. memcpy ( &req->primary.remote_gid, &conn->qp->av.gid,
  350. sizeof ( req->primary.remote_gid ) );
  351. req->primary.flow_label__rate =
  352. htonl ( ( 0 << 12 ) | ( conn->qp->av.rate << 0 ) );
  353. req->primary.hop_limit = 0;
  354. req->primary.sl__subnet_local =
  355. ( ( conn->qp->av.sl << 4 ) | ( 1 << 3 ) );
  356. req->primary.local_ack_timeout = ( 0x13 << 3 );
  357. private_data_len = conn->private_data_len;
  358. if ( private_data_len > sizeof ( req->private_data ) )
  359. private_data_len = sizeof ( req->private_data );
  360. memcpy ( &req->private_data, &conn->private_data, private_data_len );
  361. /* Create connection request */
  362. av->qpn = IB_QPN_GSI;
  363. av->qkey = IB_QKEY_GSI;
  364. conn->madx = ib_create_madx ( ibdev, ibdev->gsi, &mad, av,
  365. &ib_cm_req_op );
  366. if ( ! conn->madx ) {
  367. DBGC ( conn, "CM %p could not create connection request\n",
  368. conn );
  369. conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 );
  370. goto out;
  371. }
  372. ib_madx_set_ownerdata ( conn->madx, conn );
  373. out:
  374. /* Destroy the completed transaction */
  375. ib_destroy_path ( ibdev, path );
  376. conn->path = NULL;
  377. }
  378. /**
 * Connection path operations
 *
 * Passed to ib_create_path() by ib_create_conn(); completion of the
 * path transaction is handled by ib_cm_path_complete().
 */
  379. static struct ib_path_operations ib_cm_path_op = {
  380. .complete = ib_cm_path_complete,
  381. };
  382. /**
  383. * Create connection to remote QP
  384. *
  385. * @v ibdev Infiniband device
  386. * @v qp Queue pair
  387. * @v dgid Target GID
  388. * @v service_id Target service ID
  389. * @v private_data Connection request private data
  390. * @v private_data_len Length of connection request private data
  391. * @v op Connection operations
  392. * @ret conn Connection
  393. */
  394. struct ib_connection *
  395. ib_create_conn ( struct ib_device *ibdev, struct ib_queue_pair *qp,
  396. union ib_gid *dgid, union ib_guid *service_id,
  397. void *private_data, size_t private_data_len,
  398. struct ib_connection_operations *op ) {
  399. struct ib_connection *conn;
  400. /* Allocate and initialise request */
  401. conn = zalloc ( sizeof ( *conn ) + private_data_len );
  402. if ( ! conn )
  403. goto err_alloc_conn;
  404. conn->ibdev = ibdev;
  405. conn->qp = qp;
  406. memset ( &qp->av, 0, sizeof ( qp->av ) );
  407. qp->av.gid_present = 1;
  408. memcpy ( &qp->av.gid, dgid, sizeof ( qp->av.gid ) );
  409. conn->local_id = random();
  410. memcpy ( &conn->service_id, service_id, sizeof ( conn->service_id ) );
  411. conn->op = op;
  412. conn->private_data_len = private_data_len;
  413. memcpy ( &conn->private_data, private_data, private_data_len );
  414. /* Create path */
  415. conn->path = ib_create_path ( ibdev, &qp->av, &ib_cm_path_op );
  416. if ( ! conn->path )
  417. goto err_create_path;
  418. ib_path_set_ownerdata ( conn->path, conn );
  419. /* Add to list of connections */
  420. list_add ( &conn->list, &ib_cm_conns );
  421. DBGC ( conn, "CM %p created for IBDEV %p QPN %lx\n",
  422. conn, ibdev, qp->qpn );
  423. DBGC ( conn, "CM %p connecting to " IB_GID_FMT " " IB_GUID_FMT "\n",
  424. conn, IB_GID_ARGS ( dgid ), IB_GUID_ARGS ( service_id ) );
  425. return conn;
  426. ib_destroy_path ( ibdev, conn->path );
  427. err_create_path:
  428. free ( conn );
  429. err_alloc_conn:
  430. return NULL;
  431. }
  432. /**
  433. * Destroy connection to remote QP
  434. *
  435. * @v ibdev Infiniband device
  436. * @v qp Queue pair
  437. * @v conn Connection
  438. */
  439. void ib_destroy_conn ( struct ib_device *ibdev,
  440. struct ib_queue_pair *qp __unused,
  441. struct ib_connection *conn ) {
  442. list_del ( &conn->list );
  443. if ( conn->madx )
  444. ib_destroy_madx ( ibdev, ibdev->gsi, conn->madx );
  445. if ( conn->path )
  446. ib_destroy_path ( ibdev, conn->path );
  447. free ( conn );
  448. }