123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500 |
- /*
- * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- *
- * You can also choose to distribute this program under the terms of
- * the Unmodified Binary Distribution Licence (as given in the file
- * COPYING.UBDL), provided that you have satisfied its requirements.
- */
-
- FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
-
- #include <stdint.h>
- #include <stdlib.h>
- #include <string.h>
- #include <byteswap.h>
- #include <errno.h>
- #include <assert.h>
- #include <ipxe/infiniband.h>
- #include <ipxe/ib_mi.h>
- #include <ipxe/ib_pathrec.h>
- #include <ipxe/ib_cm.h>
-
- /**
- * @file
- *
- * Infiniband communication management
- *
- */
-
/**
 * List of connections
 *
 * Connections are added by ib_create_conn(), removed by
 * ib_destroy_conn(), and searched by ib_cm_find().
 */
static LIST_HEAD ( ib_cm_conns );
-
- /**
- * Find connection by local communication ID
- *
- * @v local_id Local communication ID
- * @ret conn Connection, or NULL
- */
- static struct ib_connection * ib_cm_find ( uint32_t local_id ) {
- struct ib_connection *conn;
-
- list_for_each_entry ( conn, &ib_cm_conns, list ) {
- if ( conn->local_id == local_id )
- return conn;
- }
- return NULL;
- }
-
- /**
- * Send "ready to use" response
- *
- * @v ibdev Infiniband device
- * @v mi Management interface
- * @v av Address vector
- * @v local_id Local communication ID
- * @v remote_id Remote communication ID
- * @ret rc Return status code
- */
- static int ib_cm_send_rtu ( struct ib_device *ibdev,
- struct ib_mad_interface *mi,
- struct ib_address_vector *av,
- uint32_t local_id, uint32_t remote_id ) {
- union ib_mad mad;
- struct ib_cm_ready_to_use *rtu = &mad.cm.cm_data.ready_to_use;
- int rc;
-
- /* Construct "ready to use" response */
- memset ( &mad, 0, sizeof ( mad ) );
- mad.hdr.mgmt_class = IB_MGMT_CLASS_CM;
- mad.hdr.class_version = IB_CM_CLASS_VERSION;
- mad.hdr.method = IB_MGMT_METHOD_SEND;
- mad.hdr.attr_id = htons ( IB_CM_ATTR_READY_TO_USE );
- rtu->local_id = htonl ( local_id );
- rtu->remote_id = htonl ( remote_id );
- if ( ( rc = ib_mi_send ( ibdev, mi, &mad, av ) ) != 0 ){
- DBG ( "CM could not send RTU: %s\n", strerror ( rc ) );
- return rc;
- }
-
- return 0;
- }
-
- /**
- * Handle duplicate connection replies
- *
- * @v ibdev Infiniband device
- * @v mi Management interface
- * @v mad Received MAD
- * @v av Source address vector
- * @ret rc Return status code
- *
- * If a "ready to use" MAD is lost, the peer may resend the connection
- * reply. We have to respond to these with duplicate "ready to use"
- * MADs, otherwise the peer may time out and drop the connection.
- */
- static void ib_cm_recv_rep ( struct ib_device *ibdev,
- struct ib_mad_interface *mi,
- union ib_mad *mad,
- struct ib_address_vector *av ) {
- struct ib_cm_connect_reply *rep = &mad->cm.cm_data.connect_reply;
- struct ib_connection *conn;
- uint32_t local_id = ntohl ( rep->remote_id );
- int rc;
-
- /* Identify connection */
- conn = ib_cm_find ( local_id );
- if ( conn ) {
- /* Try to send "ready to use" reply */
- if ( ( rc = ib_cm_send_rtu ( ibdev, mi, av, conn->local_id,
- conn->remote_id ) ) != 0 ) {
- /* Ignore errors; the remote end will retry */
- }
- } else {
- DBG ( "CM unidentified connection %08x\n", local_id );
- }
- }
-
- /**
- * Send reply to disconnection request
- *
- * @v ibdev Infiniband device
- * @v mi Management interface
- * @v av Address vector
- * @v local_id Local communication ID
- * @v remote_id Remote communication ID
- * @ret rc Return status code
- */
- static int ib_cm_send_drep ( struct ib_device *ibdev,
- struct ib_mad_interface *mi,
- struct ib_address_vector *av,
- uint32_t local_id, uint32_t remote_id ) {
- union ib_mad mad;
- struct ib_cm_disconnect_reply *drep = &mad.cm.cm_data.disconnect_reply;
- int rc;
-
- /* Construct reply to disconnection request */
- memset ( &mad, 0, sizeof ( mad ) );
- mad.hdr.mgmt_class = IB_MGMT_CLASS_CM;
- mad.hdr.class_version = IB_CM_CLASS_VERSION;
- mad.hdr.method = IB_MGMT_METHOD_SEND;
- mad.hdr.attr_id = htons ( IB_CM_ATTR_DISCONNECT_REPLY );
- drep->local_id = htonl ( local_id );
- drep->remote_id = htonl ( remote_id );
- if ( ( rc = ib_mi_send ( ibdev, mi, &mad, av ) ) != 0 ){
- DBG ( "CM could not send DREP: %s\n", strerror ( rc ) );
- return rc;
- }
-
- return 0;
- }
-
- /**
- * Handle disconnection requests
- *
- * @v ibdev Infiniband device
- * @v mi Management interface
- * @v mad Received MAD
- * @v av Source address vector
- * @ret rc Return status code
- */
- static void ib_cm_recv_dreq ( struct ib_device *ibdev,
- struct ib_mad_interface *mi,
- union ib_mad *mad,
- struct ib_address_vector *av ) {
- struct ib_cm_disconnect_request *dreq =
- &mad->cm.cm_data.disconnect_request;
- struct ib_connection *conn;
- uint32_t local_id = ntohl ( dreq->remote_id );
- uint32_t remote_id = ntohl ( dreq->local_id );
- int rc;
-
- /* Identify connection */
- conn = ib_cm_find ( local_id );
- if ( conn ) {
- /* Notify upper layer */
- conn->op->changed ( ibdev, conn->qp, conn, -ENOTCONN,
- &dreq->private_data,
- sizeof ( dreq->private_data ) );
- } else {
- DBG ( "CM unidentified connection %08x\n", local_id );
- }
-
- /* Send reply */
- if ( ( rc = ib_cm_send_drep ( ibdev, mi, av, local_id,
- remote_id ) ) != 0 ) {
- /* Ignore errors; the remote end will retry */
- }
- };
-
/**
 * Communication management agents
 *
 * Each entry names the handler for one unsolicited CM attribute.
 * NOTE(review): __ib_mad_agent presumably places these entries in the
 * table of MAD agents consulted by the management interface - confirm
 * against its definition.
 */
struct ib_mad_agent ib_cm_agent[] __ib_mad_agent = {
	{
		/* Connection replies go to ib_cm_recv_rep() */
		.mgmt_class = IB_MGMT_CLASS_CM,
		.class_version = IB_CM_CLASS_VERSION,
		.attr_id = htons ( IB_CM_ATTR_CONNECT_REPLY ),
		.handle = ib_cm_recv_rep,
	},
	{
		/* Disconnection requests go to ib_cm_recv_dreq() */
		.mgmt_class = IB_MGMT_CLASS_CM,
		.class_version = IB_CM_CLASS_VERSION,
		.attr_id = htons ( IB_CM_ATTR_DISCONNECT_REQUEST ),
		.handle = ib_cm_recv_dreq,
	},
};
-
- /**
- * Convert connection rejection reason to return status code
- *
- * @v reason Rejection reason (in network byte order)
- * @ret rc Return status code
- */
- static int ib_cm_rejection_reason_to_rc ( uint16_t reason ) {
- switch ( reason ) {
- case htons ( IB_CM_REJECT_BAD_SERVICE_ID ) :
- return -ENODEV;
- case htons ( IB_CM_REJECT_STALE_CONN ) :
- return -EALREADY;
- case htons ( IB_CM_REJECT_CONSUMER ) :
- return -ENOTTY;
- default:
- return -EPERM;
- }
- }
-
- /**
- * Handle connection request transaction completion
- *
- * @v ibdev Infiniband device
- * @v mi Management interface
- * @v madx Management transaction
- * @v rc Status code
- * @v mad Received MAD (or NULL on error)
- * @v av Source address vector (or NULL on error)
- */
- static void ib_cm_req_complete ( struct ib_device *ibdev,
- struct ib_mad_interface *mi,
- struct ib_mad_transaction *madx,
- int rc, union ib_mad *mad,
- struct ib_address_vector *av ) {
- struct ib_connection *conn = ib_madx_get_ownerdata ( madx );
- struct ib_queue_pair *qp = conn->qp;
- struct ib_cm_common *common = &mad->cm.cm_data.common;
- struct ib_cm_connect_reply *rep = &mad->cm.cm_data.connect_reply;
- struct ib_cm_connect_reject *rej = &mad->cm.cm_data.connect_reject;
- void *private_data = NULL;
- size_t private_data_len = 0;
-
- /* Report failures */
- if ( ( rc == 0 ) && ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) ))
- rc = -EIO;
- if ( rc != 0 ) {
- DBGC ( conn, "CM %p connection request failed: %s\n",
- conn, strerror ( rc ) );
- goto out;
- }
-
- /* Record remote communication ID */
- conn->remote_id = ntohl ( common->local_id );
-
- /* Handle response */
- switch ( mad->hdr.attr_id ) {
-
- case htons ( IB_CM_ATTR_CONNECT_REPLY ) :
- /* Extract fields */
- qp->av.qpn = ( ntohl ( rep->local_qpn ) >> 8 );
- qp->send.psn = ( ntohl ( rep->starting_psn ) >> 8 );
- private_data = &rep->private_data;
- private_data_len = sizeof ( rep->private_data );
- DBGC ( conn, "CM %p connected to QPN %lx PSN %x\n",
- conn, qp->av.qpn, qp->send.psn );
-
- /* Modify queue pair */
- if ( ( rc = ib_modify_qp ( ibdev, qp ) ) != 0 ) {
- DBGC ( conn, "CM %p could not modify queue pair: %s\n",
- conn, strerror ( rc ) );
- goto out;
- }
-
- /* Send "ready to use" reply */
- if ( ( rc = ib_cm_send_rtu ( ibdev, mi, av, conn->local_id,
- conn->remote_id ) ) != 0 ) {
- /* Treat as non-fatal */
- rc = 0;
- }
- break;
-
- case htons ( IB_CM_ATTR_CONNECT_REJECT ) :
- /* Extract fields */
- DBGC ( conn, "CM %p connection rejected (reason %d)\n",
- conn, ntohs ( rej->reason ) );
- /* Private data is valid only for a Consumer Reject */
- if ( rej->reason == htons ( IB_CM_REJECT_CONSUMER ) ) {
- private_data = &rej->private_data;
- private_data_len = sizeof ( rej->private_data );
- }
- rc = ib_cm_rejection_reason_to_rc ( rej->reason );
- break;
-
- default:
- DBGC ( conn, "CM %p unexpected response (attribute %04x)\n",
- conn, ntohs ( mad->hdr.attr_id ) );
- rc = -ENOTSUP;
- break;
- }
-
- out:
- /* Destroy the completed transaction */
- ib_destroy_madx ( ibdev, ibdev->gsi, madx );
- conn->madx = NULL;
-
- /* Hand off to the upper completion handler */
- conn->op->changed ( ibdev, qp, conn, rc, private_data,
- private_data_len );
- }
-
/**
 * Connection request operations
 *
 * Passed to ib_create_madx() by ib_cm_path_complete(); the completion
 * handler is ib_cm_req_complete().
 */
static struct ib_mad_transaction_operations ib_cm_req_op = {
	.complete = ib_cm_req_complete,
};
-
- /**
- * Handle connection path transaction completion
- *
- * @v ibdev Infiniband device
- * @v path Path
- * @v rc Status code
- * @v av Address vector, or NULL on error
- */
- static void ib_cm_path_complete ( struct ib_device *ibdev,
- struct ib_path *path, int rc,
- struct ib_address_vector *av ) {
- struct ib_connection *conn = ib_path_get_ownerdata ( path );
- struct ib_queue_pair *qp = conn->qp;
- union ib_mad mad;
- struct ib_cm_connect_request *req = &mad.cm.cm_data.connect_request;
- size_t private_data_len;
-
- /* Report failures */
- if ( rc != 0 ) {
- DBGC ( conn, "CM %p path lookup failed: %s\n",
- conn, strerror ( rc ) );
- conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 );
- goto out;
- }
-
- /* Update queue pair peer path */
- memcpy ( &qp->av, av, sizeof ( qp->av ) );
-
- /* Construct connection request */
- memset ( &mad, 0, sizeof ( mad ) );
- mad.hdr.mgmt_class = IB_MGMT_CLASS_CM;
- mad.hdr.class_version = IB_CM_CLASS_VERSION;
- mad.hdr.method = IB_MGMT_METHOD_SEND;
- mad.hdr.attr_id = htons ( IB_CM_ATTR_CONNECT_REQUEST );
- req->local_id = htonl ( conn->local_id );
- memcpy ( &req->service_id, &conn->service_id,
- sizeof ( req->service_id ) );
- memcpy ( &req->local_ca, &ibdev->node_guid, sizeof ( req->local_ca ) );
- req->local_qpn__responder_resources = htonl ( ( qp->qpn << 8 ) | 1 );
- req->local_eecn__initiator_depth = htonl ( ( 0 << 8 ) | 1 );
- req->remote_eecn__remote_timeout__service_type__ee_flow_ctrl =
- htonl ( ( 0x14 << 3 ) | ( IB_CM_TRANSPORT_RC << 1 ) |
- ( 0 << 0 ) );
- req->starting_psn__local_timeout__retry_count =
- htonl ( ( qp->recv.psn << 8 ) | ( 0x14 << 3 ) |
- ( 0x07 << 0 ) );
- req->pkey = htons ( ibdev->pkey );
- req->payload_mtu__rdc_exists__rnr_retry =
- ( ( IB_MTU_2048 << 4 ) | ( 1 << 3 ) | ( 0x07 << 0 ) );
- req->max_cm_retries__srq = ( ( 0x0f << 4 ) | ( 0 << 3 ) );
- req->primary.local_lid = htons ( ibdev->lid );
- req->primary.remote_lid = htons ( conn->qp->av.lid );
- memcpy ( &req->primary.local_gid, &ibdev->gid,
- sizeof ( req->primary.local_gid ) );
- memcpy ( &req->primary.remote_gid, &conn->qp->av.gid,
- sizeof ( req->primary.remote_gid ) );
- req->primary.flow_label__rate =
- htonl ( ( 0 << 12 ) | ( conn->qp->av.rate << 0 ) );
- req->primary.hop_limit = 0;
- req->primary.sl__subnet_local =
- ( ( conn->qp->av.sl << 4 ) | ( 1 << 3 ) );
- req->primary.local_ack_timeout = ( 0x13 << 3 );
- private_data_len = conn->private_data_len;
- if ( private_data_len > sizeof ( req->private_data ) )
- private_data_len = sizeof ( req->private_data );
- memcpy ( &req->private_data, &conn->private_data, private_data_len );
-
- /* Create connection request */
- av->qpn = IB_QPN_GSI;
- av->qkey = IB_QKEY_GSI;
- conn->madx = ib_create_madx ( ibdev, ibdev->gsi, &mad, av,
- &ib_cm_req_op );
- if ( ! conn->madx ) {
- DBGC ( conn, "CM %p could not create connection request\n",
- conn );
- conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 );
- goto out;
- }
- ib_madx_set_ownerdata ( conn->madx, conn );
-
- out:
- /* Destroy the completed transaction */
- ib_destroy_path ( ibdev, path );
- conn->path = NULL;
- }
-
/**
 * Connection path operations
 *
 * Passed to ib_create_path() by ib_create_conn(); the completion
 * handler is ib_cm_path_complete().
 */
static struct ib_path_operations ib_cm_path_op = {
	.complete = ib_cm_path_complete,
};
-
- /**
- * Create connection to remote QP
- *
- * @v ibdev Infiniband device
- * @v qp Queue pair
- * @v dgid Target GID
- * @v service_id Target service ID
- * @v private_data Connection request private data
- * @v private_data_len Length of connection request private data
- * @v op Connection operations
- * @ret conn Connection
- */
- struct ib_connection *
- ib_create_conn ( struct ib_device *ibdev, struct ib_queue_pair *qp,
- union ib_gid *dgid, union ib_guid *service_id,
- void *private_data, size_t private_data_len,
- struct ib_connection_operations *op ) {
- struct ib_connection *conn;
-
- /* Allocate and initialise request */
- conn = zalloc ( sizeof ( *conn ) + private_data_len );
- if ( ! conn )
- goto err_alloc_conn;
- conn->ibdev = ibdev;
- conn->qp = qp;
- memset ( &qp->av, 0, sizeof ( qp->av ) );
- qp->av.gid_present = 1;
- memcpy ( &qp->av.gid, dgid, sizeof ( qp->av.gid ) );
- conn->local_id = random();
- memcpy ( &conn->service_id, service_id, sizeof ( conn->service_id ) );
- conn->op = op;
- conn->private_data_len = private_data_len;
- memcpy ( &conn->private_data, private_data, private_data_len );
-
- /* Create path */
- conn->path = ib_create_path ( ibdev, &qp->av, &ib_cm_path_op );
- if ( ! conn->path )
- goto err_create_path;
- ib_path_set_ownerdata ( conn->path, conn );
-
- /* Add to list of connections */
- list_add ( &conn->list, &ib_cm_conns );
-
- DBGC ( conn, "CM %p created for IBDEV %p QPN %lx\n",
- conn, ibdev, qp->qpn );
- DBGC ( conn, "CM %p connecting to " IB_GID_FMT " " IB_GUID_FMT "\n",
- conn, IB_GID_ARGS ( dgid ), IB_GUID_ARGS ( service_id ) );
-
- return conn;
-
- ib_destroy_path ( ibdev, conn->path );
- err_create_path:
- free ( conn );
- err_alloc_conn:
- return NULL;
- }
-
- /**
- * Destroy connection to remote QP
- *
- * @v ibdev Infiniband device
- * @v qp Queue pair
- * @v conn Connection
- */
- void ib_destroy_conn ( struct ib_device *ibdev,
- struct ib_queue_pair *qp __unused,
- struct ib_connection *conn ) {
-
- list_del ( &conn->list );
- if ( conn->madx )
- ib_destroy_madx ( ibdev, ibdev->gsi, conn->madx );
- if ( conn->path )
- ib_destroy_path ( ibdev, conn->path );
- free ( conn );
- }
|