瀏覽代碼

[infiniband] Handle duplicate Communication Management REPs

We will terminate our transaction as soon as we receive the first CM
REP, since that provides all the state that we need.  However, the
peer may resend the REP if it didn't see our RTU, and if we don't
respond with another RTU we risk being disconnected.  (This protocol
appears not to handle retries gracefully.)

Fix by adding a management agent that will listen for these duplicate
REPs and send back an RTU.
tags/v0.9.8
Michael Brown 15 年之前
父節點
當前提交
46073f1239
共有 2 個檔案被更改,包括 58 行新增和 0 行刪除
  1. 3
    0
      src/include/gpxe/ib_cm.h
  2. 55
    0
      src/net/infiniband/ib_cm.c

+ 3
- 0
src/include/gpxe/ib_cm.h 查看文件

46
 	/** Connection operations */
46
 	/** Connection operations */
47
 	struct ib_connection_operations *op;
47
 	struct ib_connection_operations *op;
48
 
48
 
49
+	/** List of connections */
50
+	struct list_head list;
51
+
49
 	/** Path to target */
52
 	/** Path to target */
50
 	struct ib_path *path;
53
 	struct ib_path *path;
51
 	/** Connection request management transaction */
54
 	/** Connection request management transaction */

+ 55
- 0
src/net/infiniband/ib_cm.c 查看文件

36
  *
36
  *
37
  */
37
  */
38
 
38
 
39
+/** List of connections */
40
+static LIST_HEAD ( ib_cm_conns );
41
+
39
 /**
42
 /**
40
  * Send "ready to use" response
43
  * Send "ready to use" response
41
  *
44
  *
71
 	return 0;
74
 	return 0;
72
 }
75
 }
73
 
76
 
77
+/**
78
+ * Handle duplicate connection replies
79
+ *
80
+ * @v ibdev		Infiniband device
81
+ * @v mi		Management interface
82
+ * @v mad		Received MAD
83
+ * @v av		Source address vector
84
+ * (No return value; errors sending the "ready to use" MAD are ignored.)
85
+ *
86
+ * If a "ready to use" MAD is lost, the peer may resend the connection
87
+ * reply.  We have to respond to these with duplicate "ready to use"
88
+ * MADs, otherwise the peer may time out and drop the connection.
89
+ */
90
+static void ib_cm_connect_rep ( struct ib_device *ibdev,
91
+				struct ib_mad_interface *mi,
92
+				union ib_mad *mad,
93
+				struct ib_address_vector *av ) {
94
+	struct ib_cm_connect_reply *connect_rep =
95
+		&mad->cm.cm_data.connect_reply;
96
+	struct ib_connection *conn;
97
+	int rc;
98
+
99
+	/* Identify connection */
100
+	list_for_each_entry ( conn, &ib_cm_conns, list ) {
101
+		if ( ntohl ( connect_rep->remote_id ) != conn->local_id )
102
+			continue;
103
+		/* Try to send "ready to use" reply */
104
+		if ( ( rc = ib_cm_send_rtu ( ibdev, mi, conn, av ) ) != 0 ) {
105
+			/* Ignore errors */
106
+			return;
107
+		}
108
+		return;
109
+	}
110
+
111
+	DBG ( "CM unidentified connection %08x\n",
112
+	      ntohl ( connect_rep->remote_id ) );
113
+}
114
+
115
+/** Communication management agents */
116
+struct ib_mad_agent ib_cm_agent[] __ib_mad_agent = {
117
+	{
118
+		.mgmt_class = IB_MGMT_CLASS_CM,
119
+		.class_version = IB_CM_CLASS_VERSION,
120
+		.attr_id = htons ( IB_CM_ATTR_CONNECT_REPLY ),
121
+		.handle = ib_cm_connect_rep,
122
+	},
123
+};
124
+
74
 /**
125
 /**
75
  * Handle connection request transaction completion
126
  * Handle connection request transaction completion
76
  *
127
  *
296
 		goto err_create_path;
347
 		goto err_create_path;
297
 	ib_path_set_ownerdata ( conn->path, conn );
348
 	ib_path_set_ownerdata ( conn->path, conn );
298
 
349
 
350
+	/* Add to list of connections */
351
+	list_add ( &conn->list, &ib_cm_conns );
352
+
299
 	DBGC ( conn, "CM %p created for IBDEV %p QPN %lx\n",
353
 	DBGC ( conn, "CM %p created for IBDEV %p QPN %lx\n",
300
 	       conn, ibdev, qp->qpn );
354
 	       conn, ibdev, qp->qpn );
301
 	DBGC ( conn, "CM %p connecting to %08x:%08x:%08x:%08x %08x:%08x\n",
355
 	DBGC ( conn, "CM %p connecting to %08x:%08x:%08x:%08x %08x:%08x\n",
324
 		       struct ib_queue_pair *qp __unused,
378
 		       struct ib_queue_pair *qp __unused,
325
 		       struct ib_connection *conn ) {
379
 		       struct ib_connection *conn ) {
326
 
380
 
381
+	list_del ( &conn->list );
327
 	if ( conn->madx )
382
 	if ( conn->madx )
328
 		ib_destroy_madx ( ibdev, ibdev->gsi, conn->madx );
383
 		ib_destroy_madx ( ibdev, ibdev->gsi, conn->madx );
329
 	if ( conn->path )
384
 	if ( conn->path )

Loading…
取消
儲存