Browse Source

[infiniband] Add Communication Manager (CM)

The Communication Manager is responsible for handling the setup and
teardown of RC connections.
tags/v0.9.8
Michael Brown 15 years ago
parent
commit
cc2e767b5a
4 changed files with 524 additions and 0 deletions
  1. 1
    0
      src/include/gpxe/errfile.h
  2. 21
    0
      src/include/gpxe/ib_cm.h
  3. 173
    0
      src/include/gpxe/ib_mad.h
  4. 329
    0
      src/net/infiniband/ib_cm.c

+ 1
- 0
src/include/gpxe/errfile.h View File

@@ -147,6 +147,7 @@ FILE_LICENCE ( GPL2_OR_LATER );
147 147
 #define ERRFILE_ib_gma			( ERRFILE_NET | 0x001b0000 )
148 148
 #define ERRFILE_ib_pathrec		( ERRFILE_NET | 0x001c0000 )
149 149
 #define ERRFILE_ib_mcast		( ERRFILE_NET | 0x001d0000 )
150
+#define ERRFILE_ib_cm			( ERRFILE_NET | 0x001e0000 )
150 151
 
151 152
 #define ERRFILE_image		      ( ERRFILE_IMAGE | 0x00000000 )
152 153
 #define ERRFILE_elf		      ( ERRFILE_IMAGE | 0x00010000 )

+ 21
- 0
src/include/gpxe/ib_cm.h View File

@@ -0,0 +1,21 @@
1
+#ifndef _GPXE_IB_CM_H
2
+#define _GPXE_IB_CM_H
3
+
4
+/** @file
5
+ *
6
+ * Infiniband communication management
7
+ *
8
+ */
9
+
10
+FILE_LICENCE ( GPL2_OR_LATER );
11
+
12
+#include <gpxe/infiniband.h>
13
+
14
/**
 * Connect to remote QP
 *
 * @v qp		Queue pair
 * @v dgid		Target GID
 * @v service_id	Target service ID
 * @v private_data	Private data to include in the connection request
 * @v private_data_len	Length of private data
 * @v notify		Handler called with the connection status code
 *			and the private data received from the peer
 * @ret rc		Return status code
 */
extern int
ib_cm_connect ( struct ib_queue_pair *qp,
		struct ib_gid *dgid,
		struct ib_gid_half *service_id,
		void *private_data,
		size_t private_data_len,
		void ( * notify ) ( struct ib_queue_pair *qp, int rc,
				    void *private_data,
				    size_t private_data_len ) );
20
+
21
+#endif /* _GPXE_IB_CM_H */

+ 173
- 0
src/include/gpxe/ib_mad.h View File

@@ -282,6 +282,172 @@ union ib_sa_data {
282 282
 	struct ib_mc_member_record mc_member_record;
283 283
 } __attribute__ (( packed ));
284 284
 
285
+/*****************************************************************************
286
+ *
287
+ * Communication management MADs
288
+ *
289
+ *****************************************************************************
290
+ */
291
+
292
/** Communication management class version */
#define IB_CM_CLASS_VERSION			2

/* Communication management attributes
 *
 * Each attribute ID identifies exactly one CM message type, so the
 * values must be distinct: the service ID resolution request is
 * 0x0017, immediately after the disconnect reply (0x0016).
 */
#define IB_CM_ATTR_CLASS_PORT_INFO		0x0001
#define IB_CM_ATTR_CONNECT_REQUEST		0x0010
#define IB_CM_ATTR_MSG_RCPT_ACK			0x0011
#define IB_CM_ATTR_CONNECT_REJECT		0x0012
#define IB_CM_ATTR_CONNECT_REPLY		0x0013
#define IB_CM_ATTR_READY_TO_USE			0x0014
#define IB_CM_ATTR_DISCONNECT_REQUEST		0x0015
#define IB_CM_ATTR_DISCONNECT_REPLY		0x0016
#define IB_CM_ATTR_SERVICE_ID_RES_REQ		0x0017
#define IB_CM_ATTR_SERVICE_ID_RES_REQ_RESP	0x0018
#define IB_CM_ATTR_LOAD_ALTERNATE_PATH		0x0019
#define IB_CM_ATTR_ALTERNATE_PATH_RESPONSE	0x001a
308
+
309
+/** A communication management path */
310
+struct ib_cm_path {
311
+	/** Local port LID */
312
+	uint16_t local_lid;
313
+	/** Remote port LID */
314
+	uint16_t remote_lid;
315
+	/** Local port GID */
316
+	struct ib_gid local_gid;
317
+	/** Remote port GID */
318
+	struct ib_gid remote_gid;
319
+	/** Flow label and rate */
320
+	uint32_t flow_label__rate;
321
+	/** Traffic class */
322
+	uint8_t tc;
323
+	/** Hop limit */
324
+	uint8_t hop_limit;
325
+	/** SL and subnet local*/
326
+	uint8_t sl__subnet_local;
327
+	/** Local ACK timeout */
328
+	uint8_t local_ack_timeout;
329
+} __attribute__ (( packed ));
330
+
331
+/** A communication management connection request
332
+ *
333
+ * Defined in section 12.6.5 of the IBA.
334
+ */
335
+struct ib_cm_connect_request {
336
+	/** Local communication ID */
337
+	uint32_t local_id;
338
+	/** Reserved */
339
+	uint32_t reserved0[1];
340
+	/** Service ID */
341
+	struct ib_gid_half service_id;
342
+	/** Local CA GUID */
343
+	struct ib_gid_half local_ca;
344
+	/** Reserved */
345
+	uint32_t reserved1[1];
346
+	/** Local queue key */
347
+	uint32_t local_qkey;
348
+	/** Local QPN and responder resources*/
349
+	uint32_t local_qpn__responder_resources;
350
+	/** Local EECN and initiator depth */
351
+	uint32_t local_eecn__initiator_depth;
352
+	/** Remote EECN, remote CM response timeout, transport service
353
+	 * type, EE flow control
354
+	 */
355
+	uint32_t remote_eecn__remote_timeout__service_type__ee_flow_ctrl;
356
+	/** Starting PSN, local CM response timeout and retry count */
357
+	uint32_t starting_psn__local_timeout__retry_count;
358
+	/** Partition key */
359
+	uint16_t pkey;
360
+	/** Path packet payload MTU, RDC exists, RNR retry count */
361
+	uint8_t payload_mtu__rdc_exists__rnr_retry;
362
+	/** Max CM retries and SRQ */
363
+	uint8_t max_cm_retries__srq;
364
+	/** Primary path */
365
+	struct ib_cm_path primary;
366
+	/** Alternate path */
367
+	struct ib_cm_path alternate;
368
+	/** Private data */
369
+	uint8_t private_data[92];
370
+} __attribute__ (( packed ));
371
+
372
/* CM transport service types, as carried in a connection request */

/** Transport type: RC */
#define IB_CM_TRANSPORT_RC		0
/** Transport type: UC */
#define IB_CM_TRANSPORT_UC		1
/** Transport type: RD */
#define IB_CM_TRANSPORT_RD		2
376
+
377
/** A communication management connection rejection
 *
 * Defined in section 12.6.7 of the IBA.
 */
struct ib_cm_connect_reject {
	/** Communication ID of the sender of this message */
	uint32_t local_id;
	/** Communication ID of the recipient of this message */
	uint32_t remote_id;
	/** Message being rejected */
	uint8_t message;
	/** Length of additional rejection information */
	uint8_t info_len;
	/** Reason for rejection */
	uint16_t reason;
	/** Additional rejection information */
	uint8_t info[72];
	/** Private data passed through to the recipient */
	uint8_t private_data[148];
} __attribute__ (( packed ));
397
+
398
+/** A communication management connection reply
399
+ *
400
+ * Defined in section 12.6.8 of the IBA.
401
+ */
402
+struct ib_cm_connect_reply {
403
+	/** Local communication ID */
404
+	uint32_t local_id;
405
+	/** Remote communication ID */
406
+	uint32_t remote_id;
407
+	/** Local queue key */
408
+	uint32_t local_qkey;
409
+	/** Local QPN */
410
+	uint32_t local_qpn;
411
+	/** Local EECN */
412
+	uint32_t local_eecn;
413
+	/** Starting PSN */
414
+	uint32_t starting_psn;
415
+	/** Responder resources */
416
+	uint8_t responder_resources;
417
+	/** Initiator depth */
418
+	uint8_t initiator_depth;
419
+	/** Target ACK delay, failover accepted, and end-to-end flow control */
420
+	uint8_t target_ack_delay__failover_accepted__ee_flow_ctrl;
421
+	/** RNR retry count, SRQ */
422
+	uint8_t rnr_retry__srq;
423
+	/** Local CA GUID */
424
+	struct ib_gid_half local_ca;
425
+	/** Private data */
426
+	uint8_t private_data[196];
427
+} __attribute__ (( packed ));
428
+
429
/** A communication management ready to use reply
 *
 * Defined in section 12.6.9 of the IBA.
 */
struct ib_cm_ready_to_use {
	/** Communication ID of the sender of this message */
	uint32_t local_id;
	/** Communication ID of the recipient of this message */
	uint32_t remote_id;
	/** Private data passed through to the recipient */
	uint8_t private_data[224];
} __attribute__ (( packed ));
441
+
442
+/** A communication management attribute */
443
+union ib_cm_data {
444
+	struct ib_cm_connect_request connect_request;
445
+	struct ib_cm_connect_reject connect_reject;
446
+	struct ib_cm_connect_reply connect_reply;
447
+	struct ib_cm_ready_to_use ready_to_use;
448
+	uint8_t bytes[232];
449
+} __attribute__ (( packed ));
450
+
285 451
 /*****************************************************************************
286 452
  *
287 453
  * MADs
@@ -362,11 +528,18 @@ struct ib_mad_sa {
362 528
 	union ib_sa_data sa_data;
363 529
 } __attribute__ (( packed ));
364 530
 
531
+/** A communication management MAD */
532
+struct ib_mad_cm {
533
+	struct ib_mad_hdr mad_hdr;
534
+	union ib_cm_data cm_data;
535
+} __attribute__ (( packed ));
536
+
365 537
 /** A management datagram */
366 538
 union ib_mad {
367 539
 	struct ib_mad_hdr hdr;
368 540
 	struct ib_mad_smp smp;
369 541
 	struct ib_mad_sa sa;
542
+	struct ib_mad_cm cm;
370 543
 	uint8_t bytes[256];
371 544
 } __attribute__ (( packed ));
372 545
 

+ 329
- 0
src/net/infiniband/ib_cm.c View File

@@ -0,0 +1,329 @@
1
+/*
2
+ * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>.
3
+ *
4
+ * This program is free software; you can redistribute it and/or
5
+ * modify it under the terms of the GNU General Public License as
6
+ * published by the Free Software Foundation; either version 2 of the
7
+ * License, or any later version.
8
+ *
9
+ * This program is distributed in the hope that it will be useful, but
10
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
+ * General Public License for more details.
13
+ *
14
+ * You should have received a copy of the GNU General Public License
15
+ * along with this program; if not, write to the Free Software
16
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17
+ */
18
+
19
+FILE_LICENCE ( GPL2_OR_LATER );
20
+
21
+#include <stdint.h>
22
+#include <stdlib.h>
23
+#include <string.h>
24
+#include <byteswap.h>
25
+#include <errno.h>
26
+#include <assert.h>
27
+#include <gpxe/list.h>
28
+#include <gpxe/process.h>
29
+#include <gpxe/infiniband.h>
30
+#include <gpxe/ib_gma.h>
31
+#include <gpxe/ib_pathrec.h>
32
+#include <gpxe/ib_cm.h>
33
+
34
+/**
35
+ * @file
36
+ *
37
+ * Infiniband communication management
38
+ *
39
+ */
40
+
41
+/** An outstanding connection request */
42
+struct ib_cm_request {
43
+	/** List of all outstanding requests */
44
+	struct list_head list;
45
+	/** Local communication ID */
46
+	uint32_t local_id;
47
+	/** Remote communication ID */
48
+	uint32_t remote_id;
49
+	/** Queue pair */
50
+	struct ib_queue_pair *qp;
51
+	/** Target service ID */
52
+	struct ib_gid_half service_id;
53
+	/** Connection process */
54
+	struct process process;
55
+	/** Notification handler
56
+	 *
57
+	 * @v qp		Queue pair
58
+	 * @v rc		Connection status code
59
+	 * @v private_data	Private data
60
+	 * @v private_data_len	Length of private data
61
+	 */
62
+	void ( * notify ) ( struct ib_queue_pair *qp, int rc,
63
+			    void *private_data, size_t private_data_len );
64
+	/** Private data length */
65
+	size_t private_data_len;
66
+	/** Private data */
67
+	uint8_t private_data[0];
68
+};
69
+
70
+/** List of all outstanding connection requests */
71
+static LIST_HEAD ( ib_cm_requests );
72
+
73
+/**
74
+ * Send connection request
75
+ *
76
+ * @v request		Connection request
77
+ * @ret rc		Return status code
78
+ */
79
+static int ib_cm_send_request ( struct ib_cm_request *request ) {
80
+	struct ib_queue_pair *qp = request->qp;
81
+	struct ib_device *ibdev = qp->ibdev;
82
+	struct ib_gma *gma = ibdev->gma;
83
+	union ib_mad mad;
84
+	struct ib_mad_cm *cm = &mad.cm;
85
+	struct ib_cm_connect_request *connect_req =
86
+		&cm->cm_data.connect_request;
87
+	size_t private_data_len;
88
+	int rc;
89
+
90
+	/* Construct connection request */
91
+	memset ( cm, 0, sizeof ( *cm ) );
92
+	cm->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
93
+	cm->mad_hdr.mgmt_class = IB_MGMT_CLASS_CM;
94
+	cm->mad_hdr.class_version = IB_CM_CLASS_VERSION;
95
+	cm->mad_hdr.method = IB_MGMT_METHOD_SEND;
96
+	cm->mad_hdr.attr_id = htons ( IB_CM_ATTR_CONNECT_REQUEST );
97
+	connect_req->local_id = htonl ( request->local_id );
98
+	memcpy ( &connect_req->service_id, &request->service_id,
99
+		 sizeof ( connect_req->service_id ) );
100
+	ib_get_hca_info ( ibdev, &connect_req->local_ca );
101
+	connect_req->local_qpn__responder_resources =
102
+		htonl ( ( qp->qpn << 8 ) | 1 );
103
+	connect_req->local_eecn__initiator_depth = htonl ( ( 0 << 8 ) | 1 );
104
+	connect_req->remote_eecn__remote_timeout__service_type__ee_flow_ctrl =
105
+		htonl ( ( 0x14 << 3 ) | ( IB_CM_TRANSPORT_RC << 1 ) |
106
+			( 0 << 0 ) );
107
+	connect_req->starting_psn__local_timeout__retry_count =
108
+		htonl ( ( qp->recv.psn << 8 ) | ( 0x14 << 3 ) |
109
+			( 0x07 << 0 ) );
110
+	connect_req->pkey = htons ( ibdev->pkey );
111
+	connect_req->payload_mtu__rdc_exists__rnr_retry =
112
+		( ( IB_MTU_2048 << 4 ) | ( 1 << 3 ) | ( 0x07 << 0 ) );
113
+	connect_req->max_cm_retries__srq =
114
+		( ( 0x0f << 4 ) | ( 0 << 3 ) );
115
+	connect_req->primary.local_lid = htons ( ibdev->lid );
116
+	connect_req->primary.remote_lid = htons ( request->qp->av.lid );
117
+	memcpy ( &connect_req->primary.local_gid, &ibdev->gid,
118
+		 sizeof ( connect_req->primary.local_gid ) );
119
+	memcpy ( &connect_req->primary.remote_gid, &request->qp->av.gid,
120
+		 sizeof ( connect_req->primary.remote_gid ) );
121
+	connect_req->primary.flow_label__rate =
122
+		htonl ( ( 0 << 12 ) | ( request->qp->av.rate << 0 ) );
123
+	connect_req->primary.hop_limit = 0;
124
+	connect_req->primary.sl__subnet_local =
125
+		( ( request->qp->av.sl << 4 ) | ( 1 << 3 ) );
126
+	connect_req->primary.local_ack_timeout = ( 0x13 << 3 );
127
+	private_data_len = request->private_data_len;
128
+	if ( private_data_len > sizeof ( connect_req->private_data ) )
129
+		private_data_len = sizeof ( connect_req->private_data );
130
+	memcpy ( &connect_req->private_data, &request->private_data,
131
+		 private_data_len );
132
+
133
+	/* Send request */
134
+	if ( ( rc = ib_gma_request ( gma, &mad, NULL, 1 ) ) != 0 ) {
135
+		DBGC ( gma, "GMA %p could not send connection request: %s\n",
136
+		       gma, strerror ( rc ) );
137
+		return rc;
138
+	}
139
+
140
+	return 0;
141
+
142
+}
143
+
144
+/**
145
+ * Connection request process step
146
+ *
147
+ * @v process		Connection request process
148
+ */
149
+static void ib_cm_step ( struct process *process ) {
150
+	struct ib_cm_request *request =
151
+		container_of ( process, struct ib_cm_request, process );
152
+	struct ib_queue_pair *qp = request->qp;
153
+	struct ib_device *ibdev = qp->ibdev;
154
+	int rc;
155
+
156
+	/* Wait until path can be resolved */
157
+	if ( ( rc = ib_resolve_path ( ibdev, &request->qp->av ) ) != 0 )
158
+		return;
159
+
160
+	/* Wait until request can be sent */
161
+	if ( ( rc = ib_cm_send_request ( request ) ) != 0 )
162
+		return;
163
+
164
+	/* Stop process */
165
+	process_del ( process );
166
+}
167
+
168
+/**
169
+ * Identify connection request by communication ID
170
+ *
171
+ * @v local_id		Local communication ID
172
+ * @v remote_id		Remote communication ID
173
+ * @ret request		Connection request, or NULL
174
+ */
175
+static struct ib_cm_request * ib_cm_find_request ( uint32_t local_id,
176
+						   uint32_t remote_id ) {
177
+	struct ib_cm_request *request;
178
+
179
+	list_for_each_entry ( request, &ib_cm_requests, list ) {
180
+		if ( request->local_id == local_id ) {
181
+			request->remote_id = remote_id;
182
+			return request;
183
+		}
184
+	}
185
+	return NULL;
186
+}
187
+
188
+/**
189
+ * Handle connection reply
190
+ *
191
+ * @v gma		General management agent
192
+ * @v mad		MAD
193
+ * @ret response	MAD response
194
+ */
195
+static union ib_mad * ib_cm_connect_reply ( struct ib_gma *gma,
196
+					    union ib_mad *mad ) {
197
+	struct ib_cm_connect_reply *connect_rep =
198
+		&mad->cm.cm_data.connect_reply;
199
+	struct ib_cm_ready_to_use *ready =
200
+		&mad->cm.cm_data.ready_to_use;
201
+	struct ib_cm_request *request;
202
+	int rc;
203
+
204
+	/* Identify request */
205
+	request = ib_cm_find_request ( ntohl ( connect_rep->remote_id ),
206
+				       ntohl ( connect_rep->local_id ) );
207
+	if ( ! request ) {
208
+		DBGC ( gma, "GMA %p received connection reply with unknown "
209
+		       "ID %08x\n", gma, ntohl ( connect_rep->remote_id ) );
210
+		return NULL;
211
+	}
212
+
213
+	/* Extract fields */
214
+	request->qp->av.qpn = ( ntohl ( connect_rep->local_qpn ) >> 8 );
215
+	request->qp->send.psn = ( ntohl ( connect_rep->starting_psn ) >> 8 );
216
+	DBGC ( gma, "GMA %p QPN %lx connected to QPN %lx PSN %x\n", gma,
217
+	       request->qp->qpn, request->qp->av.qpn, request->qp->send.psn );
218
+
219
+	/* Modify queue pair */
220
+	if ( ( rc = ib_modify_qp ( request->qp->ibdev, request->qp ) ) != 0 ) {
221
+		DBGC ( gma, "GMA %p QPN %lx could not modify queue pair: %s\n",
222
+		       gma, request->qp->qpn, strerror ( rc ) );
223
+		return NULL;
224
+	}
225
+
226
+	/* Inform recipient that we are now connected */
227
+	request->notify ( request->qp, 0, &connect_rep->private_data,
228
+			  sizeof ( connect_rep->private_data ) );
229
+
230
+	/* Construct ready to use reply */
231
+	mad->hdr.attr_id = htons ( IB_CM_ATTR_READY_TO_USE );
232
+	memset ( ready, 0, sizeof ( *ready ) );
233
+	ready->local_id = htonl ( request->local_id );
234
+	ready->remote_id = htonl ( request->remote_id );
235
+
236
+	return mad;
237
+}
238
+
239
+/**
240
+ * Handle connection rejection
241
+ *
242
+ * @v gma		General management agent
243
+ * @v mad		MAD
244
+ * @ret response	MAD response
245
+ */
246
+static union ib_mad * ib_cm_connect_reject ( struct ib_gma *gma,
247
+					     union ib_mad *mad ) {
248
+	struct ib_cm_connect_reject *connect_rej =
249
+		&mad->cm.cm_data.connect_reject;
250
+	struct ib_cm_request *request;
251
+	uint16_t reason;
252
+
253
+	/* Identify request */
254
+	request = ib_cm_find_request ( ntohl ( connect_rej->remote_id ),
255
+				       ntohl ( connect_rej->local_id ) );
256
+	if ( ! request ) {
257
+		DBGC ( gma, "GMA %p received connection rejection with "
258
+		       "unknown ID %08x\n", gma,
259
+		       ntohl ( connect_rej->remote_id ) );
260
+		return NULL;
261
+	}
262
+
263
+	/* Extract fields */
264
+	reason = ntohs ( connect_rej->reason );
265
+	DBGC ( gma, "GMA %p QPN %lx connection rejected (reason %d)\n",
266
+	       gma, request->qp->qpn, reason );
267
+
268
+	/* Inform recipient that we are now disconnected */
269
+	request->notify ( request->qp, -ENOTCONN, &connect_rej->private_data,
270
+			  sizeof ( connect_rej->private_data ) );
271
+
272
+	return NULL;
273
+}
274
+
275
+/** Communication management MAD handlers */
276
+struct ib_gma_handler ib_cm_handlers[] __ib_gma_handler = {
277
+	{
278
+		.mgmt_class = IB_MGMT_CLASS_CM,
279
+		.class_version = IB_CM_CLASS_VERSION,
280
+		.method = IB_MGMT_METHOD_SEND,
281
+		.attr_id = htons ( IB_CM_ATTR_CONNECT_REPLY ),
282
+		.handle = ib_cm_connect_reply,
283
+	},
284
+	{
285
+		.mgmt_class = IB_MGMT_CLASS_CM,
286
+		.class_version = IB_CM_CLASS_VERSION,
287
+		.method = IB_MGMT_METHOD_SEND,
288
+		.attr_id = htons ( IB_CM_ATTR_CONNECT_REJECT ),
289
+		.handle = ib_cm_connect_reject,
290
+	},
291
+};
292
+
293
+/**
294
+ * Connect to remote QP
295
+ *
296
+ * @v qp		Queue pair
297
+ * @v dgid		Target GID
298
+ * @v service_id	Target service ID
299
+ * @v private_data	Private data
300
+ * @v private_data_len	Length of private data
301
+ * @ret rc		Return status code
302
+ */
303
+int ib_cm_connect ( struct ib_queue_pair *qp, struct ib_gid *dgid,
304
+		    struct ib_gid_half *service_id,
305
+		    void *private_data, size_t private_data_len,
306
+		    void ( * notify ) ( struct ib_queue_pair *qp, int rc,
307
+					void *private_data,
308
+					size_t private_data_len ) ) {
309
+	struct ib_cm_request *request;
310
+
311
+	/* Allocate and initialise request */
312
+	request = zalloc ( sizeof ( *request ) + private_data_len );
313
+	if ( ! request )
314
+		return -ENOMEM;
315
+	list_add ( &request->list, &ib_cm_requests );
316
+	request->local_id = random();
317
+	request->qp = qp;
318
+	memset ( &qp->av, 0, sizeof ( qp->av ) );
319
+	qp->av.gid_present = 1;
320
+	memcpy ( &qp->av.gid, dgid, sizeof ( qp->av.gid ) );
321
+	memcpy ( &request->service_id, service_id,
322
+		 sizeof ( request->service_id ) );
323
+	request->notify = notify;
324
+	request->private_data_len = private_data_len;
325
+	memcpy ( &request->private_data, private_data, private_data_len );
326
+	process_init ( &request->process, ib_cm_step, NULL );
327
+
328
+	return 0;
329
+}

Loading…
Cancel
Save