Browse Source

Starting to introduce an Infiniband device abstraction

tags/v0.9.3
Michael Brown 16 years ago
parent
commit
9d08b7c692
2 changed files with 201 additions and 0 deletions
  1. 138
    0
      src/drivers/net/mlx_ipoib/mt25218.c
  2. 63
    0
      src/include/gpxe/infiniband.h

+ 138
- 0
src/drivers/net/mlx_ipoib/mt25218.c View File

@@ -72,6 +72,7 @@ static uint8_t ib_broadcast[IB_ALEN] = { 0xff, };
72 72
 static int mlx_transmit ( struct net_device *netdev,
73 73
 			  struct io_buffer *iobuf ) {
74 74
 	struct mlx_nic *mlx = netdev->priv;
75
+	ud_av_t av = iobuf->data;
75 76
 	ud_send_wqe_t snd_wqe;
76 77
 	int rc;
77 78
 
@@ -222,6 +223,143 @@ static struct net_device_operations mlx_operations = {
222 223
 	.irq		= mlx_irq,
223 224
 };
224 225
 
226
+
227
+
228
+int ib_alloc_wqe ( struct ib_work_queue *wq, struct io_buffer *iobuf ) {
229
+	unsigned int wqe_idx;
230
+	unsigned int new_write_ptr;
231
+
232
+	/* Allocate queue entry */
233
+	wqe_idx = new_write_ptr = wq->write_ptr;
234
+	if ( wq->iobuf[wqe_idx] )
235
+		return -ENOBUFS;
236
+	wq->iobuf[wqe_idx] = iobuf;
237
+
238
+	/* Update write pointer */
239
+	new_write_ptr++;
240
+	new_write_ptr &= ( wq->num_wqes - 1 );
241
+	wq->write_ptr = new_write_ptr;
242
+
243
+	return wqe_idx;
244
+}
245
+
246
+static inline void ib_free_wqe ( struct ib_work_queue *wq, int wqe_idx ) {
247
+	assert ( wq->iobuf[wqe_idx] != NULL );
248
+	wq->iobuf[wqe_idx] = NULL;
249
+}
250
+
251
+static int mlx_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf,
252
+			   struct ib_address_vector *av,
253
+			   struct ib_queue_pair *qp ) {
254
+	struct mlx *mlx = ibdev->priv;
255
+	struct ib_work_queue *wq = &qp->send;
256
+	struct mlx_work_queue *mlx_wq = wq->priv;
257
+	unsigned int wqe_idx_mask = ( wq->num_wqes - 1 );
258
+	unsigned int prev_wqe_idx;
259
+	struct ud_send_wqe_st *prev_wqe;
260
+	unsigned int wqe_idx;
261
+	struct ud_send_wqe_st *wqe;
262
+	struct ib_gid *gid;
263
+	size_t nds;
264
+	struct send_doorbell_st doorbell;
265
+
266
+	/* Allocate work queue entry */
267
+	prev_wqe_idx = wq->posted;
268
+	wqe_idx = ( prev_wqe_index + 1 );
269
+	if ( wq->iobuf[wqe_idx & wqe_idx_mask] ) {
270
+		DBGC ( mlx, "MLX %p send queue full", mlx );
271
+		return -ENOBUFS;
272
+	}
273
+	prev_wqe = &mlx_wq->wqe[prev_wqe_idx & wqe_idx_mask];
274
+	wqe = &mlx_wq->wqe[wqe_idx & wqe_idx_mask];
275
+
276
+	/* Construct work queue entry */
277
+	memset ( &wqe->next.control, 0,
278
+		 sizeof ( wqe->next.control ) );
279
+	MLX_POPULATE_1 ( &wqe->next.control,
280
+			 arbelprm_wqe_segment_ctrl_send_st, 0,
281
+			 always1, 1 );
282
+	memset ( &wqe->udseg, 0, sizeof ( wqe->udseg ) );
283
+	MLX_POPULATE_2 ( &wqe->udseg, arbelprm_ud_address_vector_st, 0,
284
+			 pd, GLOBAL_PD,
285
+			 port_number, mlx->port );
286
+	MLX_POPULATE_2 ( &wqe->udseg, arbelprm_ud_address_vector_st, 1,
287
+			 rlid, av->remote_lid,
288
+			 g, av->gid_present );
289
+	MLX_POPULATE_2 ( &wqe->udseg, arbelprm_ud_address_vector_st, 2,
290
+			 max_stat_rate, ( ( av->rate >= 3 ) ? 0 : 1 ),
291
+			 msg, 3 );
292
+	MLX_POPULATE_1 ( &wqe->udseg, arbelprm_ud_address_vector_st, 3,
293
+			 sl, av->sl );
294
+	gid = ( av->gid_present ? av->gid : &ib_no_gid );
295
+	memcpy ( ( ( ( void * ) &wqe->udseg ) + 16 ),
296
+		 gid, sizeof ( *gid ) );
297
+	MLX_POPULATE_1 ( &wqe->udseg, arbelprm_wqe_segment_ud_st, 8,
298
+			 destination_qp, av->dest_qp );
299
+	MLX_POPULATE_1 ( &wqe->udseg, arbelprm_wqe_segment_ud_st, 9,
300
+			 q_key, av->qkey );
301
+	wqe->mpointer[0].local_addr_l =
302
+		cpu_to_be32 ( virt_to_bus ( iobuf->data ) );
303
+	wqe->mpointer[0].byte_count = cpu_to_be32 ( iob_len ( iobuf ) );
304
+
305
+	/* Update previous work queue entry's "next" field */
306
+	nds = ( offsetof ( typeof ( *wqe ), mpointer ) +
307
+		sizeof ( wqe->mpointer[0] ) );
308
+	MLX_MODIFY_1 ( &prev_wqe->next.next, arbelprm_wqe_segment_next_st, 0,
309
+		       nopcode, XDEV_NOPCODE_SEND );
310
+	MLX_POPULATE_3 ( &prev_wqe->next.next, arbelprm_wqe_segment_next_st, 1,
311
+			 nds, nds,
312
+			 f, 1,
313
+			 always1, 1 );
314
+
315
+	/* Ring doorbell */
316
+
317
+	doorbell index is a property of the queue pair
318
+
319
+
320
+	MLX_POPULATE_1 ( mlx_wq->send_uar_context, arbelprm_qp_db_record_st, 0, 
321
+			 counter, ( wqe_idx & 0xffff ) );
322
+	memset ( &doorbell, 0, sizeof ( doorbell ) );
323
+	MLX_POPULATE_4 ( &doorbell, arbelprm_send_doorbell_st, 0,
324
+			 nopcode, XDEV_NOPCODE_SEND,
325
+			 f, 1,
326
+			 wqe_counter, ( prev_wqe_idx & 0xffff ),
327
+			 wqe_cnt, 1 );
328
+	MLX_POPULATE_2 ( &doorbell, arbelprm_send_doorbell_st, 1,
329
+			 nds, nds,
330
+			 qpn, qp->qpn );
331
+	barrier();
332
+
333
+	wq->posted = wqe_idx;
334
+
335
+
336
+	struct mlx_nic *mlx = netdev->priv;
337
+	ud_av_t av = iobuf->data;
338
+	ud_send_wqe_t snd_wqe;
339
+	int rc;
340
+
341
+	snd_wqe = alloc_send_wqe ( mlx->ipoib_qph );
342
+	if ( ! snd_wqe ) {
343
+		DBGC ( mlx, "MLX %p out of TX WQEs\n", mlx );
344
+		return -ENOBUFS;
345
+	}
346
+
347
+	prep_send_wqe_buf ( mlx->ipoib_qph, mlx->bcast_av, snd_wqe,
348
+			    iobuf->data, 0, iob_len ( iobuf ), 0 );
349
+	if ( ( rc = post_send_req ( mlx->ipoib_qph, snd_wqe, 1 ) ) != 0 ) {
350
+		DBGC ( mlx, "MLX %p could not post TX WQE %p: %s\n",
351
+		       mlx, snd_wqe, strerror ( rc ) );
352
+		free_wqe ( snd_wqe );
353
+		return rc;
354
+	}
355
+
356
+
357
+}
358
+
359
+static struct ib_device_operations mlx_ib_operations = {
360
+	.post_send	= mlx_post_send,
361
+};
362
+
225 363
 /**
226 364
  * Remove PCI device
227 365
  *

+ 63
- 0
src/include/gpxe/infiniband.h View File

@@ -61,6 +61,69 @@ struct ibhdr {
61 61
 	uint16_t reserved;
62 62
 } __attribute__ (( packed ));
63 63
 
64
/** An Infiniband Work Queue */
struct ib_work_queue {
	/** Number of work queue entries
	 *
	 * Assumed to be a power of two: entry indices are wrapped
	 * using ( num_wqes - 1 ) as a mask.
	 */
	unsigned int num_wqes;
	/** Posted index
	 *
	 * This is the index of the most recently posted entry.
	 */
	unsigned int posted;
	/** Driver-private data
	 *
	 * Typically used to hold the address of the work queue.
	 */
	void *priv;
	/** I/O buffers assigned to work queue
	 *
	 * One slot per work queue entry; a NULL slot is free.
	 * (GCC zero-length-array extension; storage is allocated
	 * beyond the end of the structure.)
	 */
	struct io_buffer *iobuf[0];
};
81
+
82
+/** An Infiniband Queue Pair */
83
+struct ib_queue_pair {
84
+	/** Queue Pair Number */
85
+	uint32_t qpn;
86
+	/** Send queue */
87
+	struct ib_work_queue send;
88
+	/** Receive queue */
89
+	struct ib_work_queue recv;
90
+};
91
+
92
/** An Infiniband Address Vector
 *
 * Fix: the original struct was empty, which is invalid in standard C
 * (a struct must have at least one named member) and inconsistent
 * with mlx_post_send, which dereferences all of the fields below.
 * Field widths are provisional — TODO confirm against the IB spec.
 */
struct ib_address_vector {
	/** Destination queue pair number */
	unsigned int dest_qp;
	/** Queue key */
	unsigned long qkey;
	/** Destination Local ID */
	unsigned int remote_lid;
	/** Service level */
	unsigned int sl;
	/** Static rate */
	unsigned int rate;
	/** GID is present */
	unsigned int gid_present;
	/** GID, if present */
	struct ib_gid *gid;
};
96
+
97
/**
 * Infiniband device operations
 *
 * These represent a subset of the Infiniband Verbs.
 */
struct ib_device_operations {
	/**
	 * Post Send work queue entry
	 *
	 * @v ibdev		Infiniband device
	 * @v iobuf		I/O buffer
	 * @v av		Address vector
	 * @v qp		Queue pair
	 * @ret rc		Return status code
	 *
	 * If this method returns success, the I/O buffer remains
	 * owned by the queue pair.  If this method returns failure,
	 * the I/O buffer is immediately released; the failure is
	 * interpreted as "failure to enqueue buffer".
	 */
	int ( * post_send ) ( struct ib_device *ibdev,
			      struct io_buffer *iobuf,
			      struct ib_address_vector *av,
			      struct ib_queue_pair *qp );
};
121
+
122
+
123
+
124
+
125
+
126
+
64 127
 extern struct ll_protocol infiniband_protocol;
65 128
 
66 129
 extern const char * ib_ntoa ( const void *ll_addr );

Loading…
Cancel
Save