Parcourir la source

[eoib] Add Ethernet over Infiniband (EoIB) driver

EoIB is a fairly simple protocol in which raw Ethernet frames
(excluding the CRC) are encapsulated within Infiniband Unreliable
Datagrams, with a four-byte fixed EoIB header (which conveys no actual
information).  The Ethernet broadcast domain is provided by a
multicast group, similar to the IPoIB IPv4 multicast group.

The mapping from Ethernet MAC addresses to Infiniband address vectors
is achieved by snooping incoming traffic and building a peer cache
which can then be used to map a MAC address into a port GID.  The
address vector is completed using a path record lookup, as for IPoIB.
Note that this requires every packet to include a GRH.

Add basic support for EoIB devices.  This driver is substantially
derived from the IPoIB driver.  There is currently no mechanism for
automatically creating EoIB devices.

Signed-off-by: Michael Brown <mcb30@ipxe.org>
tags/v1.20.1
Michael Brown il y a 8 ans
Parent
révision
9154d7a65c
3 fichiers modifiés avec 815 ajouts et 0 suppressions
  1. 754
    0
      src/drivers/net/eoib.c
  2. 60
    0
      src/include/ipxe/eoib.h
  3. 1
    0
      src/include/ipxe/errfile.h

+ 754
- 0
src/drivers/net/eoib.c Voir le fichier

@@ -0,0 +1,754 @@
1
+/*
2
+ * Copyright (C) 2016 Michael Brown <mbrown@fensystems.co.uk>.
3
+ *
4
+ * This program is free software; you can redistribute it and/or
5
+ * modify it under the terms of the GNU General Public License as
6
+ * published by the Free Software Foundation; either version 2 of the
7
+ * License, or any later version.
8
+ *
9
+ * This program is distributed in the hope that it will be useful, but
10
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
+ * General Public License for more details.
13
+ *
14
+ * You should have received a copy of the GNU General Public License
15
+ * along with this program; if not, write to the Free Software
16
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17
+ * 02110-1301, USA.
18
+ *
19
+ * You can also choose to distribute this program under the terms of
20
+ * the Unmodified Binary Distribution Licence (as given in the file
21
+ * COPYING.UBDL), provided that you have satisfied its requirements.
22
+ */
23
+
24
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
25
+
26
+#include <stdio.h>
27
+#include <string.h>
28
+#include <errno.h>
29
+#include <ipxe/errortab.h>
30
+#include <ipxe/malloc.h>
31
+#include <ipxe/iobuf.h>
32
+#include <ipxe/if_ether.h>
33
+#include <ipxe/netdevice.h>
34
+#include <ipxe/ethernet.h>
35
+#include <ipxe/infiniband.h>
36
+#include <ipxe/ib_mcast.h>
37
+#include <ipxe/ib_pathrec.h>
38
+#include <ipxe/eoib.h>
39
+
40
+/** @file
41
+ *
42
+ * Ethernet over Infiniband
43
+ *
44
+ */
45
+
46
/** Send work queue size requested when the EoIB queue pair is created */
#define EOIB_NUM_SEND_WQES 8

/** Receive work queue size requested when the EoIB queue pair is created */
#define EOIB_NUM_RECV_WQES 4

/** Size of the completion queue created for each open EoIB device */
#define EOIB_NUM_CQES 16
54
+
55
+/** Link status for "broadcast join in progress" */
56
+#define EINPROGRESS_JOINING __einfo_error ( EINFO_EINPROGRESS_JOINING )
57
+#define EINFO_EINPROGRESS_JOINING __einfo_uniqify \
58
+	( EINFO_EINPROGRESS, 0x01, "Joining" )
59
+
60
+/** Human-readable message for the link status */
61
+struct errortab eoib_errors[] __errortab = {
62
+	__einfo_errortab ( EINFO_EINPROGRESS_JOINING ),
63
+};
64
+
65
+/** List of EoIB devices */
66
+static LIST_HEAD ( eoib_devices );
67
+
68
+static struct net_device_operations eoib_operations;
69
+
70
+/****************************************************************************
71
+ *
72
+ * EoIB peer cache
73
+ *
74
+ ****************************************************************************
75
+ */
76
+
77
+/** An EoIB peer cache entry */
78
+struct eoib_peer {
79
+	/** List of EoIB peer cache entries */
80
+	struct list_head list;
81
+	/** Ethernet MAC */
82
+	uint8_t mac[ETH_ALEN];
83
+	/** Infiniband address vector */
84
+	struct ib_address_vector av;
85
+};
86
+
87
+/**
88
+ * Find EoIB peer cache entry
89
+ *
90
+ * @v eoib		EoIB device
91
+ * @v mac		Ethernet MAC
92
+ * @ret peer		EoIB peer, or NULL if not found
93
+ */
94
+static struct eoib_peer * eoib_find_peer ( struct eoib_device *eoib,
95
+					   const uint8_t *mac ) {
96
+	struct eoib_peer *peer;
97
+
98
+	/* Find peer cache entry */
99
+	list_for_each_entry ( peer, &eoib->peers, list ) {
100
+		if ( memcmp ( mac, peer->mac, sizeof ( peer->mac ) ) == 0 ) {
101
+			/* Move peer to start of list */
102
+			list_del ( &peer->list );
103
+			list_add ( &peer->list, &eoib->peers );
104
+			return peer;
105
+		}
106
+	}
107
+
108
+	return NULL;
109
+}
110
+
111
+/**
112
+ * Create EoIB peer cache entry
113
+ *
114
+ * @v eoib		EoIB device
115
+ * @v mac		Ethernet MAC
116
+ * @ret peer		EoIB peer, or NULL on error
117
+ */
118
+static struct eoib_peer * eoib_create_peer ( struct eoib_device *eoib,
119
+					     const uint8_t *mac ) {
120
+	struct eoib_peer *peer;
121
+
122
+	/* Allocate and initialise peer cache entry */
123
+	peer = zalloc ( sizeof ( *peer ) );
124
+	if ( peer ) {
125
+		memcpy ( peer->mac, mac, sizeof ( peer->mac ) );
126
+		list_add ( &peer->list, &eoib->peers );
127
+	}
128
+	return peer;
129
+}
130
+
131
+/**
132
+ * Flush EoIB peer cache
133
+ *
134
+ * @v eoib		EoIB device
135
+ */
136
+static void eoib_flush_peers ( struct eoib_device *eoib ) {
137
+	struct eoib_peer *peer;
138
+	struct eoib_peer *tmp;
139
+
140
+	list_for_each_entry_safe ( peer, tmp, &eoib->peers, list ) {
141
+		list_del ( &peer->list );
142
+		free ( peer );
143
+	}
144
+}
145
+
146
+/**
147
+ * Discard some entries from the peer cache
148
+ *
149
+ * @ret discarded	Number of cached items discarded
150
+ */
151
+static unsigned int eoib_discard ( void ) {
152
+	struct net_device *netdev;
153
+	struct eoib_device *eoib;
154
+	struct eoib_peer *peer;
155
+	unsigned int discarded = 0;
156
+
157
+	/* Try to discard one cache entry for each EoIB device */
158
+	for_each_netdev ( netdev ) {
159
+
160
+		/* Skip non-EoIB devices */
161
+		if ( netdev->op != &eoib_operations )
162
+			continue;
163
+		eoib = netdev->priv;
164
+
165
+		/* Discard least recently used cache entry (if any) */
166
+		list_for_each_entry_reverse ( peer, &eoib->peers, list ) {
167
+			list_del ( &peer->list );
168
+			free ( peer );
169
+			discarded++;
170
+			break;
171
+		}
172
+	}
173
+
174
+	return discarded;
175
+}
176
+
177
+/** EoIB cache discarder */
178
+struct cache_discarder eoib_discarder __cache_discarder ( CACHE_EXPENSIVE ) = {
179
+	.discard = eoib_discard,
180
+};
181
+
182
+/**
183
+ * Find destination address vector
184
+ *
185
+ * @v eoib		EoIB device
186
+ * @v mac		Ethernet MAC
187
+ * @ret av		Address vector, or NULL to send as broadcast
188
+ */
189
+static struct ib_address_vector * eoib_tx_av ( struct eoib_device *eoib,
190
+					       const uint8_t *mac ) {
191
+	struct ib_device *ibdev = eoib->ibdev;
192
+	struct eoib_peer *peer;
193
+	int rc;
194
+
195
+	/* If this is a broadcast or multicast MAC address, then send
196
+	 * this packet as a broadcast.
197
+	 */
198
+	if ( is_multicast_ether_addr ( mac ) ) {
199
+		DBGCP ( eoib, "EoIB %s %s TX multicast\n",
200
+			eoib->name, eth_ntoa ( mac ) );
201
+		return NULL;
202
+	}
203
+
204
+	/* If we have no peer cache entry, then create one and send
205
+	 * this packet as a broadcast.
206
+	 */
207
+	peer = eoib_find_peer ( eoib, mac );
208
+	if ( ! peer ) {
209
+		DBGC ( eoib, "EoIB %s %s TX unknown\n",
210
+		       eoib->name, eth_ntoa ( mac ) );
211
+		eoib_create_peer ( eoib, mac );
212
+		return NULL;
213
+	}
214
+
215
+	/* If we have not yet recorded a received GID and QPN for this
216
+	 * peer cache entry, then send this packet as a broadcast.
217
+	 */
218
+	if ( ! peer->av.gid_present ) {
219
+		DBGCP ( eoib, "EoIB %s %s TX not yet recorded\n",
220
+			eoib->name, eth_ntoa ( mac ) );
221
+		return NULL;
222
+	}
223
+
224
+	/* If we have not yet resolved a path to this peer, then send
225
+	 * this packet as a broadcast.
226
+	 */
227
+	if ( ( rc = ib_resolve_path ( ibdev, &peer->av ) ) != 0 ) {
228
+		DBGCP ( eoib, "EoIB %s %s TX not yet resolved\n",
229
+			eoib->name, eth_ntoa ( mac ) );
230
+		return NULL;
231
+	}
232
+
233
+	/* Force use of GRH even for local destinations */
234
+	peer->av.gid_present = 1;
235
+
236
+	/* We have a fully resolved peer: send this packet as a
237
+	 * unicast.
238
+	 */
239
+	DBGCP ( eoib, "EoIB %s %s TX " IB_GID_FMT " QPN %#lx\n", eoib->name,
240
+		eth_ntoa ( mac ), IB_GID_ARGS ( &peer->av.gid ), peer->av.qpn );
241
+	return &peer->av;
242
+}
243
+
244
+/**
245
+ * Record source address vector
246
+ *
247
+ * @v eoib		EoIB device
248
+ * @v mac		Ethernet MAC
249
+ * @v lid		Infiniband LID
250
+ */
251
+static void eoib_rx_av ( struct eoib_device *eoib, const uint8_t *mac,
252
+			 const struct ib_address_vector *av ) {
253
+	const union ib_gid *gid = &av->gid;
254
+	unsigned long qpn = av->qpn;
255
+	struct eoib_peer *peer;
256
+
257
+	/* Sanity checks */
258
+	if ( ! av->gid_present ) {
259
+		DBGC ( eoib, "EoIB %s %s RX with no GID\n",
260
+		       eoib->name, eth_ntoa ( mac ) );
261
+		return;
262
+	}
263
+
264
+	/* Find peer cache entry (if any) */
265
+	peer = eoib_find_peer ( eoib, mac );
266
+	if ( ! peer ) {
267
+		DBGCP ( eoib, "EoIB %s %s RX " IB_GID_FMT " (ignored)\n",
268
+			eoib->name, eth_ntoa ( mac ), IB_GID_ARGS ( gid ) );
269
+		return;
270
+	}
271
+
272
+	/* Do nothing if peer cache entry is complete and correct */
273
+	if ( ( peer->av.lid == av->lid ) && ( peer->av.qpn == qpn ) ) {
274
+		DBGCP ( eoib, "EoIB %s %s RX unchanged\n",
275
+			eoib->name, eth_ntoa ( mac ) );
276
+		return;
277
+	}
278
+
279
+	/* Update peer cache entry */
280
+	peer->av.qpn = qpn;
281
+	peer->av.qkey = eoib->broadcast.qkey;
282
+	peer->av.gid_present = 1;
283
+	memcpy ( &peer->av.gid, gid, sizeof ( peer->av.gid ) );
284
+	DBGC ( eoib, "EoIB %s %s RX " IB_GID_FMT " QPN %#lx\n", eoib->name,
285
+	       eth_ntoa ( mac ), IB_GID_ARGS ( &peer->av.gid ), peer->av.qpn );
286
+}
287
+
288
+/****************************************************************************
289
+ *
290
+ * EoIB network device
291
+ *
292
+ ****************************************************************************
293
+ */
294
+
295
+/**
296
+ * Transmit packet via EoIB network device
297
+ *
298
+ * @v netdev		Network device
299
+ * @v iobuf		I/O buffer
300
+ * @ret rc		Return status code
301
+ */
302
+static int eoib_transmit ( struct net_device *netdev,
303
+			   struct io_buffer *iobuf ) {
304
+	struct eoib_device *eoib = netdev->priv;
305
+	struct eoib_header *eoib_hdr;
306
+	struct ethhdr *ethhdr;
307
+	struct ib_address_vector *av;
308
+	size_t zlen;
309
+
310
+	/* Sanity checks */
311
+	assert ( iob_len ( iobuf ) >= sizeof ( *ethhdr ) );
312
+	assert ( iob_headroom ( iobuf ) >= sizeof ( *eoib_hdr ) );
313
+
314
+	/* Look up destination address vector */
315
+	ethhdr = iobuf->data;
316
+	av = eoib_tx_av ( eoib, ethhdr->h_dest );
317
+
318
+	/* Prepend EoIB header */
319
+	eoib_hdr = iob_push ( iobuf, sizeof ( *eoib_hdr ) );
320
+	eoib_hdr->magic = htons ( EOIB_MAGIC );
321
+	eoib_hdr->reserved = 0;
322
+
323
+	/* Pad buffer to minimum Ethernet frame size */
324
+	zlen = ( sizeof ( *eoib_hdr ) + ETH_ZLEN );
325
+	assert ( zlen <= IOB_ZLEN );
326
+	if ( iob_len ( iobuf ) < zlen )
327
+		iob_pad ( iobuf, zlen );
328
+
329
+	/* If we have no unicast address then send as a broadcast */
330
+	if ( ! av )
331
+		av = &eoib->broadcast;
332
+
333
+	/* Post send work queue entry */
334
+	return ib_post_send ( eoib->ibdev, eoib->qp, av, iobuf );
335
+}
336
+
337
+/**
338
+ * Handle EoIB send completion
339
+ *
340
+ * @v ibdev		Infiniband device
341
+ * @v qp		Queue pair
342
+ * @v iobuf		I/O buffer
343
+ * @v rc		Completion status code
344
+ */
345
+static void eoib_complete_send ( struct ib_device *ibdev __unused,
346
+				 struct ib_queue_pair *qp,
347
+				 struct io_buffer *iobuf, int rc ) {
348
+	struct eoib_device *eoib = ib_qp_get_ownerdata ( qp );
349
+
350
+	netdev_tx_complete_err ( eoib->netdev, iobuf, rc );
351
+}
352
+
353
+/**
354
+ * Handle EoIB receive completion
355
+ *
356
+ * @v ibdev		Infiniband device
357
+ * @v qp		Queue pair
358
+ * @v dest		Destination address vector, or NULL
359
+ * @v source		Source address vector, or NULL
360
+ * @v iobuf		I/O buffer
361
+ * @v rc		Completion status code
362
+ */
363
+static void eoib_complete_recv ( struct ib_device *ibdev __unused,
364
+				 struct ib_queue_pair *qp,
365
+				 struct ib_address_vector *dest __unused,
366
+				 struct ib_address_vector *source,
367
+				 struct io_buffer *iobuf, int rc ) {
368
+	struct eoib_device *eoib = ib_qp_get_ownerdata ( qp );
369
+	struct net_device *netdev = eoib->netdev;
370
+	struct eoib_header *eoib_hdr;
371
+	struct ethhdr *ethhdr;
372
+
373
+	/* Record errors */
374
+	if ( rc != 0 ) {
375
+		netdev_rx_err ( netdev, iobuf, rc );
376
+		return;
377
+	}
378
+
379
+	/* Sanity check */
380
+	if ( iob_len ( iobuf ) < ( sizeof ( *eoib_hdr ) + sizeof ( *ethhdr ) )){
381
+		DBGC ( eoib, "EoIB %s received packet too short to "
382
+		       "contain EoIB and Ethernet headers\n", eoib->name );
383
+		DBGC_HD ( eoib, iobuf->data, iob_len ( iobuf ) );
384
+		netdev_rx_err ( netdev, iobuf, -EIO );
385
+		return;
386
+	}
387
+	if ( ! source ) {
388
+		DBGC ( eoib, "EoIB %s received packet without address "
389
+		       "vector\n", eoib->name );
390
+		netdev_rx_err ( netdev, iobuf, -ENOTTY );
391
+		return;
392
+	}
393
+
394
+	/* Strip EoIB header */
395
+	iob_pull ( iobuf, sizeof ( *eoib_hdr ) );
396
+
397
+	/* Update neighbour cache entry, if any */
398
+	ethhdr = iobuf->data;
399
+	eoib_rx_av ( eoib, ethhdr->h_source, source );
400
+
401
+	/* Hand off to network layer */
402
+	netdev_rx ( netdev, iobuf );
403
+}
404
+
405
+/** EoIB completion operations */
406
+static struct ib_completion_queue_operations eoib_cq_op = {
407
+	.complete_send = eoib_complete_send,
408
+	.complete_recv = eoib_complete_recv,
409
+};
410
+
411
+/** EoIB queue pair operations */
412
+static struct ib_queue_pair_operations eoib_qp_op = {
413
+	.alloc_iob = alloc_iob,
414
+};
415
+
416
+/**
417
+ * Poll EoIB network device
418
+ *
419
+ * @v netdev		Network device
420
+ */
421
+static void eoib_poll ( struct net_device *netdev ) {
422
+	struct eoib_device *eoib = netdev->priv;
423
+	struct ib_device *ibdev = eoib->ibdev;
424
+
425
+	/* Poll Infiniband device */
426
+	ib_poll_eq ( ibdev );
427
+
428
+	/* Poll the retry timers (required for EoIB multicast join) */
429
+	retry_poll();
430
+}
431
+
432
+/**
433
+ * Handle EoIB broadcast multicast group join completion
434
+ *
435
+ * @v membership	Multicast group membership
436
+ * @v rc		Status code
437
+ */
438
+static void eoib_join_complete ( struct ib_mc_membership *membership, int rc ) {
439
+	struct eoib_device *eoib =
440
+		container_of ( membership, struct eoib_device, membership );
441
+
442
+	/* Record join status as link status */
443
+	netdev_link_err ( eoib->netdev, rc );
444
+}
445
+
446
+/**
447
+ * Join EoIB broadcast multicast group
448
+ *
449
+ * @v eoib		EoIB device
450
+ * @ret rc		Return status code
451
+ */
452
+static int eoib_join_broadcast_group ( struct eoib_device *eoib ) {
453
+	int rc;
454
+
455
+	/* Join multicast group */
456
+	if ( ( rc = ib_mcast_join ( eoib->ibdev, eoib->qp,
457
+				    &eoib->membership, &eoib->broadcast, 0,
458
+				    eoib_join_complete ) ) != 0 ) {
459
+		DBGC ( eoib, "EoIB %s could not join broadcast group: %s\n",
460
+		       eoib->name, strerror ( rc ) );
461
+		return rc;
462
+	}
463
+
464
+	return 0;
465
+}
466
+
467
+/**
468
+ * Leave EoIB broadcast multicast group
469
+ *
470
+ * @v eoib		EoIB device
471
+ */
472
+static void eoib_leave_broadcast_group ( struct eoib_device *eoib ) {
473
+
474
+	/* Leave multicast group */
475
+	ib_mcast_leave ( eoib->ibdev, eoib->qp, &eoib->membership );
476
+}
477
+
478
+/**
479
+ * Handle link status change
480
+ *
481
+ * @v eoib		EoIB device
482
+ */
483
+static void eoib_link_state_changed ( struct eoib_device *eoib ) {
484
+	struct net_device *netdev = eoib->netdev;
485
+	struct ib_device *ibdev = eoib->ibdev;
486
+	int rc;
487
+
488
+	/* Leave existing broadcast group */
489
+	if ( eoib->qp )
490
+		eoib_leave_broadcast_group ( eoib );
491
+
492
+	/* Update broadcast GID based on potentially-new partition key */
493
+	eoib->broadcast.gid.words[2] = htons ( ibdev->pkey | IB_PKEY_FULL );
494
+
495
+	/* Set net device link state to reflect Infiniband link state */
496
+	rc = ib_link_rc ( ibdev );
497
+	netdev_link_err ( netdev, ( rc ? rc : -EINPROGRESS_JOINING ) );
498
+
499
+	/* Join new broadcast group */
500
+	if ( ib_is_open ( ibdev ) && ib_link_ok ( ibdev ) && eoib->qp &&
501
+	     ( ( rc = eoib_join_broadcast_group ( eoib ) ) != 0 ) ) {
502
+		DBGC ( eoib, "EoIB %s could not rejoin broadcast group: "
503
+		       "%s\n", eoib->name, strerror ( rc ) );
504
+		netdev_link_err ( netdev, rc );
505
+		return;
506
+	}
507
+}
508
+
509
+/**
510
+ * Open EoIB network device
511
+ *
512
+ * @v netdev		Network device
513
+ * @ret rc		Return status code
514
+ */
515
+static int eoib_open ( struct net_device *netdev ) {
516
+	struct eoib_device *eoib = netdev->priv;
517
+	struct ib_device *ibdev = eoib->ibdev;
518
+	int rc;
519
+
520
+	/* Open IB device */
521
+	if ( ( rc = ib_open ( ibdev ) ) != 0 ) {
522
+		DBGC ( eoib, "EoIB %s could not open %s: %s\n",
523
+		       eoib->name, ibdev->name, strerror ( rc ) );
524
+		goto err_ib_open;
525
+	}
526
+
527
+	/* Allocate completion queue */
528
+	eoib->cq = ib_create_cq ( ibdev, EOIB_NUM_CQES, &eoib_cq_op );
529
+	if ( ! eoib->cq ) {
530
+		DBGC ( eoib, "EoIB %s could not allocate completion queue\n",
531
+		       eoib->name );
532
+		rc = -ENOMEM;
533
+		goto err_create_cq;
534
+	}
535
+
536
+	/* Allocate queue pair */
537
+	eoib->qp = ib_create_qp ( ibdev, IB_QPT_UD, EOIB_NUM_SEND_WQES,
538
+				   eoib->cq, EOIB_NUM_RECV_WQES, eoib->cq,
539
+				  &eoib_qp_op, netdev->name );
540
+	if ( ! eoib->qp ) {
541
+		DBGC ( eoib, "EoIB %s could not allocate queue pair\n",
542
+		       eoib->name );
543
+		rc = -ENOMEM;
544
+		goto err_create_qp;
545
+	}
546
+	ib_qp_set_ownerdata ( eoib->qp, eoib );
547
+
548
+	/* Fill receive rings */
549
+	ib_refill_recv ( ibdev, eoib->qp );
550
+
551
+	/* Fake a link status change to join the broadcast group */
552
+	eoib_link_state_changed ( eoib );
553
+
554
+	return 0;
555
+
556
+	ib_destroy_qp ( ibdev, eoib->qp );
557
+	eoib->qp = NULL;
558
+ err_create_qp:
559
+	ib_destroy_cq ( ibdev, eoib->cq );
560
+	eoib->cq = NULL;
561
+ err_create_cq:
562
+	ib_close ( ibdev );
563
+ err_ib_open:
564
+	return rc;
565
+}
566
+
567
+/**
568
+ * Close EoIB network device
569
+ *
570
+ * @v netdev		Network device
571
+ */
572
+static void eoib_close ( struct net_device *netdev ) {
573
+	struct eoib_device *eoib = netdev->priv;
574
+	struct ib_device *ibdev = eoib->ibdev;
575
+
576
+	/* Flush peer cache */
577
+	eoib_flush_peers ( eoib );
578
+
579
+	/* Leave broadcast group */
580
+	eoib_leave_broadcast_group ( eoib );
581
+
582
+	/* Tear down the queues */
583
+	ib_destroy_qp ( ibdev, eoib->qp );
584
+	eoib->qp = NULL;
585
+	ib_destroy_cq ( ibdev, eoib->cq );
586
+	eoib->cq = NULL;
587
+
588
+	/* Close IB device */
589
+	ib_close ( ibdev );
590
+}
591
+
592
+/** EoIB network device operations */
593
+static struct net_device_operations eoib_operations = {
594
+	.open		= eoib_open,
595
+	.close		= eoib_close,
596
+	.transmit	= eoib_transmit,
597
+	.poll		= eoib_poll,
598
+};
599
+
600
+/**
601
+ * Create EoIB device
602
+ *
603
+ * @v ibdev		Infiniband device
604
+ * @v hw_addr		Ethernet MAC
605
+ * @v broadcast		Broadcast address vector
606
+ * @v name		Interface name (or NULL to use default)
607
+ * @ret rc		Return status code
608
+ */
609
+int eoib_create ( struct ib_device *ibdev, const uint8_t *hw_addr,
610
+		  struct ib_address_vector *broadcast, const char *name ) {
611
+	struct net_device *netdev;
612
+	struct eoib_device *eoib;
613
+	int rc;
614
+
615
+	/* Allocate network device */
616
+	netdev = alloc_etherdev ( sizeof ( *eoib ) );
617
+	if ( ! netdev ) {
618
+		rc = -ENOMEM;
619
+		goto err_alloc;
620
+	}
621
+	netdev_init ( netdev, &eoib_operations );
622
+	eoib = netdev->priv;
623
+	netdev->dev = ibdev->dev;
624
+	eoib->netdev = netdev;
625
+	eoib->ibdev = ibdev_get ( ibdev );
626
+	memcpy ( &eoib->broadcast, broadcast, sizeof ( eoib->broadcast ) );
627
+	INIT_LIST_HEAD ( &eoib->peers );
628
+
629
+	/* Set MAC address */
630
+	memcpy ( netdev->hw_addr, hw_addr, ETH_ALEN );
631
+
632
+	/* Set interface name, if applicable */
633
+	if ( name )
634
+		snprintf ( netdev->name, sizeof ( netdev->name ), "%s", name );
635
+	eoib->name = netdev->name;
636
+
637
+	/* Add to list of EoIB devices */
638
+	list_add_tail ( &eoib->list, &eoib_devices );
639
+
640
+	/* Register network device */
641
+	if ( ( rc = register_netdev ( netdev ) ) != 0 )
642
+		goto err_register;
643
+
644
+	DBGC ( eoib, "EoIB %s created for %s MAC %s\n",
645
+	       eoib->name, ibdev->name, eth_ntoa ( hw_addr ) );
646
+	DBGC ( eoib, "EoIB %s broadcast GID " IB_GID_FMT "\n",
647
+	       eoib->name, IB_GID_ARGS ( &broadcast->gid ) );
648
+	return 0;
649
+
650
+	unregister_netdev ( netdev );
651
+ err_register:
652
+	list_del ( &eoib->list );
653
+	ibdev_put ( ibdev );
654
+	netdev_nullify ( netdev );
655
+	netdev_put ( netdev );
656
+ err_alloc:
657
+	return rc;
658
+}
659
+
660
+/**
661
+ * Find EoIB device
662
+ *
663
+ * @v ibdev		Infiniband device
664
+ * @v hw_addr		Original Ethernet MAC
665
+ * @ret eoib		EoIB device
666
+ */
667
+struct eoib_device * eoib_find ( struct ib_device *ibdev,
668
+				 const uint8_t *hw_addr ) {
669
+	struct eoib_device *eoib;
670
+
671
+	list_for_each_entry ( eoib, &eoib_devices, list ) {
672
+		if ( ( eoib->ibdev == ibdev ) &&
673
+		     ( memcmp ( eoib->netdev->hw_addr, hw_addr,
674
+				ETH_ALEN ) == 0 ) )
675
+			return eoib;
676
+	}
677
+	return NULL;
678
+}
679
+
680
+/**
681
+ * Remove EoIB device
682
+ *
683
+ * @v eoib		EoIB device
684
+ */
685
+void eoib_destroy ( struct eoib_device *eoib ) {
686
+	struct net_device *netdev = eoib->netdev;
687
+
688
+	/* Unregister network device */
689
+	unregister_netdev ( netdev );
690
+
691
+	/* Remove from list of network devices */
692
+	list_del ( &eoib->list );
693
+
694
+	/* Drop reference to Infiniband device */
695
+	ibdev_put ( eoib->ibdev );
696
+
697
+	/* Free network device */
698
+	DBGC ( eoib, "EoIB %s destroyed\n", eoib->name );
699
+	netdev_nullify ( netdev );
700
+	netdev_put ( netdev );
701
+}
702
+
703
+/**
704
+ * Probe EoIB device
705
+ *
706
+ * @v ibdev		Infiniband device
707
+ * @ret rc		Return status code
708
+ */
709
+static int eoib_probe ( struct ib_device *ibdev __unused ) {
710
+
711
+	/* EoIB devices are not created automatically */
712
+	return 0;
713
+}
714
+
715
+/**
716
+ * Handle device or link status change
717
+ *
718
+ * @v ibdev		Infiniband device
719
+ */
720
+static void eoib_notify ( struct ib_device *ibdev ) {
721
+	struct eoib_device *eoib;
722
+
723
+	/* Handle link status change for any attached EoIB devices */
724
+	list_for_each_entry ( eoib, &eoib_devices, list ) {
725
+		if ( eoib->ibdev != ibdev )
726
+			continue;
727
+		eoib_link_state_changed ( eoib );
728
+	}
729
+}
730
+
731
+/**
732
+ * Remove EoIB device
733
+ *
734
+ * @v ibdev		Infiniband device
735
+ */
736
+static void eoib_remove ( struct ib_device *ibdev ) {
737
+	struct eoib_device *eoib;
738
+	struct eoib_device *tmp;
739
+
740
+	/* Remove any attached EoIB devices */
741
+	list_for_each_entry_safe ( eoib, tmp, &eoib_devices, list ) {
742
+		if ( eoib->ibdev != ibdev )
743
+			continue;
744
+		eoib_destroy ( eoib );
745
+	}
746
+}
747
+
748
+/** EoIB driver */
749
+struct ib_driver eoib_driver __ib_driver = {
750
+	.name = "EoIB",
751
+	.probe = eoib_probe,
752
+	.notify = eoib_notify,
753
+	.remove = eoib_remove,
754
+};

+ 60
- 0
src/include/ipxe/eoib.h Voir le fichier

@@ -0,0 +1,60 @@
1
+#ifndef _IPXE_EOIB_H
2
+#define _IPXE_EOIB_H
3
+
4
+/** @file
5
+ *
6
+ * Ethernet over Infiniband
7
+ *
8
+ */
9
+
10
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
11
+
12
+#include <stdint.h>
13
+#include <byteswap.h>
14
+#include <ipxe/netdevice.h>
15
+#include <ipxe/infiniband.h>
16
+#include <ipxe/ib_mcast.h>
17
+
18
/** The fixed four-byte header that precedes each Ethernet frame */
struct eoib_header {
	/** Signature (transmitted as EOIB_MAGIC in network byte order) */
	uint16_t magic;
	/** Reserved (transmitted as zero) */
	uint16_t reserved;
} __attribute__ (( packed ));
25
+
26
/** Signature value carried in the EoIB header's magic field */
#define EOIB_MAGIC 0x8919
28
+
29
+/** An EoIB device */
30
+struct eoib_device {
31
+	/** Name */
32
+	const char *name;
33
+	/** Network device */
34
+	struct net_device *netdev;
35
+	/** Underlying Infiniband device */
36
+	struct ib_device *ibdev;
37
+	/** List of EoIB devices */
38
+	struct list_head list;
39
+	/** Broadcast address */
40
+	struct ib_address_vector broadcast;
41
+
42
+	/** Completion queue */
43
+	struct ib_completion_queue *cq;
44
+	/** Queue pair */
45
+	struct ib_queue_pair *qp;
46
+	/** Broadcast group membership */
47
+	struct ib_mc_membership membership;
48
+
49
+	/** Peer cache */
50
+	struct list_head peers;
51
+};
52
+
53
/* Create and register an EoIB device on an Infiniband device */
extern int eoib_create ( struct ib_device *ibdev, const uint8_t *hw_addr,
			 struct ib_address_vector *broadcast,
			 const char *name );

/* Look up an EoIB device by Infiniband device and MAC address */
extern struct eoib_device * eoib_find ( struct ib_device *ibdev,
					const uint8_t *hw_addr );

/* Unregister and free an EoIB device */
extern void eoib_destroy ( struct eoib_device *eoib );
59
+
60
+#endif /* _IPXE_EOIB_H */

+ 1
- 0
src/include/ipxe/errfile.h Voir le fichier

@@ -185,6 +185,7 @@ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
185 185
 #define ERRFILE_intelxvf	     ( ERRFILE_DRIVER | 0x00790000 )
186 186
 #define ERRFILE_smsc95xx	     ( ERRFILE_DRIVER | 0x007a0000 )
187 187
 #define ERRFILE_acm		     ( ERRFILE_DRIVER | 0x007b0000 )
188
+#define ERRFILE_eoib		     ( ERRFILE_DRIVER | 0x007c0000 )
188 189
 
189 190
 #define ERRFILE_aoe			( ERRFILE_NET | 0x00000000 )
190 191
 #define ERRFILE_arp			( ERRFILE_NET | 0x00010000 )

Chargement…
Annuler
Enregistrer