Browse Source

[ipoib] Expose Ethernet-compatible eIPoIB link-layer addresses and headers

Almost all clients of the raw-packet interfaces (UNDI and SNP) can
handle only Ethernet link layers.  Expose an Ethernet-compatible link
layer to local clients, while remaining compatible with IPoIB on the
wire.  This requires manipulation of ARP (but not DHCP) packets within
the IPoIB driver.

This is ugly, but it's the only viable way to allow IPoIB devices to
be driven via the raw-packet interfaces.

Signed-off-by: Michael Brown <mcb30@ipxe.org>
tags/v1.20.1
Michael Brown 12 years ago
parent
commit
03f0c23f8b

+ 2
- 1
src/arch/i386/interface/pxe/pxe_undi.c View File

358
 	}
358
 	}
359
 
359
 
360
 	/* Allocate and fill I/O buffer */
360
 	/* Allocate and fill I/O buffer */
361
-	iobuf = alloc_iob ( MAX_LL_HEADER_LEN + len );
361
+	iobuf = alloc_iob ( MAX_LL_HEADER_LEN +
362
+			    ( ( len > IOB_ZLEN ) ? len : IOB_ZLEN ) );
362
 	if ( ! iobuf ) {
363
 	if ( ! iobuf ) {
363
 		DBGC2 ( &pxe_netdev, " could not allocate iobuf\n" );
364
 		DBGC2 ( &pxe_netdev, " could not allocate iobuf\n" );
364
 		undi_transmit->Status = PXENV_STATUS_OUT_OF_RESOURCES;
365
 		undi_transmit->Status = PXENV_STATUS_OUT_OF_RESOURCES;

+ 375
- 260
src/drivers/net/ipoib.c View File

20
 FILE_LICENCE ( GPL2_OR_LATER );
20
 FILE_LICENCE ( GPL2_OR_LATER );
21
 
21
 
22
 #include <stdint.h>
22
 #include <stdint.h>
23
+#include <stdlib.h>
23
 #include <stdio.h>
24
 #include <stdio.h>
24
 #include <unistd.h>
25
 #include <unistd.h>
25
 #include <string.h>
26
 #include <string.h>
26
 #include <byteswap.h>
27
 #include <byteswap.h>
27
 #include <errno.h>
28
 #include <errno.h>
28
 #include <ipxe/errortab.h>
29
 #include <ipxe/errortab.h>
30
+#include <ipxe/malloc.h>
29
 #include <ipxe/if_arp.h>
31
 #include <ipxe/if_arp.h>
32
+#include <ipxe/if_ether.h>
33
+#include <ipxe/ethernet.h>
30
 #include <ipxe/iobuf.h>
34
 #include <ipxe/iobuf.h>
31
 #include <ipxe/netdevice.h>
35
 #include <ipxe/netdevice.h>
32
 #include <ipxe/infiniband.h>
36
 #include <ipxe/infiniband.h>
33
 #include <ipxe/ib_pathrec.h>
37
 #include <ipxe/ib_pathrec.h>
34
 #include <ipxe/ib_mcast.h>
38
 #include <ipxe/ib_mcast.h>
39
+#include <ipxe/retry.h>
35
 #include <ipxe/ipoib.h>
40
 #include <ipxe/ipoib.h>
36
 
41
 
37
 /** @file
42
 /** @file
58
 	struct ib_completion_queue *cq;
63
 	struct ib_completion_queue *cq;
59
 	/** Queue pair */
64
 	/** Queue pair */
60
 	struct ib_queue_pair *qp;
65
 	struct ib_queue_pair *qp;
66
+	/** Local MAC */
67
+	struct ipoib_mac mac;
61
 	/** Broadcast MAC */
68
 	/** Broadcast MAC */
62
 	struct ipoib_mac broadcast;
69
 	struct ipoib_mac broadcast;
63
 	/** Joined to IPv4 broadcast multicast group
70
 	/** Joined to IPv4 broadcast multicast group
68
 	int broadcast_joined;
75
 	int broadcast_joined;
69
 	/** IPv4 broadcast multicast group membership */
76
 	/** IPv4 broadcast multicast group membership */
70
 	struct ib_mc_membership broadcast_membership;
77
 	struct ib_mc_membership broadcast_membership;
78
+	/** REMAC cache */
79
+	struct list_head peers;
71
 };
80
 };
72
 
81
 
73
 /** Broadcast IPoIB address */
82
 /** Broadcast IPoIB address */
89
 
98
 
90
 /****************************************************************************
99
 /****************************************************************************
91
  *
100
  *
92
- * IPoIB peer cache
101
+ * IPoIB REMAC cache
93
  *
102
  *
94
  ****************************************************************************
103
  ****************************************************************************
95
  */
104
  */
96
 
105
 
97
-/**
106
+/** An IPoIB REMAC cache entry */
98
- * IPoIB peer address
99
- *
100
- * The IPoIB link-layer header is only four bytes long and so does not
101
- * have sufficient room to store IPoIB MAC address(es).  We therefore
102
- * maintain a cache of MAC addresses identified by a single-byte key,
103
- * and abuse the spare two bytes within the link-layer header to
104
- * communicate these MAC addresses between the link-layer code and the
105
- * netdevice driver.
106
- */
107
 struct ipoib_peer {
107
 struct ipoib_peer {
108
-	/** Key */
108
+	/** List of REMAC cache entries */
109
-	uint8_t key;
109
+	struct list_head list;
110
+	/** Remote Ethernet MAC */
111
+	struct ipoib_remac remac;
110
 	/** MAC address */
112
 	/** MAC address */
111
 	struct ipoib_mac mac;
113
 	struct ipoib_mac mac;
112
 };
114
 };
113
 
115
 
114
-/** Number of IPoIB peer cache entries
115
- *
116
- * Must be a power of two.
117
- */
118
-#define IPOIB_NUM_CACHED_PEERS 4
119
-
120
-/** IPoIB peer address cache */
121
-static struct ipoib_peer ipoib_peer_cache[IPOIB_NUM_CACHED_PEERS];
122
-
123
-/** Oldest IPoIB peer cache entry index */
124
-static unsigned int ipoib_peer_cache_idx = 0;
125
-
126
-/** IPoIB peer cache entry validity flag */
127
-#define IPOIB_PEER_KEY_VALID 0x80
128
-
129
 /**
116
 /**
130
- * Look up cached peer by key
117
+ * Find IPoIB MAC from REMAC
131
  *
118
  *
132
- * @v key		Peer cache key
119
+ * @v ipoib		IPoIB device
133
- * @ret peer		Peer cache entry, or NULL
120
+ * @v remac		Remote Ethernet MAC
121
+ * @ret mac		IPoIB MAC (or NULL if not found)
134
  */
122
  */
135
-static struct ipoib_peer * ipoib_lookup_peer_by_key ( unsigned int key ) {
123
+static struct ipoib_mac * ipoib_find_remac ( struct ipoib_device *ipoib,
124
+					     const struct ipoib_remac *remac ) {
136
 	struct ipoib_peer *peer;
125
 	struct ipoib_peer *peer;
137
-	unsigned int i;
138
 
126
 
139
-	if ( ! key )
127
+	/* Check for broadcast REMAC */
140
-		return NULL;
128
+	if ( is_broadcast_ether_addr ( remac ) )
141
-
129
+		return &ipoib->broadcast;
142
-	for ( i = 0 ; i < IPOIB_NUM_CACHED_PEERS ; i++ ) {
130
+
143
-		peer = &ipoib_peer_cache[i];
131
+	/* Try to find via REMAC cache */
144
-		if ( peer->key == key )
132
+	list_for_each_entry ( peer, &ipoib->peers, list ) {
145
-			return peer;
133
+		if ( memcmp ( remac, &peer->remac,
134
+			      sizeof ( peer->remac ) ) == 0 ) {
135
+			/* Move peer to start of list */
136
+			list_del ( &peer->list );
137
+			list_add ( &peer->list, &ipoib->peers );
138
+			return &peer->mac;
139
+		}
146
 	}
140
 	}
147
 
141
 
148
-	DBG ( "IPoIB warning: peer cache lost track of key %x while still in "
142
+	DBGC ( ipoib, "IPoIB %p unknown REMAC %s\n",
149
-	      "use\n", key );
143
+	       ipoib, eth_ntoa ( remac ) );
150
 	return NULL;
144
 	return NULL;
151
 }
145
 }
152
 
146
 
153
 /**
147
 /**
154
- * Store GID and QPN in peer cache
148
+ * Add IPoIB MAC to REMAC cache
155
  *
149
  *
156
- * @v mac		Peer MAC address
150
+ * @v ipoib		IPoIB device
157
- * @ret peer		Peer cache entry
151
+ * @v remac		Remote Ethernet MAC
152
+ * @v mac		IPoIB MAC
153
+ * @ret rc		Return status code
158
  */
154
  */
159
-static struct ipoib_peer * ipoib_cache_peer ( const struct ipoib_mac *mac ) {
155
+static int ipoib_map_remac ( struct ipoib_device *ipoib,
156
+			     const struct ipoib_remac *remac,
157
+			     const struct ipoib_mac *mac ) {
160
 	struct ipoib_peer *peer;
158
 	struct ipoib_peer *peer;
161
-	uint8_t key;
162
-	unsigned int i;
163
 
159
 
164
-	/* Look for existing cache entry */
160
+	/* Check for existing entry in REMAC cache */
165
-	for ( i = 0 ; i < IPOIB_NUM_CACHED_PEERS ; i++ ) {
161
+	list_for_each_entry ( peer, &ipoib->peers, list ) {
166
-		peer = &ipoib_peer_cache[i];
162
+		if ( memcmp ( remac, &peer->remac,
167
-		if ( memcmp ( &peer->mac, mac, sizeof ( peer->mac ) ) == 0 )
163
+			      sizeof ( peer->remac ) ) == 0 ) {
168
-			return peer;
164
+			/* Move peer to start of list */
165
+			list_del ( &peer->list );
166
+			list_add ( &peer->list, &ipoib->peers );
167
+			/* Update MAC */
168
+			memcpy ( &peer->mac, mac, sizeof ( peer->mac ) );
169
+			return 0;
170
+		}
169
 	}
171
 	}
170
 
172
 
171
-	/* No entry found: create a new one */
173
+	/* Create new entry */
172
-	key = ( ipoib_peer_cache_idx++ | IPOIB_PEER_KEY_VALID );
174
+	peer = malloc ( sizeof ( *peer ) );
173
-	peer = &ipoib_peer_cache[ key % IPOIB_NUM_CACHED_PEERS ];
175
+	if ( ! peer )
174
-	if ( peer->key )
176
+		return -ENOMEM;
175
-		DBG ( "IPoIB peer %x evicted from cache\n", peer->key );
177
+	memcpy ( &peer->remac, remac, sizeof ( peer->remac ) );
176
-
177
-	memset ( peer, 0, sizeof ( *peer ) );
178
-	peer->key = key;
179
 	memcpy ( &peer->mac, mac, sizeof ( peer->mac ) );
178
 	memcpy ( &peer->mac, mac, sizeof ( peer->mac ) );
180
-	DBG ( "IPoIB peer %x has MAC %s\n",
179
+	list_add ( &peer->list, &ipoib->peers );
181
-	      peer->key, ipoib_ntoa ( &peer->mac ) );
182
-	return peer;
183
-}
184
 
180
 
185
-/****************************************************************************
181
+	return 0;
186
- *
182
+}
187
- * IPoIB link layer
188
- *
189
- ****************************************************************************
190
- */
191
 
183
 
192
 /**
184
 /**
193
- * Add IPoIB link-layer header
185
+ * Flush REMAC cache
194
  *
186
  *
195
- * @v netdev		Network device
187
+ * @v ipoib		IPoIB device
196
- * @v iobuf		I/O buffer
197
- * @v ll_dest		Link-layer destination address
198
- * @v ll_source		Source link-layer address
199
- * @v net_proto		Network-layer protocol, in network-byte order
200
- * @ret rc		Return status code
201
  */
188
  */
202
-static int ipoib_push ( struct net_device *netdev __unused,
189
+static void ipoib_flush_remac ( struct ipoib_device *ipoib ) {
203
-			struct io_buffer *iobuf, const void *ll_dest,
190
+	struct ipoib_peer *peer;
204
-			const void *ll_source __unused, uint16_t net_proto ) {
191
+	struct ipoib_peer *tmp;
205
-	struct ipoib_hdr *ipoib_hdr =
206
-		iob_push ( iobuf, sizeof ( *ipoib_hdr ) );
207
-	const struct ipoib_mac *dest_mac = ll_dest;
208
-	const struct ipoib_mac *src_mac = ll_source;
209
-	struct ipoib_peer *dest;
210
-	struct ipoib_peer *src;
211
-
212
-	/* Add link-layer addresses to cache */
213
-	dest = ipoib_cache_peer ( dest_mac );
214
-	src = ipoib_cache_peer ( src_mac );
215
-
216
-	/* Build IPoIB header */
217
-	ipoib_hdr->proto = net_proto;
218
-	ipoib_hdr->u.peer.dest = dest->key;
219
-	ipoib_hdr->u.peer.src = src->key;
220
 
192
 
221
-	return 0;
193
+	list_for_each_entry_safe ( peer, tmp, &ipoib->peers, list ) {
194
+		list_del ( &peer->list );
195
+		free ( peer );
196
+	}
222
 }
197
 }
223
 
198
 
224
 /**
199
 /**
225
- * Remove IPoIB link-layer header
200
+ * Discard some entries from the REMAC cache
226
  *
201
  *
227
- * @v netdev		Network device
202
+ * @ret discarded	Number of cached items discarded
228
- * @v iobuf		I/O buffer
229
- * @ret ll_dest		Link-layer destination address
230
- * @ret ll_source	Source link-layer address
231
- * @ret net_proto	Network-layer protocol, in network-byte order
232
- * @ret flags		Packet flags
233
- * @ret rc		Return status code
234
  */
203
  */
235
-static int ipoib_pull ( struct net_device *netdev,
204
+static unsigned int ipoib_discard_remac ( void ) {
236
-			struct io_buffer *iobuf, const void **ll_dest,
205
+	struct ib_device *ibdev;
237
-			const void **ll_source, uint16_t *net_proto,
206
+	struct ipoib_device *ipoib;
238
-			unsigned int *flags ) {
207
+	struct ipoib_peer *peer;
239
-	struct ipoib_device *ipoib = netdev->priv;
208
+	unsigned int discarded = 0;
240
-	struct ipoib_hdr *ipoib_hdr = iobuf->data;
209
+
241
-	struct ipoib_peer *dest;
210
+	/* Try to discard one cache entry for each IPoIB device */
242
-	struct ipoib_peer *source;
211
+	for_each_ibdev ( ibdev ) {
243
-
212
+		ipoib = ib_get_ownerdata ( ibdev );
244
-	/* Sanity check */
213
+		list_for_each_entry_reverse ( peer, &ipoib->peers, list ) {
245
-	if ( iob_len ( iobuf ) < sizeof ( *ipoib_hdr ) ) {
214
+			list_del ( &peer->list );
246
-		DBG ( "IPoIB packet too short for link-layer header\n" );
215
+			free ( peer );
247
-		DBG_HD ( iobuf->data, iob_len ( iobuf ) );
216
+			discarded++;
248
-		return -EINVAL;
217
+			break;
218
+		}
249
 	}
219
 	}
250
 
220
 
251
-	/* Strip off IPoIB header */
221
+	return discarded;
252
-	iob_pull ( iobuf, sizeof ( *ipoib_hdr ) );
222
+}
253
-
254
-	/* Identify source and destination addresses, and clear
255
-	 * reserved word in IPoIB header
256
-	 */
257
-	dest = ipoib_lookup_peer_by_key ( ipoib_hdr->u.peer.dest );
258
-	source = ipoib_lookup_peer_by_key ( ipoib_hdr->u.peer.src );
259
-	ipoib_hdr->u.reserved = 0;
260
 
223
 
261
-	/* Fill in required fields */
224
+/** IPoIB cache discarder */
262
-	*ll_dest = ( dest ? &dest->mac : &ipoib->broadcast );
225
+struct cache_discarder ipoib_discarder __cache_discarder ( CACHE_NORMAL ) = {
263
-	*ll_source = ( source ? &source->mac : &ipoib->broadcast );
226
+	.discard = ipoib_discard_remac,
264
-	*net_proto = ipoib_hdr->proto;
227
+};
265
-	*flags = ( ( *ll_dest == &ipoib->broadcast ) ?
266
-		   ( LL_MULTICAST | LL_BROADCAST ) : 0 );
267
 
228
 
268
-	return 0;
229
+/****************************************************************************
269
-}
230
+ *
231
+ * IPoIB link layer
232
+ *
233
+ ****************************************************************************
234
+ */
270
 
235
 
271
 /**
236
 /**
272
  * Initialise IPoIB link-layer address
237
  * Initialise IPoIB link-layer address
275
  * @v ll_addr		Link-layer address
240
  * @v ll_addr		Link-layer address
276
  */
241
  */
277
 static void ipoib_init_addr ( const void *hw_addr, void *ll_addr ) {
242
 static void ipoib_init_addr ( const void *hw_addr, void *ll_addr ) {
278
-	const union ib_guid *guid = hw_addr;
243
+	const uint8_t *guid = hw_addr;
279
-	struct ipoib_mac *mac = ll_addr;
244
+	uint8_t *eth_addr = ll_addr;
245
+	uint8_t guid_mask = IPOIB_GUID_MASK;
246
+	unsigned int i;
280
 
247
 
281
-	memset ( mac, 0, sizeof ( *mac ) );
248
+	/* Extract bytes from GUID according to mask */
282
-	memcpy ( &mac->gid.s.guid, guid, sizeof ( mac->gid.s.guid ) );
249
+	for ( i = 0 ; i < 8 ; i++, guid++, guid_mask <<= 1 ) {
250
+		if ( guid_mask & 0x80 )
251
+			*(eth_addr++) = *guid;
252
+	}
283
 }
253
 }
284
 
254
 
255
+/** IPoIB protocol */
256
+struct ll_protocol ipoib_protocol __ll_protocol = {
257
+	.name		= "IPoIB",
258
+	.ll_proto	= htons ( ARPHRD_ETHER ),
259
+	.hw_addr_len	= sizeof ( union ib_guid ),
260
+	.ll_addr_len	= ETH_ALEN,
261
+	.ll_header_len	= ETH_HLEN,
262
+	.push		= eth_push,
263
+	.pull		= eth_pull,
264
+	.init_addr	= ipoib_init_addr,
265
+	.ntoa		= eth_ntoa,
266
+	.mc_hash	= eth_mc_hash,
267
+	.eth_addr	= eth_eth_addr,
268
+	.flags		= LL_NAME_ONLY,
269
+};
270
+
285
 /**
271
 /**
286
- * Transcribe IPoIB link-layer address
272
+ * Allocate IPoIB device
287
  *
273
  *
288
- * @v ll_addr	Link-layer address
274
+ * @v priv_size		Size of driver private data
289
- * @ret string	Link-layer address in human-readable format
275
+ * @ret netdev		Network device, or NULL
290
  */
276
  */
291
-const char * ipoib_ntoa ( const void *ll_addr ) {
277
+struct net_device * alloc_ipoibdev ( size_t priv_size ) {
292
-	static char buf[45];
278
+	struct net_device *netdev;
293
-	const struct ipoib_mac *mac = ll_addr;
279
+
294
-
280
+	netdev = alloc_netdev ( priv_size );
295
-	snprintf ( buf, sizeof ( buf ), "%08x:%08x:%08x:%08x:%08x",
281
+	if ( netdev ) {
296
-		   htonl ( mac->flags__qpn ), htonl ( mac->gid.dwords[0] ),
282
+		netdev->ll_protocol = &ipoib_protocol;
297
-		   htonl ( mac->gid.dwords[1] ),
283
+		netdev->ll_broadcast = eth_broadcast;
298
-		   htonl ( mac->gid.dwords[2] ),
284
+		netdev->max_pkt_len = IB_MAX_PAYLOAD_SIZE;
299
-		   htonl ( mac->gid.dwords[3] ) );
285
+	}
300
-	return buf;
286
+	return netdev;
301
 }
287
 }
302
 
288
 
289
+/****************************************************************************
290
+ *
291
+ * IPoIB translation layer
292
+ *
293
+ ****************************************************************************
294
+ */
295
+
303
 /**
296
 /**
304
- * Hash multicast address
297
+ * Translate transmitted ARP packet
305
  *
298
  *
306
- * @v af		Address family
299
+ * @v netdev		Network device
307
- * @v net_addr		Network-layer address
300
+ * @v iobuf		Packet to be transmitted (with no link-layer headers)
308
- * @v ll_addr		Link-layer address to fill in
309
  * @ret rc		Return status code
301
  * @ret rc		Return status code
310
  */
302
  */
311
-static int ipoib_mc_hash ( unsigned int af __unused,
303
+static int ipoib_translate_tx_arp ( struct net_device *netdev,
312
-			   const void *net_addr __unused,
304
+				    struct io_buffer *iobuf ) {
313
-			   void *ll_addr __unused ) {
305
+	struct ipoib_device *ipoib = netdev->priv;
306
+	struct arphdr *arphdr = iobuf->data;
307
+	struct ipoib_mac *target_ha = NULL;
308
+	void *sender_pa;
309
+	void *target_pa;
310
+
311
+	/* Do nothing unless ARP contains eIPoIB link-layer addresses */
312
+	if ( arphdr->ar_hln != ETH_ALEN )
313
+		return 0;
314
+
315
+	/* Fail unless we have room to expand packet */
316
+	if ( iob_tailroom ( iobuf ) < ( 2 * ( sizeof ( ipoib->mac ) -
317
+					      ETH_ALEN ) ) ) {
318
+		DBGC ( ipoib, "IPoIB %p insufficient space in TX ARP\n",
319
+		       ipoib );
320
+		return -ENOBUFS;
321
+	}
314
 
322
 
315
-	return -ENOTSUP;
323
+	/* Look up REMAC, if applicable */
324
+	if ( arphdr->ar_op == ARPOP_REPLY ) {
325
+		target_ha = ipoib_find_remac ( ipoib, arp_target_pa ( arphdr ));
326
+		if ( ! target_ha )
327
+			return -ENXIO;
328
+	}
329
+
330
+	/* Construct new packet */
331
+	iob_put ( iobuf, ( 2 * ( sizeof ( ipoib->mac ) - ETH_ALEN ) ) );
332
+	sender_pa = arp_sender_pa ( arphdr );
333
+	target_pa = arp_target_pa ( arphdr );
334
+	arphdr->ar_hrd = htons ( ARPHRD_INFINIBAND );
335
+	arphdr->ar_hln = sizeof ( ipoib->mac );
336
+	memcpy ( arp_target_pa ( arphdr ), target_pa, arphdr->ar_pln );
337
+	memcpy ( arp_sender_pa ( arphdr ), sender_pa, arphdr->ar_pln );
338
+	memcpy ( arp_sender_ha ( arphdr ), &ipoib->mac, sizeof ( ipoib->mac ) );
339
+	memset ( arp_target_ha ( arphdr ), 0, sizeof ( ipoib->mac ) );
340
+	if ( target_ha ) {
341
+		memcpy ( arp_target_ha ( arphdr ), target_ha,
342
+			 sizeof ( *target_ha ) );
343
+	}
344
+
345
+	return 0;
316
 }
346
 }
317
 
347
 
318
 /**
348
 /**
319
- * Generate Mellanox Ethernet-compatible compressed link-layer address
349
+ * Translate transmitted packet
320
  *
350
  *
321
- * @v ll_addr		Link-layer address
351
+ * @v netdev		Network device
322
- * @v eth_addr		Ethernet-compatible address to fill in
352
+ * @v iobuf		Packet to be transmitted (with no link-layer headers)
353
+ * @v net_proto		Network-layer protocol (in network byte order)
354
+ * @ret rc		Return status code
323
  */
355
  */
324
-static int ipoib_mlx_eth_addr ( const union ib_guid *guid,
356
+static int ipoib_translate_tx ( struct net_device *netdev,
325
-				uint8_t *eth_addr ) {
357
+				struct io_buffer *iobuf, uint16_t net_proto ) {
326
-	eth_addr[0] = ( ( guid->bytes[3] == 2 ) ? 0x00 : 0x02 );
358
+
327
-	eth_addr[1] = guid->bytes[1];
359
+	switch ( net_proto ) {
328
-	eth_addr[2] = guid->bytes[2];
360
+	case htons ( ETH_P_ARP ) :
329
-	eth_addr[3] = guid->bytes[5];
361
+		return ipoib_translate_tx_arp ( netdev, iobuf );
330
-	eth_addr[4] = guid->bytes[6];
362
+	case htons ( ETH_P_IP ) :
331
-	eth_addr[5] = guid->bytes[7];
363
+		/* No translation needed */
332
-	return 0;
364
+		return 0;
365
+	default:
366
+		/* Cannot handle other traffic via eIPoIB */
367
+		return -ENOTSUP;
368
+	}
333
 }
369
 }
334
 
370
 
335
-/** An IPoIB Ethernet-compatible compressed link-layer address generator */
336
-struct ipoib_eth_addr_handler {
337
-	/** GUID byte 1 */
338
-	uint8_t byte1;
339
-	/** GUID byte 2 */
340
-	uint8_t byte2;
341
-	/** Handler */
342
-	int ( * eth_addr ) ( const union ib_guid *guid,
343
-			     uint8_t *eth_addr );
344
-};
345
-
346
-/** IPoIB Ethernet-compatible compressed link-layer address generators */
347
-static struct ipoib_eth_addr_handler ipoib_eth_addr_handlers[] = {
348
-	{ 0x02, 0xc9, ipoib_mlx_eth_addr },
349
-};
350
-
351
 /**
371
 /**
352
- * Generate Ethernet-compatible compressed link-layer address
372
+ * Translate received ARP packet
353
  *
373
  *
354
- * @v ll_addr		Link-layer address
374
+ * @v netdev		Network device
355
- * @v eth_addr		Ethernet-compatible address to fill in
375
+ * @v iobuf		Received packet (with no link-layer headers)
376
+ * @v remac		Constructed Remote Ethernet MAC
377
+ * @ret rc		Return status code
356
  */
378
  */
357
-static int ipoib_eth_addr ( const void *ll_addr, void *eth_addr ) {
379
+static int ipoib_translate_rx_arp ( struct net_device *netdev,
358
-	const struct ipoib_mac *ipoib_addr = ll_addr;
380
+				    struct io_buffer *iobuf,
359
-	const union ib_guid *guid = &ipoib_addr->gid.s.guid;
381
+				    struct ipoib_remac *remac ) {
360
-	struct ipoib_eth_addr_handler *handler;
382
+	struct ipoib_device *ipoib = netdev->priv;
361
-	unsigned int i;
383
+	struct arphdr *arphdr = iobuf->data;
384
+	void *sender_pa;
385
+	void *target_pa;
386
+	int rc;
362
 
387
 
363
-	for ( i = 0 ; i < ( sizeof ( ipoib_eth_addr_handlers ) /
388
+	/* Do nothing unless ARP contains IPoIB link-layer addresses */
364
-			    sizeof ( ipoib_eth_addr_handlers[0] ) ) ; i++ ) {
389
+	if ( arphdr->ar_hln != sizeof ( ipoib->mac ) )
365
-		handler = &ipoib_eth_addr_handlers[i];
390
+		return 0;
366
-		if ( ( handler->byte1 == guid->bytes[1] ) &&
391
+
367
-		     ( handler->byte2 == guid->bytes[2] ) ) {
392
+	/* Create REMAC cache entry */
368
-			return handler->eth_addr ( guid, eth_addr );
393
+	if ( ( rc = ipoib_map_remac ( ipoib, remac,
369
-		}
394
+				      arp_sender_ha ( arphdr ) ) ) != 0 ) {
395
+		DBGC ( ipoib, "IPoIB %p could not map REMAC: %s\n",
396
+		       ipoib, strerror ( rc ) );
397
+		return rc;
370
 	}
398
 	}
371
-	return -ENOTSUP;
372
-}
373
 
399
 
374
-/** IPoIB protocol */
400
+	/* Construct new packet */
375
-struct ll_protocol ipoib_protocol __ll_protocol = {
401
+	sender_pa = arp_sender_pa ( arphdr );
376
-	.name		= "IPoIB",
402
+	target_pa = arp_target_pa ( arphdr );
377
-	.ll_proto	= htons ( ARPHRD_INFINIBAND ),
403
+	arphdr->ar_hrd = htons ( ARPHRD_ETHER );
378
-	.hw_addr_len	= sizeof ( union ib_guid ),
404
+	arphdr->ar_hln = ETH_ALEN;
379
-	.ll_addr_len	= IPOIB_ALEN,
405
+	memcpy ( arp_sender_pa ( arphdr ), sender_pa, arphdr->ar_pln );
380
-	.ll_header_len	= IPOIB_HLEN,
406
+	memcpy ( arp_target_pa ( arphdr ), target_pa, arphdr->ar_pln );
381
-	.push		= ipoib_push,
407
+	memcpy ( arp_sender_ha ( arphdr ), remac, ETH_ALEN );
382
-	.pull		= ipoib_pull,
408
+	memset ( arp_target_ha ( arphdr ), 0, ETH_ALEN );
383
-	.init_addr	= ipoib_init_addr,
409
+	if ( arphdr->ar_op == ARPOP_REPLY ) {
384
-	.ntoa		= ipoib_ntoa,
410
+		/* Assume received replies were directed to us */
385
-	.mc_hash	= ipoib_mc_hash,
411
+		memcpy ( arp_target_ha ( arphdr ), netdev->ll_addr, ETH_ALEN );
386
-	.eth_addr	= ipoib_eth_addr,
412
+	}
387
-};
413
+	iob_unput ( iobuf, ( 2 * ( sizeof ( ipoib->mac ) - ETH_ALEN ) ) );
414
+
415
+	return 0;
416
+}
388
 
417
 
389
 /**
418
 /**
390
- * Allocate IPoIB device
419
+ * Translate received packet
391
  *
420
  *
392
- * @v priv_size		Size of driver private data
421
+ * @v netdev		Network device
393
- * @ret netdev		Network device, or NULL
422
+ * @v iobuf		Received packet (with no link-layer headers)
423
+ * @v remac		Constructed Remote Ethernet MAC
424
+ * @v net_proto		Network-layer protocol (in network byte order)
425
+ * @ret rc		Return status code
394
  */
426
  */
395
-struct net_device * alloc_ipoibdev ( size_t priv_size ) {
427
+static int ipoib_translate_rx ( struct net_device *netdev,
396
-	struct net_device *netdev;
428
+				struct io_buffer *iobuf,
397
-
429
+				struct ipoib_remac *remac,
398
-	netdev = alloc_netdev ( priv_size );
430
+				uint16_t net_proto ) {
399
-	if ( netdev ) {
431
+
400
-		netdev->ll_protocol = &ipoib_protocol;
432
+	switch ( net_proto ) {
401
-		netdev->ll_broadcast = ( uint8_t * ) &ipoib_broadcast;
433
+	case htons ( ETH_P_ARP ) :
402
-		netdev->max_pkt_len = IB_MAX_PAYLOAD_SIZE;
434
+		return ipoib_translate_rx_arp ( netdev, iobuf, remac );
435
+	case htons ( ETH_P_IP ) :
436
+		/* No translation needed */
437
+		return 0;
438
+	default:
439
+		/* Cannot handle other traffic via eIPoIB */
440
+		return -ENOTSUP;
403
 	}
441
 	}
404
-	return netdev;
405
 }
442
 }
406
 
443
 
407
 /****************************************************************************
444
 /****************************************************************************
422
 			    struct io_buffer *iobuf ) {
459
 			    struct io_buffer *iobuf ) {
423
 	struct ipoib_device *ipoib = netdev->priv;
460
 	struct ipoib_device *ipoib = netdev->priv;
424
 	struct ib_device *ibdev = ipoib->ibdev;
461
 	struct ib_device *ibdev = ipoib->ibdev;
462
+	struct ethhdr *ethhdr;
425
 	struct ipoib_hdr *ipoib_hdr;
463
 	struct ipoib_hdr *ipoib_hdr;
426
-	struct ipoib_peer *peer;
464
+	struct ipoib_mac *mac;
427
 	struct ib_address_vector dest;
465
 	struct ib_address_vector dest;
466
+	uint16_t net_proto;
428
 	int rc;
467
 	int rc;
429
 
468
 
430
 	/* Sanity check */
469
 	/* Sanity check */
431
-	if ( iob_len ( iobuf ) < sizeof ( *ipoib_hdr ) ) {
470
+	if ( iob_len ( iobuf ) < sizeof ( *ethhdr ) ) {
432
 		DBGC ( ipoib, "IPoIB %p buffer too short\n", ipoib );
471
 		DBGC ( ipoib, "IPoIB %p buffer too short\n", ipoib );
433
 		return -EINVAL;
472
 		return -EINVAL;
434
 	}
473
 	}
435
-	ipoib_hdr = iobuf->data;
436
 
474
 
437
 	/* Attempting transmission while link is down will put the
475
 	/* Attempting transmission while link is down will put the
438
 	 * queue pair into an error state, so don't try it.
476
 	 * queue pair into an error state, so don't try it.
440
 	if ( ! ib_link_ok ( ibdev ) )
478
 	if ( ! ib_link_ok ( ibdev ) )
441
 		return -ENETUNREACH;
479
 		return -ENETUNREACH;
442
 
480
 
481
+	/* Strip eIPoIB header */
482
+	ethhdr = iobuf->data;
483
+	net_proto = ethhdr->h_protocol;
484
+	iob_pull ( iobuf, sizeof ( *ethhdr ) );
485
+
443
 	/* Identify destination address */
486
 	/* Identify destination address */
444
-	peer = ipoib_lookup_peer_by_key ( ipoib_hdr->u.peer.dest );
487
+	mac = ipoib_find_remac ( ipoib, ( ( void *) ethhdr->h_dest ) );
445
-	if ( ! peer )
488
+	if ( ! mac )
446
 		return -ENXIO;
489
 		return -ENXIO;
447
-	ipoib_hdr->u.reserved = 0;
490
+
491
+	/* Translate packet if applicable */
492
+	if ( ( rc = ipoib_translate_tx ( netdev, iobuf, net_proto ) ) != 0 )
493
+		return rc;
494
+
495
+	/* Prepend real IPoIB header */
496
+	ipoib_hdr = iob_push ( iobuf, sizeof ( *ipoib_hdr ) );
497
+	ipoib_hdr->proto = net_proto;
498
+	ipoib_hdr->reserved = 0;
448
 
499
 
449
 	/* Construct address vector */
500
 	/* Construct address vector */
450
 	memset ( &dest, 0, sizeof ( dest ) );
501
 	memset ( &dest, 0, sizeof ( dest ) );
451
-	dest.qpn = ( ntohl ( peer->mac.flags__qpn ) & IB_QPN_MASK );
502
+	dest.qpn = ( ntohl ( mac->flags__qpn ) & IB_QPN_MASK );
452
 	dest.gid_present = 1;
503
 	dest.gid_present = 1;
453
-	memcpy ( &dest.gid, &peer->mac.gid, sizeof ( dest.gid ) );
504
+	memcpy ( &dest.gid, &mac->gid, sizeof ( dest.gid ) );
454
 	if ( ( rc = ib_resolve_path ( ibdev, &dest ) ) != 0 ) {
505
 	if ( ( rc = ib_resolve_path ( ibdev, &dest ) ) != 0 ) {
455
 		/* Path not resolved yet */
506
 		/* Path not resolved yet */
456
 		return rc;
507
 		return rc;
487
  */
538
  */
488
 static void ipoib_complete_recv ( struct ib_device *ibdev __unused,
539
 static void ipoib_complete_recv ( struct ib_device *ibdev __unused,
489
 				  struct ib_queue_pair *qp,
540
 				  struct ib_queue_pair *qp,
490
-				  struct ib_address_vector *dest __unused,
541
+				  struct ib_address_vector *dest,
491
 				  struct ib_address_vector *source,
542
 				  struct ib_address_vector *source,
492
 				  struct io_buffer *iobuf, int rc ) {
543
 				  struct io_buffer *iobuf, int rc ) {
493
 	struct ipoib_device *ipoib = ib_qp_get_ownerdata ( qp );
544
 	struct ipoib_device *ipoib = ib_qp_get_ownerdata ( qp );
494
 	struct net_device *netdev = ipoib->netdev;
545
 	struct net_device *netdev = ipoib->netdev;
495
 	struct ipoib_hdr *ipoib_hdr;
546
 	struct ipoib_hdr *ipoib_hdr;
496
-	struct ipoib_mac ll_src;
547
+	struct ethhdr *ethhdr;
497
-	struct ipoib_peer *src;
548
+	struct ipoib_remac remac;
549
+	uint16_t net_proto;
498
 
550
 
499
 	/* Record errors */
551
 	/* Record errors */
500
 	if ( rc != 0 ) {
552
 	if ( rc != 0 ) {
510
 		netdev_rx_err ( netdev, iobuf, -EIO );
562
 		netdev_rx_err ( netdev, iobuf, -EIO );
511
 		return;
563
 		return;
512
 	}
564
 	}
513
-	ipoib_hdr = iobuf->data;
514
 	if ( ! source ) {
565
 	if ( ! source ) {
515
 		DBGC ( ipoib, "IPoIB %p received packet without address "
566
 		DBGC ( ipoib, "IPoIB %p received packet without address "
516
 		       "vector\n", ipoib );
567
 		       "vector\n", ipoib );
518
 		return;
569
 		return;
519
 	}
570
 	}
520
 
571
 
521
-	/* Parse source address */
572
+	/* Strip real IPoIB header */
522
-	if ( source->gid_present ) {
573
+	ipoib_hdr = iobuf->data;
523
-		ll_src.flags__qpn = htonl ( source->qpn );
574
+	net_proto = ipoib_hdr->proto;
524
-		memcpy ( &ll_src.gid, &source->gid, sizeof ( ll_src.gid ) );
575
+	iob_pull ( iobuf, sizeof ( *ipoib_hdr ) );
525
-		src = ipoib_cache_peer ( &ll_src );
576
+
526
-		ipoib_hdr->u.peer.src = src->key;
577
+	/* Construct source address from remote QPN and LID */
578
+	remac.qpn = htonl ( source->qpn | EIPOIB_QPN_LA );
579
+	remac.lid = htons ( source->lid );
580
+
581
+	/* Translate packet if applicable */
582
+	if ( ( rc = ipoib_translate_rx ( netdev, iobuf, &remac,
583
+					 net_proto ) ) != 0 ) {
584
+		netdev_rx_err ( netdev, iobuf, rc );
585
+		return;
586
+	}
587
+
588
+	/* Prepend eIPoIB header */
589
+	ethhdr = iob_push ( iobuf, sizeof ( *ethhdr ) );
590
+	memcpy ( &ethhdr->h_source, &remac, sizeof ( ethhdr->h_source ) );
591
+	ethhdr->h_protocol = net_proto;
592
+
593
+	/* Construct destination address */
594
+	if ( dest->gid_present && ( memcmp ( &dest->gid, &ipoib->broadcast.gid,
595
+					     sizeof ( dest->gid ) ) == 0 ) ) {
596
+		/* Broadcast GID; use the Ethernet broadcast address */
597
+		memcpy ( &ethhdr->h_dest, eth_broadcast,
598
+			 sizeof ( ethhdr->h_dest ) );
599
+	} else {
600
+		/* Assume destination address is local Ethernet MAC */
601
+		memcpy ( &ethhdr->h_dest, netdev->ll_addr,
602
+			 sizeof ( ethhdr->h_dest ) );
527
 	}
603
 	}
528
 
604
 
529
 	/* Hand off to network layer */
605
 	/* Hand off to network layer */
536
 	.complete_recv = ipoib_complete_recv,
612
 	.complete_recv = ipoib_complete_recv,
537
 };
613
 };
538
 
614
 
615
+/**
616
+ * Allocate IPoIB receive I/O buffer
617
+ *
618
+ * @v len		Length of buffer
619
+ * @ret iobuf		I/O buffer, or NULL
620
+ *
621
+ * Some Infiniband hardware requires 2kB alignment of receive buffers
622
+ * and provides no way to disable header separation.  The result is
623
+ * that there are only four bytes of link-layer header (the real IPoIB
624
+ * header) before the payload.  This is not sufficient space to insert
625
+ * an eIPoIB link-layer pseudo-header.
626
+ *
627
+ * We therefore allocate I/O buffers offset to start slightly before
628
+ * the natural alignment boundary, in order to allow sufficient space.
629
+ */
630
+static struct io_buffer * ipoib_alloc_iob ( size_t len ) {
631
+	struct io_buffer *iobuf;
632
+	size_t reserve_len;
633
+
634
+	/* Calculate additional length required at start of buffer */
635
+	reserve_len = ( sizeof ( struct ethhdr ) -
636
+			sizeof ( struct ipoib_hdr ) );
637
+
638
+	/* Allocate buffer */
639
+	iobuf = alloc_iob_raw ( ( len + reserve_len ), len, -reserve_len );
640
+	if ( iobuf ) {
641
+		iob_reserve ( iobuf, reserve_len );
642
+	}
643
+	return iobuf;
644
+}
645
+
539
 /** IPoIB queue pair operations */
646
 /** IPoIB queue pair operations */
540
 static struct ib_queue_pair_operations ipoib_qp_op = {
647
 static struct ib_queue_pair_operations ipoib_qp_op = {
541
-	.alloc_iob = alloc_iob,
648
+	.alloc_iob = ipoib_alloc_iob,
542
 };
649
 };
543
 
650
 
544
 /**
651
 /**
550
 	struct ipoib_device *ipoib = netdev->priv;
657
 	struct ipoib_device *ipoib = netdev->priv;
551
 	struct ib_device *ibdev = ipoib->ibdev;
658
 	struct ib_device *ibdev = ipoib->ibdev;
552
 
659
 
660
+	/* Poll Infiniband device */
553
 	ib_poll_eq ( ibdev );
661
 	ib_poll_eq ( ibdev );
662
+
663
+	/* Poll the retry timers (required for IPoIB multicast join) */
664
+	retry_poll();
554
 }
665
 }
555
 
666
 
556
 /**
667
 /**
617
 static void ipoib_link_state_changed ( struct ib_device *ibdev ) {
728
 static void ipoib_link_state_changed ( struct ib_device *ibdev ) {
618
 	struct net_device *netdev = ib_get_ownerdata ( ibdev );
729
 	struct net_device *netdev = ib_get_ownerdata ( ibdev );
619
 	struct ipoib_device *ipoib = netdev->priv;
730
 	struct ipoib_device *ipoib = netdev->priv;
620
-	struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
621
 	int rc;
731
 	int rc;
622
 
732
 
623
 	/* Leave existing broadcast group */
733
 	/* Leave existing broadcast group */
624
 	ipoib_leave_broadcast_group ( ipoib );
734
 	ipoib_leave_broadcast_group ( ipoib );
625
 
735
 
626
 	/* Update MAC address based on potentially-new GID prefix */
736
 	/* Update MAC address based on potentially-new GID prefix */
627
-	memcpy ( &mac->gid.s.prefix, &ibdev->gid.s.prefix,
737
+	memcpy ( &ipoib->mac.gid.s.prefix, &ibdev->gid.s.prefix,
628
-		 sizeof ( mac->gid.s.prefix ) );
738
+		 sizeof ( ipoib->mac.gid.s.prefix ) );
629
 
739
 
630
 	/* Update broadcast GID based on potentially-new partition key */
740
 	/* Update broadcast GID based on potentially-new partition key */
631
 	ipoib->broadcast.gid.words[2] =
741
 	ipoib->broadcast.gid.words[2] =
654
 static int ipoib_open ( struct net_device *netdev ) {
764
 static int ipoib_open ( struct net_device *netdev ) {
655
 	struct ipoib_device *ipoib = netdev->priv;
765
 	struct ipoib_device *ipoib = netdev->priv;
656
 	struct ib_device *ibdev = ipoib->ibdev;
766
 	struct ib_device *ibdev = ipoib->ibdev;
657
-	struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
658
 	int rc;
767
 	int rc;
659
 
768
 
660
 	/* Open IB device */
769
 	/* Open IB device */
686
 	ib_qp_set_ownerdata ( ipoib->qp, ipoib );
795
 	ib_qp_set_ownerdata ( ipoib->qp, ipoib );
687
 
796
 
688
 	/* Update MAC address with QPN */
797
 	/* Update MAC address with QPN */
689
-	mac->flags__qpn = htonl ( ipoib->qp->qpn );
798
+	ipoib->mac.flags__qpn = htonl ( ipoib->qp->qpn );
690
 
799
 
691
 	/* Fill receive rings */
800
 	/* Fill receive rings */
692
 	ib_refill_recv ( ibdev, ipoib->qp );
801
 	ib_refill_recv ( ibdev, ipoib->qp );
713
 static void ipoib_close ( struct net_device *netdev ) {
822
 static void ipoib_close ( struct net_device *netdev ) {
714
 	struct ipoib_device *ipoib = netdev->priv;
823
 	struct ipoib_device *ipoib = netdev->priv;
715
 	struct ib_device *ibdev = ipoib->ibdev;
824
 	struct ib_device *ibdev = ipoib->ibdev;
716
-	struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
825
+
826
+	/* Flush REMAC cache */
827
+	ipoib_flush_remac ( ipoib );
717
 
828
 
718
 	/* Leave broadcast group */
829
 	/* Leave broadcast group */
719
 	ipoib_leave_broadcast_group ( ipoib );
830
 	ipoib_leave_broadcast_group ( ipoib );
720
 
831
 
721
 	/* Remove QPN from MAC address */
832
 	/* Remove QPN from MAC address */
722
-	mac->flags__qpn = 0;
833
+	ipoib->mac.flags__qpn = 0;
723
 
834
 
724
 	/* Tear down the queues */
835
 	/* Tear down the queues */
725
 	ib_destroy_qp ( ibdev, ipoib->qp );
836
 	ib_destroy_qp ( ibdev, ipoib->qp );
759
 	memset ( ipoib, 0, sizeof ( *ipoib ) );
870
 	memset ( ipoib, 0, sizeof ( *ipoib ) );
760
 	ipoib->netdev = netdev;
871
 	ipoib->netdev = netdev;
761
 	ipoib->ibdev = ibdev;
872
 	ipoib->ibdev = ibdev;
873
+	INIT_LIST_HEAD ( &ipoib->peers );
762
 
874
 
763
 	/* Extract hardware address */
875
 	/* Extract hardware address */
764
 	memcpy ( netdev->hw_addr, &ibdev->gid.s.guid,
876
 	memcpy ( netdev->hw_addr, &ibdev->gid.s.guid,
765
 		 sizeof ( ibdev->gid.s.guid ) );
877
 		 sizeof ( ibdev->gid.s.guid ) );
766
 
878
 
767
-	/* Set default broadcast address */
879
+	/* Set local MAC address */
880
+	memcpy ( &ipoib->mac.gid.s.guid, &ibdev->gid.s.guid,
881
+		 sizeof ( ipoib->mac.gid.s.guid ) );
882
+
883
+	/* Set default broadcast MAC address */
768
 	memcpy ( &ipoib->broadcast, &ipoib_broadcast,
884
 	memcpy ( &ipoib->broadcast, &ipoib_broadcast,
769
 		 sizeof ( ipoib->broadcast ) );
885
 		 sizeof ( ipoib->broadcast ) );
770
-	netdev->ll_broadcast = ( ( uint8_t * ) &ipoib->broadcast );
771
 
886
 
772
 	/* Register network device */
887
 	/* Register network device */
773
 	if ( ( rc = register_netdev ( netdev ) ) != 0 )
888
 	if ( ( rc = register_netdev ( netdev ) ) != 0 )

+ 0
- 2
src/include/ipxe/dhcp.h View File

651
 #define PXEBS_SETTINGS_NAME "pxebs"
651
 #define PXEBS_SETTINGS_NAME "pxebs"
652
 
652
 
653
 extern uint32_t dhcp_last_xid;
653
 extern uint32_t dhcp_last_xid;
654
-extern unsigned int dhcp_chaddr ( struct net_device *netdev, void *chaddr,
655
-				  uint16_t *flags );
656
 extern int dhcp_create_packet ( struct dhcp_packet *dhcppkt,
654
 extern int dhcp_create_packet ( struct dhcp_packet *dhcppkt,
657
 				struct net_device *netdev, uint8_t msgtype,
655
 				struct net_device *netdev, uint8_t msgtype,
658
 				uint32_t xid, const void *options,
656
 				uint32_t xid, const void *options,

+ 25
- 17
src/include/ipxe/ipoib.h View File

8
 
8
 
9
 FILE_LICENCE ( GPL2_OR_LATER );
9
 FILE_LICENCE ( GPL2_OR_LATER );
10
 
10
 
11
+#include <ipxe/if_arp.h>
11
 #include <ipxe/infiniband.h>
12
 #include <ipxe/infiniband.h>
12
 
13
 
13
 /** IPoIB MAC address length */
14
 /** IPoIB MAC address length */
33
 	/** Network-layer protocol */
34
 	/** Network-layer protocol */
34
 	uint16_t proto;
35
 	uint16_t proto;
35
 	/** Reserved, must be zero */
36
 	/** Reserved, must be zero */
36
-	union {
37
+	uint16_t reserved;
37
-		/** Reserved, must be zero */
38
-		uint16_t reserved;
39
-		/** Peer addresses
40
-		 *
41
-		 * We use these fields internally to represent the
42
-		 * peer addresses using a lookup key.  There simply
43
-		 * isn't enough room in the IPoIB header to store
44
-		 * literal source or destination MAC addresses.
45
-		 */
46
-		struct {
47
-			/** Destination address key */
48
-			uint8_t dest;
49
-			/** Source address key */
50
-			uint8_t src;
51
-		} __attribute__ (( packed )) peer;
52
-	} __attribute__ (( packed )) u;
53
 } __attribute__ (( packed ));
38
 } __attribute__ (( packed ));
54
 
39
 
40
+/** GUID mask used for constructing eIPoIB Local Ethernet MAC address (LEMAC) */
41
+#define IPOIB_GUID_MASK 0xe7
42
+
43
+/** eIPoIB Remote Ethernet MAC address
44
+ *
45
+ * An eIPoIB REMAC address is an Ethernet-like (6 byte) link-layer
46
+ * pseudo-address used to look up a full IPoIB link-layer address.
47
+ */
48
+struct ipoib_remac {
49
+	/** Remote QPN
50
+	 *
51
+	 * Must be ORed with EIPOIB_QPN_LA so that eIPoIB REMAC
52
+	 * addresses are considered as locally-assigned Ethernet MAC
53
+	 * addresses.
54
+	 */
55
+	uint32_t qpn;
56
+	/** Remote LID */
57
+	uint16_t lid;
58
+} __attribute__ (( packed ));
59
+
60
+/** eIPoIB REMAC locally-assigned address indicator */
61
+#define EIPOIB_QPN_LA 0x02000000UL
62
+
55
 extern const char * ipoib_ntoa ( const void *ll_addr );
63
 extern const char * ipoib_ntoa ( const void *ll_addr );
56
 extern struct net_device * alloc_ipoibdev ( size_t priv_size );
64
 extern struct net_device * alloc_ipoibdev ( size_t priv_size );
57
 
65
 

+ 9
- 0
src/include/ipxe/netdevice.h View File

188
 	uint8_t ll_addr_len;
188
 	uint8_t ll_addr_len;
189
 	/** Link-layer header length */
189
 	/** Link-layer header length */
190
 	uint8_t ll_header_len;
190
 	uint8_t ll_header_len;
191
+	/** Flags */
192
+	unsigned int flags;
191
 };
193
 };
192
 
194
 
195
+/** Local link-layer address functions only as a name
196
+ *
197
+ * This flag indicates that the local link-layer address cannot
198
+ * directly be used as a destination address by a remote node.
199
+ */
200
+#define LL_NAME_ONLY 0x0001
201
+
193
 /** Network device operations */
202
 /** Network device operations */
194
 struct net_device_operations {
203
 struct net_device_operations {
195
 	/** Open network device
204
 	/** Open network device

+ 2
- 1
src/interface/efi/efi_snp.c View File

590
 
590
 
591
 	/* Allocate buffer */
591
 	/* Allocate buffer */
592
 	ll_headroom = ( MAX_LL_HEADER_LEN - ll_header_len );
592
 	ll_headroom = ( MAX_LL_HEADER_LEN - ll_header_len );
593
-	iobuf = alloc_iob ( ll_headroom + len );
593
+	iobuf = alloc_iob ( ll_headroom +
594
+			    ( ( len > IOB_ZLEN ) ? len : IOB_ZLEN ) );
594
 	if ( ! iobuf ) {
595
 	if ( ! iobuf ) {
595
 		DBGC ( snpdev, "SNPDEV %p TX could not allocate %ld-byte "
596
 		DBGC ( snpdev, "SNPDEV %p TX could not allocate %ld-byte "
596
 		       "buffer\n", snpdev, ( ( unsigned long ) len ) );
597
 		       "buffer\n", snpdev, ( ( unsigned long ) len ) );

+ 10
- 2
src/net/udp/dhcp.c View File

938
 	dhcphdr->magic = htonl ( DHCP_MAGIC_COOKIE );
938
 	dhcphdr->magic = htonl ( DHCP_MAGIC_COOKIE );
939
 	dhcphdr->htype = ntohs ( netdev->ll_protocol->ll_proto );
939
 	dhcphdr->htype = ntohs ( netdev->ll_protocol->ll_proto );
940
 	dhcphdr->op = dhcp_op[msgtype];
940
 	dhcphdr->op = dhcp_op[msgtype];
941
-	dhcphdr->hlen = dhcp_chaddr ( netdev, dhcphdr->chaddr,
941
+	dhcphdr->hlen = netdev->ll_protocol->ll_addr_len;
942
-				      &dhcphdr->flags );
942
+	memcpy ( dhcphdr->chaddr, netdev->ll_addr,
943
+		 netdev->ll_protocol->ll_addr_len );
943
 	memcpy ( dhcphdr->options, options, options_len );
944
 	memcpy ( dhcphdr->options, options, options_len );
944
 
945
 
946
+	/* If the local link-layer address functions only as a name
947
+	 * (i.e. cannot be used as a destination address), then
948
+	 * request broadcast responses.
949
+	 */
950
+	if ( netdev->ll_protocol->flags & LL_NAME_ONLY )
951
+		dhcphdr->flags |= htons ( BOOTP_FL_BROADCAST );
952
+
945
 	/* If the network device already has an IPv4 address then
953
 	/* If the network device already has an IPv4 address then
946
 	 * unicast responses from the DHCP server may be rejected, so
954
 	 * unicast responses from the DHCP server may be rejected, so
947
 	 * request broadcast responses.
955
 	 * request broadcast responses.

+ 2
- 10
src/usr/dhcpmgmt.c View File

38
  */
38
  */
39
 
39
 
40
 int dhcp ( struct net_device *netdev ) {
40
 int dhcp ( struct net_device *netdev ) {
41
-	struct dhcphdr *dhcphdr;
42
-	typeof ( dhcphdr->chaddr ) chaddr;
43
-	unsigned int hlen;
44
-	unsigned int i;
45
 	int rc;
41
 	int rc;
46
 
42
 
47
 	/* Check we can open the interface first */
43
 	/* Check we can open the interface first */
53
 		return rc;
49
 		return rc;
54
 
50
 
55
 	/* Perform DHCP */
51
 	/* Perform DHCP */
56
-	printf ( "DHCP (%s", netdev->name );
52
+	printf ( "DHCP (%s %s)", netdev->name,
57
-	hlen = dhcp_chaddr ( netdev, chaddr, NULL );
53
+		 netdev->ll_protocol->ntoa ( netdev->ll_addr ) );
58
-	for ( i = 0 ; i < hlen ; i++ )
59
-		printf ( "%c%02x", ( i ? ':' : ' ' ), chaddr[i] );
60
-	printf ( ")" );
61
-
62
 	if ( ( rc = start_dhcp ( &monojob, netdev ) ) == 0 ) {
54
 	if ( ( rc = start_dhcp ( &monojob, netdev ) ) == 0 ) {
63
 		rc = monojob_wait ( "" );
55
 		rc = monojob_wait ( "" );
64
 	} else if ( rc > 0 ) {
56
 	} else if ( rc > 0 ) {

Loading…
Cancel
Save