[xen] Use version 1 grant tables by default

Using version 1 grant tables limits guests to using 16TB of grantable
RAM, and prevents the use of subpage grants.  Some versions of the Xen
hypervisor refuse to allow the grant table version to be set after the
first grant references have been created, so the loaded operating
system may be stuck with whatever choice we make here.  We therefore
currently use version 2 grant tables, since they give the most
flexibility to the loaded OS.
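For reference, the 16TB figure falls directly out of the version 1 entry
layout: the frame number is a 32-bit field, and 2^32 frames of 4kB each
is 16TB.  The entry layouts, abridged here from Xen's public
grant_table.h, are:

	/* Version 1: fixed 8-byte entries; the 32-bit frame field
	 * caps grantable RAM at 2^32 * 4kB = 16TB.
	 */
	struct grant_entry_v1 {
		uint16_t flags;
		domid_t domid;
		uint32_t frame;
	};

	/* Version 2: 16-byte entries sharing a common header; the
	 * full-page variant carries a 64-bit frame number, and other
	 * variants provide subpage grants.
	 */
	union grant_entry_v2 {
		struct grant_entry_header hdr;
		struct {
			struct grant_entry_header hdr;
			uint32_t pad0;
			uint64_t frame;
		} full_page;
		/* ... subpage and transitive variants omitted ... */
	};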

Current versions (7.2.0) of the Windows PV drivers have no support for
version 2 grant tables, and will merrily create version 1 entries in
what the hypervisor believes to be a version 2 table.  This causes
some confusion.

Avoid this problem by attempting to use version 1 tables, since
otherwise we may render Windows unable to boot.

Play nicely with other potential bootloaders by accepting either
version 1 or version 2 grant tables (if we are unable to set our
requested version).

Note that the use of version 1 tables on a 64-bit system introduces a
possible failure path in which a frame number cannot fit into the
32-bit field within the v1 structure.  This in turn introduces
additional failure paths into netfront_transmit() and
netfront_refill_rx().
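The essence of that new failure path, as it appears in
xengrant_permit_access() in the diff below: on a 64-bit build using a
version 1 table, a frame number that does not fit in 32 bits must make
the grant fail rather than be silently truncated.

	/* Version 1 table: may fail if address above 16TB */
	if ( frame > 0xffffffffUL )
		return -ERANGE;
	writel ( frame, &v1->frame );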

Signed-off-by: Michael Brown <mcb30@ipxe.org>
tags/v1.20.1
Michael Brown, 10 years ago · parent commit be79ca535a

src/arch/x86/drivers/xen/hvm.c (+23, -52)

@@ -145 +145 @@
  *
  * @v hvm		HVM device
  * @v space		Source mapping space
- * @v pages		Number of pages
+ * @v len		Length (must be a multiple of PAGE_SIZE)
  * @ret mmio		MMIO space address, or NULL on error
  */
 static void * hvm_ioremap ( struct hvm_device *hvm, unsigned int space,
-			    unsigned int pages ) {
+			    size_t len ) {
 	struct xen_add_to_physmap add;
 	struct xen_remove_from_physmap remove;
+	unsigned int pages = ( len / PAGE_SIZE );
 	physaddr_t mmio_phys;
 	unsigned int i;
-	size_t len;
 	void *mmio;
 	int xenrc;
 	int rc;
 
+	/* Sanity check */
+	assert ( ( len % PAGE_SIZE ) == 0 );
+
 	/* Check for available space */
-	len = ( pages * PAGE_SIZE );
 	if ( ( hvm->mmio_offset + len ) > hvm->mmio_len ) {
 		DBGC ( hvm, "HVM could not allocate %zd bytes of MMIO space "
 		       "(%zd of %zd remaining)\n", len,
@@ -218 +220 @@
  *
  * @v hvm		HVM device
  * @v mmio		MMIO space address
- * @v pages		Number of pages
+ * @v len		Length (must be a multiple of PAGE_SIZE)
  */
-static void hvm_iounmap ( struct hvm_device *hvm, void *mmio,
-			  unsigned int pages ) {
+static void hvm_iounmap ( struct hvm_device *hvm, void *mmio, size_t len ) {
 	struct xen_remove_from_physmap remove;
 	physaddr_t mmio_phys = virt_to_phys ( mmio );
+	unsigned int pages = ( len / PAGE_SIZE );
 	unsigned int i;
 	int xenrc;
 	int rc;
@@ -258 +260 @@
 	int rc;
 
 	/* Map shared info page */
-	hvm->xen.shared = hvm_ioremap ( hvm, XENMAPSPACE_shared_info, 1 );
+	hvm->xen.shared = hvm_ioremap ( hvm, XENMAPSPACE_shared_info,
+					PAGE_SIZE );
 	if ( ! hvm->xen.shared ) {
 		rc = -ENOMEM;
 		goto err_alloc;
@@ -273 +276 @@
 
 	return 0;
 
-	hvm_iounmap ( hvm, hvm->xen.shared, 1 );
+	hvm_iounmap ( hvm, hvm->xen.shared, PAGE_SIZE );
  err_alloc:
 	return rc;
 }
@@ -286 +289 @@
 static void hvm_unmap_shared_info ( struct hvm_device *hvm ) {
 
 	/* Unmap shared info page */
-	hvm_iounmap ( hvm, hvm->xen.shared, 1 );
+	hvm_iounmap ( hvm, hvm->xen.shared, PAGE_SIZE );
 }
 
 /**
@@ -296 +299 @@
  * @ret rc		Return status code
  */
 static int hvm_map_grant ( struct hvm_device *hvm ) {
-	struct gnttab_query_size size;
-	struct gnttab_set_version version;
 	physaddr_t grant_phys;
-	size_t len;
-	int xenrc;
 	int rc;
 
-	/* Get grant table size */
-	size.dom = DOMID_SELF;
-	if ( ( xenrc = xengrant_query_size ( &hvm->xen, &size ) ) != 0 ) {
-		rc = -EXEN ( xenrc );
-		DBGC ( hvm, "HVM could not get grant table size: %s\n",
+	/* Initialise grant table */
+	if ( ( rc = xengrant_init ( &hvm->xen ) ) != 0 ) {
+		DBGC ( hvm, "HVM could not initialise grant table: %s\n",
 		       strerror ( rc ) );
-		goto err_query_size;
-	}
-	len = ( size.nr_frames * PAGE_SIZE );
-
-	/* Configure to use version 2 tables */
-	version.version = 2;
-	if ( ( xenrc = xengrant_set_version ( &hvm->xen, &version ) ) != 0 ) {
-		rc = -EXEN ( xenrc );
-		DBGC ( hvm, "HVM could not set version 2 grant table: %s\n",
-		       strerror ( rc ) );
-		goto err_set_version;
-	}
-	if ( version.version != 2 ) {
-		DBGC ( hvm, "HVM could not set version 2 grant table\n" );
-		rc = -ENOTTY;
-		goto err_set_version;
+		return rc;
 	}
 
 	/* Map grant table */
 	hvm->xen.grant.table = hvm_ioremap ( hvm, XENMAPSPACE_grant_table,
-					     size.nr_frames );
-	if ( ! hvm->xen.grant.table ) {
-		rc = -ENODEV;
-		goto err_ioremap;
-	}
+					     hvm->xen.grant.len );
+	if ( ! hvm->xen.grant.table )
+		return -ENODEV;
+
 	grant_phys = virt_to_phys ( hvm->xen.grant.table );
 	DBGC2 ( hvm, "HVM mapped grant table at [%08lx,%08lx)\n",
-		grant_phys, ( grant_phys + len ) );
-	hvm->xen.grant.count = ( len / sizeof ( hvm->xen.grant.table[0] ) );
-
+		grant_phys, ( grant_phys + hvm->xen.grant.len ) );
 	return 0;
-
-	hvm_iounmap ( hvm, hvm->xen.grant.table, size.nr_frames );
- err_ioremap:
- err_set_version:
- err_query_size:
-	return rc;
 }
 
 /**
@@ -354 +327 @@
  * @v hvm		HVM device
  */
 static void hvm_unmap_grant ( struct hvm_device *hvm ) {
-	size_t len;
 
 	/* Unmap grant table */
-	len = ( hvm->xen.grant.count * sizeof ( hvm->xen.grant.table[0] ) );
-	hvm_iounmap ( hvm, hvm->xen.grant.table, ( len / PAGE_SIZE ) );
+	hvm_iounmap ( hvm, hvm->xen.grant.table, hvm->xen.grant.len );
 }
 
 /**
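The interface change running through this file is that hvm_ioremap()
and hvm_iounmap() now take a byte length rather than a page count; the
page count is derived internally, with an assertion guarding page
alignment.  A hypothetical caller mapping two pages would now look like
this (the space value is illustrative only):

	/* Hypothetical example: map and later unmap two pages */
	mmio = hvm_ioremap ( hvm, space, ( 2 * PAGE_SIZE ) );
	if ( ! mmio )
		return -ENODEV;
	/* ... use mapping ... */
	hvm_iounmap ( hvm, mmio, ( 2 * PAGE_SIZE ) );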

src/drivers/net/netfront.c (+62, -28)

@@ -292 +292 @@
 	}
 
 	/* Grant access to shared ring */
-	xengrant_permit_access ( xen, ring->ref, xendev->backend_id, 0,
-				 ring->sring.raw );
+	if ( ( rc = xengrant_permit_access ( xen, ring->ref, xendev->backend_id,
+					     0, ring->sring.raw ) ) != 0 ) {
+		DBGC ( netfront, "NETFRONT %s could not permit access to "
+		       "%#08lx: %s\n", xendev->key,
+		       virt_to_phys ( ring->sring.raw ), strerror ( rc ) );
+		goto err_permit_access;
+	}
 
 	/* Publish shared ring reference */
 	if ( ( rc = netfront_write_num ( netfront, ring->ref_key,
@@ -309 +314 @@
 	netfront_rm ( netfront, ring->ref_key );
  err_write_num:
 	xengrant_invalidate ( xen, ring->ref );
+ err_permit_access:
 	free_dma ( ring->sring.raw, PAGE_SIZE );
  err_alloc:
 	return rc;
@@ -320 +326 @@
  * @v netfront		Netfront device
  * @v ring		Descriptor ring
  * @v iobuf		I/O buffer
+ * @v id		Buffer ID to fill in
  * @v ref		Grant reference to fill in
- * @ret id		Buffer ID
+ * @ret rc		Return status code
  *
  * The caller is responsible for ensuring that there is space in the
  * ring.
  */
-static unsigned int netfront_push ( struct netfront_nic *netfront,
-				    struct netfront_ring *ring,
-				    struct io_buffer *iobuf,
-				    grant_ref_t *ref ) {
+static int netfront_push ( struct netfront_nic *netfront,
+			   struct netfront_ring *ring, struct io_buffer *iobuf,
+			   uint16_t *id, grant_ref_t *ref ) {
 	struct xen_device *xendev = netfront->xendev;
 	struct xen_hypervisor *xen = xendev->xen;
-	unsigned int id;
+	unsigned int next_id;
+	unsigned int next_ref;
+	int rc;
 
 	/* Sanity check */
 	assert ( ! netfront_ring_is_full ( ring ) );
 
 	/* Allocate buffer ID */
-	id = ring->ids[ ( ring->id_prod++ ) & ( ring->count - 1 ) ];
-
-	/* Store I/O buffer */
-	assert ( ring->iobufs[id] == NULL );
-	ring->iobufs[id] = iobuf;
+	next_id = ring->ids[ ring->id_prod & ( ring->count - 1 ) ];
+	next_ref = ring->refs[next_id];
 
 	/* Grant access to I/O buffer page.  I/O buffers are naturally
 	 * aligned, so we never need to worry about crossing a page
 	 * boundary.
 	 */
-	*ref = ring->refs[id];
-	xengrant_permit_access ( xen, ring->refs[id], xendev->backend_id, 0,
-				 iobuf->data );
+	if ( ( rc = xengrant_permit_access ( xen, next_ref, xendev->backend_id,
+					     0, iobuf->data ) ) != 0 ) {
+		DBGC ( netfront, "NETFRONT %s could not permit access to "
+		       "%#08lx: %s\n", xendev->key,
+		       virt_to_phys ( iobuf->data ), strerror ( rc ) );
+		return rc;
+	}
+
+	/* Store I/O buffer */
+	assert ( ring->iobufs[next_id] == NULL );
+	ring->iobufs[next_id] = iobuf;
 
-	return id;
+	/* Consume buffer ID */
+	ring->id_prod++;
+
+	/* Return buffer ID and grant reference */
+	*id = next_id;
+	*ref = next_ref;
+
+	return 0;
 }
 
 /**
@@ -431 +451 @@
 /**
  * Refill receive descriptor ring
  *
- * @v netfront		Netfront device
+ * @v netdev		Network device
  */
-static void netfront_refill_rx ( struct netfront_nic *netfront ) {
+static void netfront_refill_rx ( struct net_device *netdev ) {
+	struct netfront_nic *netfront = netdev->priv;
 	struct xen_device *xendev = netfront->xendev;
 	struct io_buffer *iobuf;
 	struct netif_rx_request *request;
 	int notify;
+	int rc;
 
 	/* Do nothing if ring is already full */
 	if ( netfront_ring_is_full ( &netfront->rx ) )
@@ -455 +477 @@
 
 		/* Add to descriptor ring */
 		request = RING_GET_REQUEST ( &netfront->rx_fring,
-					     netfront->rx_fring.req_prod_pvt++);
-		request->id = netfront_push ( netfront, &netfront->rx, iobuf,
-					      &request->gref );
+					     netfront->rx_fring.req_prod_pvt );
+		if ( ( rc = netfront_push ( netfront, &netfront->rx,
+					    iobuf, &request->id,
+					    &request->gref ) ) != 0 ) {
+			netdev_rx_err ( netdev, iobuf, rc );
+			break;
+		}
 		DBGC2 ( netfront, "NETFRONT %s RX id %d ref %d is %#08lx+%zx\n",
 			xendev->key, request->id, request->gref,
 			virt_to_phys ( iobuf->data ), iob_tailroom ( iobuf ) );
 
+		/* Move to next descriptor */
+		netfront->rx_fring.req_prod_pvt++;
+
 	} while ( ! netfront_ring_is_full ( &netfront->rx ) );
 
 	/* Push new descriptors and notify backend if applicable */
@@ -526 +555 @@
 	}
 
 	/* Refill receive descriptor ring */
-	netfront_refill_rx ( netfront );
+	netfront_refill_rx ( netdev );
 
 	/* Set link up */
 	netdev_link_up ( netdev );
@@ -614 +643 @@
 	struct xen_device *xendev = netfront->xendev;
 	struct netif_tx_request *request;
 	int notify;
+	int rc;
 
 	/* Check that we have space in the ring */
 	if ( netfront_ring_is_full ( &netfront->tx ) ) {
@@ -624 +654 @@
 
 	/* Add to descriptor ring */
 	request = RING_GET_REQUEST ( &netfront->tx_fring,
-				     netfront->tx_fring.req_prod_pvt++ );
-	request->id = netfront_push ( netfront, &netfront->tx, iobuf,
-				      &request->gref );
+				     netfront->tx_fring.req_prod_pvt );
+	if ( ( rc = netfront_push ( netfront, &netfront->tx, iobuf,
+				    &request->id, &request->gref ) ) != 0 ) {
+		return rc;
+	}
 	request->offset = ( virt_to_phys ( iobuf->data ) & ( PAGE_SIZE - 1 ) );
 	request->flags = NETTXF_data_validated;
 	request->size = iob_len ( iobuf );
@@ -634 +666 @@
 		xendev->key, request->id, request->gref,
 		virt_to_phys ( iobuf->data ), iob_len ( iobuf ) );
 
+	/* Consume descriptor */
+	netfront->tx_fring.req_prod_pvt++;
+
 	/* Push new descriptor and notify backend if applicable */
 	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY ( &netfront->tx_fring, notify );
 	if ( notify )
@@ -727 +762 @@
  * @v netdev		Network device
  */
 static void netfront_poll ( struct net_device *netdev ) {
-	struct netfront_nic *netfront = netdev->priv;
 
 	/* Poll for TX completions */
 	netfront_poll_tx ( netdev );
@@ -736 +770 @@
 	netfront_poll_rx ( netdev );
 
 	/* Refill RX descriptor ring */
-	netfront_refill_rx ( netfront );
+	netfront_refill_rx ( netdev );
 }
 
 /** Network device operations */
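The key correctness point in the reworked netfront_push() is the
reordering: the next buffer ID is only peeked at first, and nothing is
consumed until the grant has succeeded.  A sketch of the resulting
order of operations:

	/* netfront_push() order of operations:
	 *   1. peek next buffer ID and its pre-allocated grant reference
	 *   2. attempt xengrant_permit_access() (may now fail)
	 *   3. on success only: store the I/O buffer, consume the ID
	 *      (ring->id_prod++), and fill in *id and *ref
	 * A failed grant therefore leaves the ID ring untouched, and
	 * the callers likewise advance req_prod_pvt only after success.
	 */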

src/include/ipxe/xen.h (+5, -3)

@@ -27 +27 @@
 /** A Xen grant table */
 struct xen_grant {
 	/** Grant table entries */
-	union grant_entry_v2 *table;
-	/** Number of grant table entries (must be a power of two) */
-	unsigned int count;
+	struct grant_entry_v1 *table;
+	/** Total grant table length */
+	size_t len;
+	/** Entry size shift (for later version tables) */
+	unsigned int shift;
 	/** Number of grant table entries in use */
 	unsigned int used;
 	/** Most recently used grant reference */
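The new shift field is what lets a single table pointer serve both
versions: table is typed as v1 entries (8 bytes each), and shift
records how many v1-sized slots one logical entry occupies, 0 for a
version 1 table and 1 for a version 2 table (16-byte entries).  A
worked example of xengrant_entries(), assuming for illustration a
single-frame table:

	/* One 4kB frame of grant table:
	 *   v1 (shift = 0): ( 4096 / 8 ) >> 0 = 512 entries
	 *   v2 (shift = 1): ( 4096 / 8 ) >> 1 = 256 entries
	 */
	xen->grant.len = PAGE_SIZE;	/* illustrative value */
	entries = ( ( xen->grant.len / sizeof ( xen->grant.table[0] ) )
		    >> xen->grant.shift );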

src/include/ipxe/xengrant.h (+139, -9)

@@ -10 +10 @@
 FILE_LICENCE ( GPL2_OR_LATER );
 
 #include <stdint.h>
+#include <stdlib.h>
 #include <ipxe/io.h>
 #include <ipxe/xen.h>
 #include <xen/grant_table.h>
 
+/** Induced failure rate (for testing) */
+#define XENGRANT_FAIL_RATE 0
+
 /**
  * Query grant table size
  *
@@ -46 +50 @@
 				 virt_to_phys ( version ), 1 );
 }
 
+/**
+ * Get grant table version
+ *
+ * @v xen		Xen hypervisor
+ * @v version		Version
+ * @ret xenrc		Xen status code
+ */
+static inline __attribute__ (( always_inline )) int
+xengrant_get_version ( struct xen_hypervisor *xen,
+		       struct gnttab_get_version *version ) {
+
+	return xen_hypercall_3 ( xen, __HYPERVISOR_grant_table_op,
+				 GNTTABOP_get_version,
+				 virt_to_phys ( version ), 1 );
+}
+
+/**
+ * Get number of grant table entries
+ *
+ * @v xen		Xen hypervisor
+ * @ret entries		Number of grant table entries
+ */
+static inline __attribute__ (( always_inline )) unsigned int
+xengrant_entries ( struct xen_hypervisor *xen ) {
+
+	return ( ( xen->grant.len / sizeof ( xen->grant.table[0] ) )
+		 >> xen->grant.shift );
+}
+
+/**
+ * Get grant table entry header
+ *
+ * @v xen		Xen hypervisor
+ * @v ref		Grant reference
+ * @ret hdr		Grant table entry header
+ */
+static inline __attribute__ (( always_inline )) struct grant_entry_header *
+xengrant_header ( struct xen_hypervisor *xen, grant_ref_t ref ) {
+	struct grant_entry_v1 *v1;
+
+	v1 = &xen->grant.table[ ref << xen->grant.shift ];
+	return ( container_of ( &v1->flags, struct grant_entry_header, flags ));
+}
+
+/**
+ * Get version 1 grant table entry
+ *
+ * @v hdr		Grant table entry header
+ * @ret v1		Version 1 grant table entry
+ */
+static inline __attribute__ (( always_inline )) struct grant_entry_v1 *
+xengrant_v1 ( struct grant_entry_header *hdr ) {
+
+	return ( container_of ( &hdr->flags, struct grant_entry_v1, flags ) );
+}
+
+/**
+ * Get version 2 grant table entry
+ *
+ * @v hdr		Grant table entry header
+ * @ret v2		Version 2 grant table entry
+ */
+static inline __attribute__ (( always_inline )) union grant_entry_v2 *
+xengrant_v2 ( struct grant_entry_header *hdr ) {
+
+	return ( container_of ( &hdr->flags, union grant_entry_v2, hdr.flags ));
+}
+
+/**
+ * Zero grant table entry
+ *
+ * @v xen		Xen hypervisor
+ * @v hdr		Grant table entry header
+ */
+static inline void xengrant_zero ( struct xen_hypervisor *xen,
+				   struct grant_entry_header *hdr ) {
+	uint32_t *dword = ( ( uint32_t * ) hdr );
+	unsigned int i = ( ( sizeof ( xen->grant.table[0] ) / sizeof ( *dword ))
+			   << xen->grant.shift );
+
+	while ( i-- )
+		writel ( 0, dword++ );
+}
+
 /**
  * Invalidate access to a page
  *
@@ -54 +142 @@
  */
 static inline __attribute__ (( always_inline )) void
 xengrant_invalidate ( struct xen_hypervisor *xen, grant_ref_t ref ) {
-	union grant_entry_v2 *entry = &xen->grant.table[ref];
+	struct grant_entry_header *hdr = xengrant_header ( xen, ref );
 
 	/* Sanity check */
-	assert ( ( readw ( &entry->hdr.flags ) &
+	assert ( ( readw ( &hdr->flags ) &
 		   ( GTF_reading | GTF_writing ) ) == 0 );
 
 	/* This should apparently be done using a cmpxchg instruction.
@@ -65 +153 @@
 	 * mainly since our control flow generally does not permit
 	 * failure paths to themselves fail.
 	 */
-	writew ( 0, &entry->hdr.flags );
+	writew ( 0, &hdr->flags );
+
+	/* Leave reference marked as in-use (see xengrant_alloc()) */
+	writew ( DOMID_SELF, &hdr->domid );
 }
 
 /**
@@ -76 +167 @@
  * @v domid		Domain ID
  * @v subflags		Additional flags
  * @v page		Page start
+ * @ret rc		Return status code
  */
-static inline __attribute__ (( always_inline )) void
+static inline __attribute__ (( always_inline )) int
 xengrant_permit_access ( struct xen_hypervisor *xen, grant_ref_t ref,
 			 domid_t domid, unsigned int subflags, void *page ) {
-	union grant_entry_v2 *entry = &xen->grant.table[ref];
+	struct grant_entry_header *hdr = xengrant_header ( xen, ref );
+	struct grant_entry_v1 *v1 = xengrant_v1 ( hdr );
+	union grant_entry_v2 *v2 = xengrant_v2 ( hdr );
 	unsigned long frame = ( virt_to_phys ( page ) / PAGE_SIZE );
 
-	writew ( domid, &entry->full_page.hdr.domid );
+	/* Fail (for test purposes) if applicable */
+	if ( ( XENGRANT_FAIL_RATE > 0 ) &&
+	     ( random() % XENGRANT_FAIL_RATE ) == 0 ) {
+		return -EAGAIN;
+	}
+
+	/* Record frame number.  This may fail on a 64-bit system if
+	 * we are using v1 grant tables.  On a 32-bit system, there is
+	 * no way for this code path to fail (with either v1 or v2
+	 * grant tables); we allow the compiler to optimise the
+	 * failure paths away to save space.
+	 */
 	if ( sizeof ( physaddr_t ) == sizeof ( uint64_t ) ) {
-		writeq ( frame, &entry->full_page.frame );
+
+		/* 64-bit system */
+		if ( xen->grant.shift ) {
+			/* Version 2 table: no possible failure */
+			writeq ( frame, &v2->full_page.frame );
+		} else {
+			/* Version 1 table: may fail if address above 16TB */
+			if ( frame > 0xffffffffUL )
+				return -ERANGE;
+			writel ( frame, &v1->frame );
+		}
+
 	} else {
-		writel ( frame, &entry->full_page.frame );
+
+		/* 32-bit system */
+		if ( xen->grant.shift ) {
+			/* Version 2 table: no possible failure */
+			writel ( frame, &v2->full_page.frame );
+		} else {
+			/* Version 1 table: no possible failure */
+			writel ( frame, &v1->frame );
+		}
 	}
+
+	/* Record domain ID and flags */
+	writew ( domid, &hdr->domid );
 	wmb();
-	writew ( ( GTF_permit_access | subflags ), &entry->full_page.hdr.flags);
+	writew ( ( GTF_permit_access | subflags ), &hdr->flags );
 	wmb();
+
+	return 0;
 }
 
+extern int xengrant_init ( struct xen_hypervisor *xen );
 extern int xengrant_alloc ( struct xen_hypervisor *xen, grant_ref_t *refs,
 			    unsigned int count );
 extern void xengrant_free ( struct xen_hypervisor *xen, grant_ref_t *refs,
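XENGRANT_FAIL_RATE is a crude fault injector for exercising the new
failure paths: with a non-zero value N, roughly one call in N to
xengrant_permit_access() returns -EAGAIN.  It defaults to 0 (disabled),
and the XENGRANT_FAIL_RATE > 0 guard lets the compiler drop the check
entirely in normal builds.  To test, one might rebuild with, say:

	/* Fail roughly one grant attempt in sixteen (testing only) */
	#define XENGRANT_FAIL_RATE 16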

src/interface/xen/xengrant.c (+120, -17)

@@ -20 +20 @@
 FILE_LICENCE ( GPL2_OR_LATER );
 
 #include <stdint.h>
+#include <strings.h>
 #include <errno.h>
 #include <assert.h>
 #include <ipxe/io.h>
@@ -32 +33 @@
  *
  */
 
+/** Grant table version to try setting
+ *
+ * Using version 1 grant tables limits guests to using 16TB of
+ * grantable RAM, and prevents the use of subpage grants.  Some
+ * versions of the Xen hypervisor refuse to allow the grant table
+ * version to be set after the first grant references have been
+ * created, so the loaded operating system may be stuck with whatever
+ * choice we make here.  We therefore currently use version 2 grant
+ * tables, since they give the most flexibility to the loaded OS.
+ *
+ * Current versions (7.2.0) of the Windows PV drivers have no support
+ * for version 2 grant tables, and will merrily create version 1
+ * entries in what the hypervisor believes to be a version 2 table.
+ * This causes some confusion.
+ *
+ * Avoid this problem by attempting to use version 1 tables, since
+ * otherwise we may render Windows unable to boot.
+ *
+ * Play nicely with other potential bootloaders by accepting either
+ * version 1 or version 2 grant tables (if we are unable to set our
+ * requested version).
+ */
+#define XENGRANT_TRY_VERSION 1
+
+/**
+ * Initialise grant table
+ *
+ * @v xen		Xen hypervisor
+ * @ret rc		Return status code
+ */
+int xengrant_init ( struct xen_hypervisor *xen ) {
+	struct gnttab_query_size size;
+	struct gnttab_set_version set_version;
+	struct gnttab_get_version get_version;
+	struct grant_entry_v1 *v1;
+	union grant_entry_v2 *v2;
+	unsigned int version;
+	int xenrc;
+	int rc;
+
+	/* Get grant table size */
+	size.dom = DOMID_SELF;
+	if ( ( xenrc = xengrant_query_size ( xen, &size ) ) != 0 ) {
+		rc = -EXEN ( xenrc );
+		DBGC ( xen, "XENGRANT could not get table size: %s\n",
+		       strerror ( rc ) );
+		return rc;
+	}
+	xen->grant.len = ( size.nr_frames * PAGE_SIZE );
+
+	/* Set grant table version, if applicable */
+	set_version.version = XENGRANT_TRY_VERSION;
+	if ( ( xenrc = xengrant_set_version ( xen, &set_version ) ) != 0 ) {
+		rc = -EXEN ( xenrc );
+		DBGC ( xen, "XENGRANT could not set version %d: %s\n",
+		       XENGRANT_TRY_VERSION, strerror ( rc ) );
+		/* Continue; use whatever version is current */
+	}
+
+	/* Get grant table version */
+	get_version.dom = DOMID_SELF;
+	get_version.pad = 0;
+	if ( ( xenrc = xengrant_get_version ( xen, &get_version ) ) == 0 ) {
+		version = get_version.version;
+		switch ( version ) {
+
+		case 0:
+			/* Version not yet specified: will be version 1 */
+			version = 1;
+			break;
+
+		case 1 :
+			/* Version 1 table: nothing special to do */
+			break;
+
+		case 2:
+			/* Version 2 table: configure shift appropriately */
+			xen->grant.shift = ( fls ( sizeof ( *v2 ) /
+						   sizeof ( *v1 ) ) - 1 );
+			break;
+
+		default:
+			/* Unsupported version */
+			DBGC ( xen, "XENGRANT detected unsupported version "
+			       "%d\n", version );
+			return -ENOTSUP;
+
+		}
+	} else {
+		rc = -EXEN ( xenrc );
+		DBGC ( xen, "XENGRANT could not get version (assuming v1): "
+		       "%s\n", strerror ( rc ) );
+		version = 1;
+	}
+
+	DBGC ( xen, "XENGRANT using v%d table with %d entries\n",
+	       version, xengrant_entries ( xen ) );
+	return 0;
+}
+
 /**
  * Allocate grant references
  *
@@ -42 +143 @@
  */
 int xengrant_alloc ( struct xen_hypervisor *xen, grant_ref_t *refs,
 		     unsigned int count ) {
-	union grant_entry_v2 *entry;
-	unsigned int mask = ( xen->grant.count - 1 );
+	struct grant_entry_header *hdr;
+	unsigned int entries = xengrant_entries ( xen );
+	unsigned int mask = ( entries - 1 );
 	unsigned int check = 0;
 	unsigned int avail;
 	unsigned int ref;
 
 	/* Fail unless we have enough references available */
-	avail = ( xen->grant.count - xen->grant.used -
-		  GNTTAB_NR_RESERVED_ENTRIES );
+	avail = ( entries - xen->grant.used - GNTTAB_NR_RESERVED_ENTRIES );
 	if ( avail < count ) {
 		DBGC ( xen, "XENGRANT cannot allocate %d references (only %d "
-		       "of %d available)\n", count, avail, xen->grant.count );
+		       "of %d available)\n", count, avail, entries );
 		return -ENOBUFS;
 	}
 	DBGC ( xen, "XENGRANT allocating %d references (from %d of %d "
-	       "available)\n", count, avail, xen->grant.count );
+	       "available)\n", count, avail, entries );
 
 	/* Update number of references used */
 	xen->grant.used += count;
@@ -66 +167 @@
 	for ( ref = xen->grant.ref ; count ; ref = ( ( ref + 1 ) & mask ) ) {
 
 		/* Sanity check */
-		assert ( check++ < xen->grant.count );
+		assert ( check++ < entries );
 
 		/* Skip reserved references */
 		if ( ref < GNTTAB_NR_RESERVED_ENTRIES )
 			continue;
 
 		/* Skip in-use references */
-		entry = &xen->grant.table[ref];
-		if ( readw ( &entry->hdr.flags ) & GTF_type_mask )
+		hdr = xengrant_header ( xen, ref );
+		if ( readw ( &hdr->flags ) & GTF_type_mask )
 			continue;
-		if ( readw ( &entry->hdr.domid ) == DOMID_SELF )
+		if ( readw ( &hdr->domid ) == DOMID_SELF )
 			continue;
 
+		/* Zero reference */
+		xengrant_zero ( xen, hdr );
+
 		/* Mark reference as in-use.  We leave the flags as
 		 * empty (to avoid creating a valid grant table entry)
 		 * and set the domid to DOMID_SELF.
 		 */
-		writew ( DOMID_SELF, &entry->hdr.domid );
+		writew ( DOMID_SELF, &hdr->domid );
 		DBGC2 ( xen, "XENGRANT allocated ref %d\n", ref );
 
 		/* Record reference */
@@ -105 +209 @@
  */
 void xengrant_free ( struct xen_hypervisor *xen, grant_ref_t *refs,
 		     unsigned int count ) {
-	union grant_entry_v2 *entry;
+	struct grant_entry_header *hdr;
 	unsigned int ref;
 	unsigned int i;
 
@@ -114 +218 @@
 
 		/* Sanity check */
 		ref = refs[i];
-		assert ( ref < xen->grant.count );
+		assert ( ref < xengrant_entries ( xen ) );
 
-		/* Mark reference as unused */
-		entry = &xen->grant.table[ref];
-		writew ( 0, &entry->hdr.flags );
-		writew ( 0, &entry->hdr.domid );
+		/* Zero reference */
+		hdr = xengrant_header ( xen, ref );
+		xengrant_zero ( xen, hdr );
 		DBGC2 ( xen, "XENGRANT freed ref %d\n", ref );
 	}
 }
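The net effect of xengrant_init() is: try to set version 1, then ask
the hypervisor what the table actually is and adapt.  Summarising the
possible GNTTABOP_get_version outcomes:

	/* get_version result -> behaviour:
	 *   0       version not yet specified; will behave as v1
	 *   1       v1 table: shift stays 0 (8-byte entries)
	 *   2       v2 table: shift = fls ( 16 / 8 ) - 1 = 1
	 *   other   unsupported; fail with -ENOTSUP
	 *   hypercall fails (e.g. older hypervisor): assume v1
	 */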
