浏览代码

[xen] Use version 1 grant tables by default

Using version 1 grant tables limits guests to using 16TB of grantable
RAM, and prevents the use of subpage grants.  Some versions of the Xen
hypervisor refuse to allow the grant table version to be set after the
first grant references have been created, so the loaded operating
system may be stuck with whatever choice we make here.  We therefore
currently use version 2 grant tables, since they give the most
flexibility to the loaded OS.

Current versions (7.2.0) of the Windows PV drivers have no support for
version 2 grant tables, and will merrily create version 1 entries in
what the hypervisor believes to be a version 2 table.  This causes
some confusion.

Avoid this problem by attempting to use version 1 tables, since
otherwise we may render Windows unable to boot.

Play nicely with other potential bootloaders by accepting either
version 1 or version 2 grant tables (if we are unable to set our
requested version).

Note that the use of version 1 tables on a 64-bit system introduces a
possible failure path in which a frame number cannot fit into the
32-bit field within the v1 structure.  This in turn introduces
additional failure paths into netfront_transmit() and
netfront_refill_rx().

Signed-off-by: Michael Brown <mcb30@ipxe.org>
tags/v1.20.1
Michael Brown 10 年前
父节点
当前提交
be79ca535a
共有 5 个文件被更改,包括 349 次插入和 109 次删除
  1. 23
    52
      src/arch/x86/drivers/xen/hvm.c
  2. 62
    28
      src/drivers/net/netfront.c
  3. 5
    3
      src/include/ipxe/xen.h
  4. 139
    9
      src/include/ipxe/xengrant.h
  5. 120
    17
      src/interface/xen/xengrant.c

+ 23
- 52
src/arch/x86/drivers/xen/hvm.c 查看文件

@@ -145,22 +145,24 @@ static void hvm_unmap_hypercall ( struct hvm_device *hvm ) {
145 145
  *
146 146
  * @v hvm		HVM device
147 147
  * @v space		Source mapping space
148
- * @v pages		Number of pages
148
+ * @v len		Length (must be a multiple of PAGE_SIZE)
149 149
  * @ret mmio		MMIO space address, or NULL on error
150 150
  */
151 151
 static void * hvm_ioremap ( struct hvm_device *hvm, unsigned int space,
152
-			    unsigned int pages ) {
152
+			    size_t len ) {
153 153
 	struct xen_add_to_physmap add;
154 154
 	struct xen_remove_from_physmap remove;
155
+	unsigned int pages = ( len / PAGE_SIZE );
155 156
 	physaddr_t mmio_phys;
156 157
 	unsigned int i;
157
-	size_t len;
158 158
 	void *mmio;
159 159
 	int xenrc;
160 160
 	int rc;
161 161
 
162
+	/* Sanity check */
163
+	assert ( ( len % PAGE_SIZE ) == 0 );
164
+
162 165
 	/* Check for available space */
163
-	len = ( pages * PAGE_SIZE );
164 166
 	if ( ( hvm->mmio_offset + len ) > hvm->mmio_len ) {
165 167
 		DBGC ( hvm, "HVM could not allocate %zd bytes of MMIO space "
166 168
 		       "(%zd of %zd remaining)\n", len,
@@ -218,12 +220,12 @@ static void * hvm_ioremap ( struct hvm_device *hvm, unsigned int space,
218 220
  *
219 221
  * @v hvm		HVM device
220 222
  * @v mmio		MMIO space address
221
- * @v pages		Number of pages
223
+ * @v len		Length (must be a multiple of PAGE_SIZE)
222 224
  */
223
-static void hvm_iounmap ( struct hvm_device *hvm, void *mmio,
224
-			  unsigned int pages ) {
225
+static void hvm_iounmap ( struct hvm_device *hvm, void *mmio, size_t len ) {
225 226
 	struct xen_remove_from_physmap remove;
226 227
 	physaddr_t mmio_phys = virt_to_phys ( mmio );
228
+	unsigned int pages = ( len / PAGE_SIZE );
227 229
 	unsigned int i;
228 230
 	int xenrc;
229 231
 	int rc;
@@ -258,7 +260,8 @@ static int hvm_map_shared_info ( struct hvm_device *hvm ) {
258 260
 	int rc;
259 261
 
260 262
 	/* Map shared info page */
261
-	hvm->xen.shared = hvm_ioremap ( hvm, XENMAPSPACE_shared_info, 1 );
263
+	hvm->xen.shared = hvm_ioremap ( hvm, XENMAPSPACE_shared_info,
264
+					PAGE_SIZE );
262 265
 	if ( ! hvm->xen.shared ) {
263 266
 		rc = -ENOMEM;
264 267
 		goto err_alloc;
@@ -273,7 +276,7 @@ static int hvm_map_shared_info ( struct hvm_device *hvm ) {
273 276
 
274 277
 	return 0;
275 278
 
276
-	hvm_iounmap ( hvm, hvm->xen.shared, 1 );
279
+	hvm_iounmap ( hvm, hvm->xen.shared, PAGE_SIZE );
277 280
  err_alloc:
278 281
 	return rc;
279 282
 }
@@ -286,7 +289,7 @@ static int hvm_map_shared_info ( struct hvm_device *hvm ) {
286 289
 static void hvm_unmap_shared_info ( struct hvm_device *hvm ) {
287 290
 
288 291
 	/* Unmap shared info page */
289
-	hvm_iounmap ( hvm, hvm->xen.shared, 1 );
292
+	hvm_iounmap ( hvm, hvm->xen.shared, PAGE_SIZE );
290 293
 }
291 294
 
292 295
 /**
@@ -296,56 +299,26 @@ static void hvm_unmap_shared_info ( struct hvm_device *hvm ) {
296 299
  * @ret rc		Return status code
297 300
  */
298 301
 static int hvm_map_grant ( struct hvm_device *hvm ) {
299
-	struct gnttab_query_size size;
300
-	struct gnttab_set_version version;
301 302
 	physaddr_t grant_phys;
302
-	size_t len;
303
-	int xenrc;
304 303
 	int rc;
305 304
 
306
-	/* Get grant table size */
307
-	size.dom = DOMID_SELF;
308
-	if ( ( xenrc = xengrant_query_size ( &hvm->xen, &size ) ) != 0 ) {
309
-		rc = -EXEN ( xenrc );
310
-		DBGC ( hvm, "HVM could not get grant table size: %s\n",
305
+	/* Initialise grant table */
306
+	if ( ( rc = xengrant_init ( &hvm->xen ) ) != 0 ) {
307
+		DBGC ( hvm, "HVM could not initialise grant table: %s\n",
311 308
 		       strerror ( rc ) );
312
-		goto err_query_size;
313
-	}
314
-	len = ( size.nr_frames * PAGE_SIZE );
315
-
316
-	/* Configure to use version 2 tables */
317
-	version.version = 2;
318
-	if ( ( xenrc = xengrant_set_version ( &hvm->xen, &version ) ) != 0 ) {
319
-		rc = -EXEN ( xenrc );
320
-		DBGC ( hvm, "HVM could not set version 2 grant table: %s\n",
321
-		       strerror ( rc ) );
322
-		goto err_set_version;
323
-	}
324
-	if ( version.version != 2 ) {
325
-		DBGC ( hvm, "HVM could not set version 2 grant table\n" );
326
-		rc = -ENOTTY;
327
-		goto err_set_version;
309
+		return rc;
328 310
 	}
329 311
 
330 312
 	/* Map grant table */
331 313
 	hvm->xen.grant.table = hvm_ioremap ( hvm, XENMAPSPACE_grant_table,
332
-					     size.nr_frames );
333
-	if ( ! hvm->xen.grant.table ) {
334
-		rc = -ENODEV;
335
-		goto err_ioremap;
336
-	}
314
+					     hvm->xen.grant.len );
315
+	if ( ! hvm->xen.grant.table )
316
+		return -ENODEV;
317
+
337 318
 	grant_phys = virt_to_phys ( hvm->xen.grant.table );
338 319
 	DBGC2 ( hvm, "HVM mapped grant table at [%08lx,%08lx)\n",
339
-		grant_phys, ( grant_phys + len ) );
340
-	hvm->xen.grant.count = ( len / sizeof ( hvm->xen.grant.table[0] ) );
341
-
320
+		grant_phys, ( grant_phys + hvm->xen.grant.len ) );
342 321
 	return 0;
343
-
344
-	hvm_iounmap ( hvm, hvm->xen.grant.table, size.nr_frames );
345
- err_ioremap:
346
- err_set_version:
347
- err_query_size:
348
-	return rc;
349 322
 }
350 323
 
351 324
 /**
@@ -354,11 +327,9 @@ static int hvm_map_grant ( struct hvm_device *hvm ) {
354 327
  * @v hvm		HVM device
355 328
  */
356 329
 static void hvm_unmap_grant ( struct hvm_device *hvm ) {
357
-	size_t len;
358 330
 
359 331
 	/* Unmap grant table */
360
-	len = ( hvm->xen.grant.count * sizeof ( hvm->xen.grant.table[0] ) );
361
-	hvm_iounmap ( hvm, hvm->xen.grant.table, ( len / PAGE_SIZE ) );
332
+	hvm_iounmap ( hvm, hvm->xen.grant.table, hvm->xen.grant.len );
362 333
 }
363 334
 
364 335
 /**

+ 62
- 28
src/drivers/net/netfront.c 查看文件

@@ -292,8 +292,13 @@ static int netfront_create_ring ( struct netfront_nic *netfront,
292 292
 	}
293 293
 
294 294
 	/* Grant access to shared ring */
295
-	xengrant_permit_access ( xen, ring->ref, xendev->backend_id, 0,
296
-				 ring->sring.raw );
295
+	if ( ( rc = xengrant_permit_access ( xen, ring->ref, xendev->backend_id,
296
+					     0, ring->sring.raw ) ) != 0 ) {
297
+		DBGC ( netfront, "NETFRONT %s could not permit access to "
298
+		       "%#08lx: %s\n", xendev->key,
299
+		       virt_to_phys ( ring->sring.raw ), strerror ( rc ) );
300
+		goto err_permit_access;
301
+	}
297 302
 
298 303
 	/* Publish shared ring reference */
299 304
 	if ( ( rc = netfront_write_num ( netfront, ring->ref_key,
@@ -309,6 +314,7 @@ static int netfront_create_ring ( struct netfront_nic *netfront,
309 314
 	netfront_rm ( netfront, ring->ref_key );
310 315
  err_write_num:
311 316
 	xengrant_invalidate ( xen, ring->ref );
317
+ err_permit_access:
312 318
 	free_dma ( ring->sring.raw, PAGE_SIZE );
313 319
  err_alloc:
314 320
 	return rc;
@@ -320,39 +326,53 @@ static int netfront_create_ring ( struct netfront_nic *netfront,
320 326
  * @v netfront		Netfront device
321 327
  * @v ring		Descriptor ring
322 328
  * @v iobuf		I/O buffer
329
+ * @v id		Buffer ID to fill in
323 330
  * @v ref		Grant reference to fill in
324
- * @ret id		Buffer ID
331
+ * @ret rc		Return status code
325 332
  *
326 333
  * The caller is responsible for ensuring that there is space in the
327 334
  * ring.
328 335
  */
329
-static unsigned int netfront_push ( struct netfront_nic *netfront,
330
-				    struct netfront_ring *ring,
331
-				    struct io_buffer *iobuf,
332
-				    grant_ref_t *ref ) {
336
+static int netfront_push ( struct netfront_nic *netfront,
337
+			   struct netfront_ring *ring, struct io_buffer *iobuf,
338
+			   uint16_t *id, grant_ref_t *ref ) {
333 339
 	struct xen_device *xendev = netfront->xendev;
334 340
 	struct xen_hypervisor *xen = xendev->xen;
335
-	unsigned int id;
341
+	unsigned int next_id;
342
+	unsigned int next_ref;
343
+	int rc;
336 344
 
337 345
 	/* Sanity check */
338 346
 	assert ( ! netfront_ring_is_full ( ring ) );
339 347
 
340 348
 	/* Allocate buffer ID */
341
-	id = ring->ids[ ( ring->id_prod++ ) & ( ring->count - 1 ) ];
342
-
343
-	/* Store I/O buffer */
344
-	assert ( ring->iobufs[id] == NULL );
345
-	ring->iobufs[id] = iobuf;
349
+	next_id = ring->ids[ ring->id_prod & ( ring->count - 1 ) ];
350
+	next_ref = ring->refs[next_id];
346 351
 
347 352
 	/* Grant access to I/O buffer page.  I/O buffers are naturally
348 353
 	 * aligned, so we never need to worry about crossing a page
349 354
 	 * boundary.
350 355
 	 */
351
-	*ref = ring->refs[id];
352
-	xengrant_permit_access ( xen, ring->refs[id], xendev->backend_id, 0,
353
-				 iobuf->data );
356
+	if ( ( rc = xengrant_permit_access ( xen, next_ref, xendev->backend_id,
357
+					     0, iobuf->data ) ) != 0 ) {
358
+		DBGC ( netfront, "NETFRONT %s could not permit access to "
359
+		       "%#08lx: %s\n", xendev->key,
360
+		       virt_to_phys ( iobuf->data ), strerror ( rc ) );
361
+		return rc;
362
+	}
363
+
364
+	/* Store I/O buffer */
365
+	assert ( ring->iobufs[next_id] == NULL );
366
+	ring->iobufs[next_id] = iobuf;
354 367
 
355
-	return id;
368
+	/* Consume buffer ID */
369
+	ring->id_prod++;
370
+
371
+	/* Return buffer ID and grant reference */
372
+	*id = next_id;
373
+	*ref = next_ref;
374
+
375
+	return 0;
356 376
 }
357 377
 
358 378
 /**
@@ -431,13 +451,15 @@ static void netfront_destroy_ring ( struct netfront_nic *netfront,
431 451
 /**
432 452
  * Refill receive descriptor ring
433 453
  *
434
- * @v netfront		Netfront device
454
+ * @v netdev		Network device
435 455
  */
436
-static void netfront_refill_rx ( struct netfront_nic *netfront ) {
456
+static void netfront_refill_rx ( struct net_device *netdev ) {
457
+	struct netfront_nic *netfront = netdev->priv;
437 458
 	struct xen_device *xendev = netfront->xendev;
438 459
 	struct io_buffer *iobuf;
439 460
 	struct netif_rx_request *request;
440 461
 	int notify;
462
+	int rc;
441 463
 
442 464
 	/* Do nothing if ring is already full */
443 465
 	if ( netfront_ring_is_full ( &netfront->rx ) )
@@ -455,13 +477,20 @@ static void netfront_refill_rx ( struct netfront_nic *netfront ) {
455 477
 
456 478
 		/* Add to descriptor ring */
457 479
 		request = RING_GET_REQUEST ( &netfront->rx_fring,
458
-					     netfront->rx_fring.req_prod_pvt++);
459
-		request->id = netfront_push ( netfront, &netfront->rx, iobuf,
460
-					      &request->gref );
480
+					     netfront->rx_fring.req_prod_pvt );
481
+		if ( ( rc = netfront_push ( netfront, &netfront->rx,
482
+					    iobuf, &request->id,
483
+					    &request->gref ) ) != 0 ) {
484
+			netdev_rx_err ( netdev, iobuf, rc );
485
+			break;
486
+		}
461 487
 		DBGC2 ( netfront, "NETFRONT %s RX id %d ref %d is %#08lx+%zx\n",
462 488
 			xendev->key, request->id, request->gref,
463 489
 			virt_to_phys ( iobuf->data ), iob_tailroom ( iobuf ) );
464 490
 
491
+		/* Move to next descriptor */
492
+		netfront->rx_fring.req_prod_pvt++;
493
+
465 494
 	} while ( ! netfront_ring_is_full ( &netfront->rx ) );
466 495
 
467 496
 	/* Push new descriptors and notify backend if applicable */
@@ -526,7 +555,7 @@ static int netfront_open ( struct net_device *netdev ) {
526 555
 	}
527 556
 
528 557
 	/* Refill receive descriptor ring */
529
-	netfront_refill_rx ( netfront );
558
+	netfront_refill_rx ( netdev );
530 559
 
531 560
 	/* Set link up */
532 561
 	netdev_link_up ( netdev );
@@ -614,6 +643,7 @@ static int netfront_transmit ( struct net_device *netdev,
614 643
 	struct xen_device *xendev = netfront->xendev;
615 644
 	struct netif_tx_request *request;
616 645
 	int notify;
646
+	int rc;
617 647
 
618 648
 	/* Check that we have space in the ring */
619 649
 	if ( netfront_ring_is_full ( &netfront->tx ) ) {
@@ -624,9 +654,11 @@ static int netfront_transmit ( struct net_device *netdev,
624 654
 
625 655
 	/* Add to descriptor ring */
626 656
 	request = RING_GET_REQUEST ( &netfront->tx_fring,
627
-				     netfront->tx_fring.req_prod_pvt++ );
628
-	request->id = netfront_push ( netfront, &netfront->tx, iobuf,
629
-				      &request->gref );
657
+				     netfront->tx_fring.req_prod_pvt );
658
+	if ( ( rc = netfront_push ( netfront, &netfront->tx, iobuf,
659
+				    &request->id, &request->gref ) ) != 0 ) {
660
+		return rc;
661
+	}
630 662
 	request->offset = ( virt_to_phys ( iobuf->data ) & ( PAGE_SIZE - 1 ) );
631 663
 	request->flags = NETTXF_data_validated;
632 664
 	request->size = iob_len ( iobuf );
@@ -634,6 +666,9 @@ static int netfront_transmit ( struct net_device *netdev,
634 666
 		xendev->key, request->id, request->gref,
635 667
 		virt_to_phys ( iobuf->data ), iob_len ( iobuf ) );
636 668
 
669
+	/* Consume descriptor */
670
+	netfront->tx_fring.req_prod_pvt++;
671
+
637 672
 	/* Push new descriptor and notify backend if applicable */
638 673
 	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY ( &netfront->tx_fring, notify );
639 674
 	if ( notify )
@@ -727,7 +762,6 @@ static void netfront_poll_rx ( struct net_device *netdev ) {
727 762
  * @v netdev		Network device
728 763
  */
729 764
 static void netfront_poll ( struct net_device *netdev ) {
730
-	struct netfront_nic *netfront = netdev->priv;
731 765
 
732 766
 	/* Poll for TX completions */
733 767
 	netfront_poll_tx ( netdev );
@@ -736,7 +770,7 @@ static void netfront_poll ( struct net_device *netdev ) {
736 770
 	netfront_poll_rx ( netdev );
737 771
 
738 772
 	/* Refill RX descriptor ring */
739
-	netfront_refill_rx ( netfront );
773
+	netfront_refill_rx ( netdev );
740 774
 }
741 775
 
742 776
 /** Network device operations */

+ 5
- 3
src/include/ipxe/xen.h 查看文件

@@ -27,9 +27,11 @@ struct xen_hypercall;
27 27
 /** A Xen grant table */
28 28
 struct xen_grant {
29 29
 	/** Grant table entries */
30
-	union grant_entry_v2 *table;
31
-	/** Number of grant table entries (must be a power of two) */
32
-	unsigned int count;
30
+	struct grant_entry_v1 *table;
31
+	/** Total grant table length */
32
+	size_t len;
33
+	/** Entry size shift (for later version tables) */
34
+	unsigned int shift;
33 35
 	/** Number of grant table entries in use */
34 36
 	unsigned int used;
35 37
 	/** Most recently used grant reference */

+ 139
- 9
src/include/ipxe/xengrant.h 查看文件

@@ -10,10 +10,14 @@
10 10
 FILE_LICENCE ( GPL2_OR_LATER );
11 11
 
12 12
 #include <stdint.h>
13
+#include <stdlib.h>
13 14
 #include <ipxe/io.h>
14 15
 #include <ipxe/xen.h>
15 16
 #include <xen/grant_table.h>
16 17
 
18
+/** Induced failure rate (for testing) */
19
+#define XENGRANT_FAIL_RATE 0
20
+
17 21
 /**
18 22
  * Query grant table size
19 23
  *
@@ -46,6 +50,90 @@ xengrant_set_version ( struct xen_hypervisor *xen,
46 50
 				 virt_to_phys ( version ), 1 );
47 51
 }
48 52
 
53
+/**
54
+ * Get grant table version
55
+ *
56
+ * @v xen		Xen hypervisor
57
+ * @v version		Version
58
+ * @ret xenrc		Xen status code
59
+ */
60
+static inline __attribute__ (( always_inline )) int
61
+xengrant_get_version ( struct xen_hypervisor *xen,
62
+		       struct gnttab_get_version *version ) {
63
+
64
+	return xen_hypercall_3 ( xen, __HYPERVISOR_grant_table_op,
65
+				 GNTTABOP_get_version,
66
+				 virt_to_phys ( version ), 1 );
67
+}
68
+
69
+/**
70
+ * Get number of grant table entries
71
+ *
72
+ * @v xen		Xen hypervisor
73
+ * @ret entries		Number of grant table entries
74
+ */
75
+static inline __attribute__ (( always_inline )) unsigned int
76
+xengrant_entries ( struct xen_hypervisor *xen ) {
77
+
78
+	return ( ( xen->grant.len / sizeof ( xen->grant.table[0] ) )
79
+		 >> xen->grant.shift );
80
+}
81
+
82
+/**
83
+ * Get grant table entry header
84
+ *
85
+ * @v xen		Xen hypervisor
86
+ * @v ref		Grant reference
87
+ * @ret hdr		Grant table entry header
88
+ */
89
+static inline __attribute__ (( always_inline )) struct grant_entry_header *
90
+xengrant_header ( struct xen_hypervisor *xen, grant_ref_t ref ) {
91
+	struct grant_entry_v1 *v1;
92
+
93
+	v1 = &xen->grant.table[ ref << xen->grant.shift ];
94
+	return ( container_of ( &v1->flags, struct grant_entry_header, flags ));
95
+}
96
+
97
+/**
98
+ * Get version 1 grant table entry
99
+ *
100
+ * @v hdr		Grant table entry header
101
+ * @ret v1		Version 1 grant table entry
102
+ */
103
+static inline __attribute__ (( always_inline )) struct grant_entry_v1 *
104
+xengrant_v1 ( struct grant_entry_header *hdr ) {
105
+
106
+	return ( container_of ( &hdr->flags, struct grant_entry_v1, flags ) );
107
+}
108
+
109
+/**
110
+ * Get version 2 grant table entry
111
+ *
112
+ * @v hdr		Grant table entry header
113
+ * @ret v2		Version 2 grant table entry
114
+ */
115
+static inline __attribute__ (( always_inline )) union grant_entry_v2 *
116
+xengrant_v2 ( struct grant_entry_header *hdr ) {
117
+
118
+	return ( container_of ( &hdr->flags, union grant_entry_v2, hdr.flags ));
119
+}
120
+
121
+/**
122
+ * Zero grant table entry
123
+ *
124
+ * @v xen		Xen hypervisor
125
+ * @v hdr		Grant table entry header
126
+ */
127
+static inline void xengrant_zero ( struct xen_hypervisor *xen,
128
+				   struct grant_entry_header *hdr ) {
129
+	uint32_t *dword = ( ( uint32_t * ) hdr );
130
+	unsigned int i = ( ( sizeof ( xen->grant.table[0] ) / sizeof ( *dword ))
131
+			   << xen->grant.shift );
132
+
133
+	while ( i-- )
134
+		writel ( 0, dword++ );
135
+}
136
+
49 137
 /**
50 138
  * Invalidate access to a page
51 139
  *
@@ -54,10 +142,10 @@ xengrant_set_version ( struct xen_hypervisor *xen,
54 142
  */
55 143
 static inline __attribute__ (( always_inline )) void
56 144
 xengrant_invalidate ( struct xen_hypervisor *xen, grant_ref_t ref ) {
57
-	union grant_entry_v2 *entry = &xen->grant.table[ref];
145
+	struct grant_entry_header *hdr = xengrant_header ( xen, ref );
58 146
 
59 147
 	/* Sanity check */
60
-	assert ( ( readw ( &entry->hdr.flags ) &
148
+	assert ( ( readw ( &hdr->flags ) &
61 149
 		   ( GTF_reading | GTF_writing ) ) == 0 );
62 150
 
63 151
 	/* This should apparently be done using a cmpxchg instruction.
@@ -65,7 +153,10 @@ xengrant_invalidate ( struct xen_hypervisor *xen, grant_ref_t ref ) {
65 153
 	 * mainly since our control flow generally does not permit
66 154
 	 * failure paths to themselves fail.
67 155
 	 */
68
-	writew ( 0, &entry->hdr.flags );
156
+	writew ( 0, &hdr->flags );
157
+
158
+	/* Leave reference marked as in-use (see xengrant_alloc()) */
159
+	writew ( DOMID_SELF, &hdr->domid );
69 160
 }
70 161
 
71 162
 /**
@@ -76,24 +167,63 @@ xengrant_invalidate ( struct xen_hypervisor *xen, grant_ref_t ref ) {
76 167
  * @v domid		Domain ID
77 168
  * @v subflags		Additional flags
78 169
  * @v page		Page start
170
+ * @ret rc		Return status code
79 171
  */
80
-static inline __attribute__ (( always_inline )) void
172
+static inline __attribute__ (( always_inline )) int
81 173
 xengrant_permit_access ( struct xen_hypervisor *xen, grant_ref_t ref,
82 174
 			 domid_t domid, unsigned int subflags, void *page ) {
83
-	union grant_entry_v2 *entry = &xen->grant.table[ref];
175
+	struct grant_entry_header *hdr = xengrant_header ( xen, ref );
176
+	struct grant_entry_v1 *v1 = xengrant_v1 ( hdr );
177
+	union grant_entry_v2 *v2 = xengrant_v2 ( hdr );
84 178
 	unsigned long frame = ( virt_to_phys ( page ) / PAGE_SIZE );
85 179
 
86
-	writew ( domid, &entry->full_page.hdr.domid );
180
+	/* Fail (for test purposes) if applicable */
181
+	if ( ( XENGRANT_FAIL_RATE > 0 ) &&
182
+	     ( random() % XENGRANT_FAIL_RATE ) == 0 ) {
183
+		return -EAGAIN;
184
+	}
185
+
186
+	/* Record frame number.  This may fail on a 64-bit system if
187
+	 * we are using v1 grant tables.  On a 32-bit system, there is
188
+	 * no way for this code path to fail (with either v1 or v2
189
+	 * grant tables); we allow the compiler to optimise the
190
+	 * failure paths away to save space.
191
+	 */
87 192
 	if ( sizeof ( physaddr_t ) == sizeof ( uint64_t ) ) {
88
-		writeq ( frame, &entry->full_page.frame );
193
+
194
+		/* 64-bit system */
195
+		if ( xen->grant.shift ) {
196
+			/* Version 2 table: no possible failure */
197
+			writeq ( frame, &v2->full_page.frame );
198
+		} else {
199
+			/* Version 1 table: may fail if address above 16TB */
200
+			if ( frame > 0xffffffffUL )
201
+				return -ERANGE;
202
+			writel ( frame, &v1->frame );
203
+		}
204
+
89 205
 	} else {
90
-		writel ( frame, &entry->full_page.frame );
206
+
207
+		/* 32-bit system */
208
+		if ( xen->grant.shift ) {
209
+			/* Version 2 table: no possible failure */
210
+			writel ( frame, &v2->full_page.frame );
211
+		} else {
212
+			/* Version 1 table: no possible failure */
213
+			writel ( frame, &v1->frame );
214
+		}
91 215
 	}
216
+
217
+	/* Record domain ID and flags */
218
+	writew ( domid, &hdr->domid );
92 219
 	wmb();
93
-	writew ( ( GTF_permit_access | subflags ), &entry->full_page.hdr.flags);
220
+	writew ( ( GTF_permit_access | subflags ), &hdr->flags );
94 221
 	wmb();
222
+
223
+	return 0;
95 224
 }
96 225
 
226
+extern int xengrant_init ( struct xen_hypervisor *xen );
97 227
 extern int xengrant_alloc ( struct xen_hypervisor *xen, grant_ref_t *refs,
98 228
 			    unsigned int count );
99 229
 extern void xengrant_free ( struct xen_hypervisor *xen, grant_ref_t *refs,

+ 120
- 17
src/interface/xen/xengrant.c 查看文件

@@ -20,6 +20,7 @@
20 20
 FILE_LICENCE ( GPL2_OR_LATER );
21 21
 
22 22
 #include <stdint.h>
23
+#include <strings.h>
23 24
 #include <errno.h>
24 25
 #include <assert.h>
25 26
 #include <ipxe/io.h>
@@ -32,6 +33,106 @@ FILE_LICENCE ( GPL2_OR_LATER );
32 33
  *
33 34
  */
34 35
 
36
+/** Grant table version to try setting
37
+ *
38
+ * Using version 1 grant tables limits guests to using 16TB of
39
+ * grantable RAM, and prevents the use of subpage grants.  Some
40
+ * versions of the Xen hypervisor refuse to allow the grant table
41
+ * version to be set after the first grant references have been
42
+ * created, so the loaded operating system may be stuck with whatever
43
+ * choice we make here.  We therefore currently use version 2 grant
44
+ * tables, since they give the most flexibility to the loaded OS.
45
+ *
46
+ * Current versions (7.2.0) of the Windows PV drivers have no support
47
+ * for version 2 grant tables, and will merrily create version 1
48
+ * entries in what the hypervisor believes to be a version 2 table.
49
+ * This causes some confusion.
50
+ *
51
+ * Avoid this problem by attempting to use version 1 tables, since
52
+ * otherwise we may render Windows unable to boot.
53
+ *
54
+ * Play nicely with other potential bootloaders by accepting either
55
+ * version 1 or version 2 grant tables (if we are unable to set our
56
+ * requested version).
57
+ */
58
+#define XENGRANT_TRY_VERSION 1
59
+
60
+/**
61
+ * Initialise grant table
62
+ *
63
+ * @v xen		Xen hypervisor
64
+ * @ret rc		Return status code
65
+ */
66
+int xengrant_init ( struct xen_hypervisor *xen ) {
67
+	struct gnttab_query_size size;
68
+	struct gnttab_set_version set_version;
69
+	struct gnttab_get_version get_version;
70
+	struct grant_entry_v1 *v1;
71
+	union grant_entry_v2 *v2;
72
+	unsigned int version;
73
+	int xenrc;
74
+	int rc;
75
+
76
+	/* Get grant table size */
77
+	size.dom = DOMID_SELF;
78
+	if ( ( xenrc = xengrant_query_size ( xen, &size ) ) != 0 ) {
79
+		rc = -EXEN ( xenrc );
80
+		DBGC ( xen, "XENGRANT could not get table size: %s\n",
81
+		       strerror ( rc ) );
82
+		return rc;
83
+	}
84
+	xen->grant.len = ( size.nr_frames * PAGE_SIZE );
85
+
86
+	/* Set grant table version, if applicable */
87
+	set_version.version = XENGRANT_TRY_VERSION;
88
+	if ( ( xenrc = xengrant_set_version ( xen, &set_version ) ) != 0 ) {
89
+		rc = -EXEN ( xenrc );
90
+		DBGC ( xen, "XENGRANT could not set version %d: %s\n",
91
+		       XENGRANT_TRY_VERSION, strerror ( rc ) );
92
+		/* Continue; use whatever version is current */
93
+	}
94
+
95
+	/* Get grant table version */
96
+	get_version.dom = DOMID_SELF;
97
+	get_version.pad = 0;
98
+	if ( ( xenrc = xengrant_get_version ( xen, &get_version ) ) == 0 ) {
99
+		version = get_version.version;
100
+		switch ( version ) {
101
+
102
+		case 0:
103
+			/* Version not yet specified: will be version 1 */
104
+			version = 1;
105
+			break;
106
+
107
+		case 1 :
108
+			/* Version 1 table: nothing special to do */
109
+			break;
110
+
111
+		case 2:
112
+			/* Version 2 table: configure shift appropriately */
113
+			xen->grant.shift = ( fls ( sizeof ( *v2 ) /
114
+						   sizeof ( *v1 ) ) - 1 );
115
+			break;
116
+
117
+		default:
118
+			/* Unsupported version */
119
+			DBGC ( xen, "XENGRANT detected unsupported version "
120
+			       "%d\n", version );
121
+			return -ENOTSUP;
122
+
123
+		}
124
+	} else {
125
+		rc = -EXEN ( xenrc );
126
+		DBGC ( xen, "XENGRANT could not get version (assuming v1): "
127
+		       "%s\n", strerror ( rc ) );
128
+		version = 1;
129
+	}
130
+
131
+	DBGC ( xen, "XENGRANT using v%d table with %d entries\n",
132
+	       version, xengrant_entries ( xen ) );
133
+	return 0;
134
+}
135
+
35 136
 /**
36 137
  * Allocate grant references
37 138
  *
@@ -42,22 +143,22 @@ FILE_LICENCE ( GPL2_OR_LATER );
42 143
  */
43 144
 int xengrant_alloc ( struct xen_hypervisor *xen, grant_ref_t *refs,
44 145
 		     unsigned int count ) {
45
-	union grant_entry_v2 *entry;
46
-	unsigned int mask = ( xen->grant.count - 1 );
146
+	struct grant_entry_header *hdr;
147
+	unsigned int entries = xengrant_entries ( xen );
148
+	unsigned int mask = ( entries - 1 );
47 149
 	unsigned int check = 0;
48 150
 	unsigned int avail;
49 151
 	unsigned int ref;
50 152
 
51 153
 	/* Fail unless we have enough references available */
52
-	avail = ( xen->grant.count - xen->grant.used -
53
-		  GNTTAB_NR_RESERVED_ENTRIES );
154
+	avail = ( entries - xen->grant.used - GNTTAB_NR_RESERVED_ENTRIES );
54 155
 	if ( avail < count ) {
55 156
 		DBGC ( xen, "XENGRANT cannot allocate %d references (only %d "
56
-		       "of %d available)\n", count, avail, xen->grant.count );
157
+		       "of %d available)\n", count, avail, entries );
57 158
 		return -ENOBUFS;
58 159
 	}
59 160
 	DBGC ( xen, "XENGRANT allocating %d references (from %d of %d "
60
-	       "available)\n", count, avail, xen->grant.count );
161
+	       "available)\n", count, avail, entries );
61 162
 
62 163
 	/* Update number of references used */
63 164
 	xen->grant.used += count;
@@ -66,24 +167,27 @@ int xengrant_alloc ( struct xen_hypervisor *xen, grant_ref_t *refs,
66 167
 	for ( ref = xen->grant.ref ; count ; ref = ( ( ref + 1 ) & mask ) ) {
67 168
 
68 169
 		/* Sanity check */
69
-		assert ( check++ < xen->grant.count );
170
+		assert ( check++ < entries );
70 171
 
71 172
 		/* Skip reserved references */
72 173
 		if ( ref < GNTTAB_NR_RESERVED_ENTRIES )
73 174
 			continue;
74 175
 
75 176
 		/* Skip in-use references */
76
-		entry = &xen->grant.table[ref];
77
-		if ( readw ( &entry->hdr.flags ) & GTF_type_mask )
177
+		hdr = xengrant_header ( xen, ref );
178
+		if ( readw ( &hdr->flags ) & GTF_type_mask )
78 179
 			continue;
79
-		if ( readw ( &entry->hdr.domid ) == DOMID_SELF )
180
+		if ( readw ( &hdr->domid ) == DOMID_SELF )
80 181
 			continue;
81 182
 
183
+		/* Zero reference */
184
+		xengrant_zero ( xen, hdr );
185
+
82 186
 		/* Mark reference as in-use.  We leave the flags as
83 187
 		 * empty (to avoid creating a valid grant table entry)
84 188
 		 * and set the domid to DOMID_SELF.
85 189
 		 */
86
-		writew ( DOMID_SELF, &entry->hdr.domid );
190
+		writew ( DOMID_SELF, &hdr->domid );
87 191
 		DBGC2 ( xen, "XENGRANT allocated ref %d\n", ref );
88 192
 
89 193
 		/* Record reference */
@@ -105,7 +209,7 @@ int xengrant_alloc ( struct xen_hypervisor *xen, grant_ref_t *refs,
105 209
  */
106 210
 void xengrant_free ( struct xen_hypervisor *xen, grant_ref_t *refs,
107 211
 		     unsigned int count ) {
108
-	union grant_entry_v2 *entry;
212
+	struct grant_entry_header *hdr;
109 213
 	unsigned int ref;
110 214
 	unsigned int i;
111 215
 
@@ -114,12 +218,11 @@ void xengrant_free ( struct xen_hypervisor *xen, grant_ref_t *refs,
114 218
 
115 219
 		/* Sanity check */
116 220
 		ref = refs[i];
117
-		assert ( ref < xen->grant.count );
221
+		assert ( ref < xengrant_entries ( xen ) );
118 222
 
119
-		/* Mark reference as unused */
120
-		entry = &xen->grant.table[ref];
121
-		writew ( 0, &entry->hdr.flags );
122
-		writew ( 0, &entry->hdr.domid );
223
+		/* Zero reference */
224
+		hdr = xengrant_header ( xen, ref );
225
+		xengrant_zero ( xen, hdr );
123 226
 		DBGC2 ( xen, "XENGRANT freed ref %d\n", ref );
124 227
 	}
125 228
 }

正在加载...
取消
保存