Browse Source

Use total free memory as advertised window.  This seems to be sufficient to avoid drops even on slow NICs.
tags/v0.9.3
Michael Brown 18 years ago
parent
commit
c014f607a8
3 changed files with 31 additions and 29 deletions
  1. 14
    25
      src/include/gpxe/tcp.h
  2. 7
    2
      src/net/netdevice.c
  3. 10
    2
      src/net/tcp.c

+ 14
- 25
src/include/gpxe/tcp.h View File

211
 #define MIN_PKB_LEN	MAX_HDR_LEN + 100 /* To account for padding by LL */
211
 #define MIN_PKB_LEN	MAX_HDR_LEN + 100 /* To account for padding by LL */
212
 
212
 
213
 /**
213
 /**
214
- * Advertised TCP window size
214
+ * Maximum advertised TCP window size
215
  *
215
  *
216
- * 
217
- * Our TCP window is actually limited by the amount of space available
218
- * for RX packets in the NIC's RX ring; we tend to populate the rings
219
- * with far fewer descriptors than a typical driver.  This would
220
- * result in a desperately small window size, which kills WAN download
221
- * performance; the maximum bandwidth on any link is limited to
222
- *
223
- *    max_bandwidth = ( tcp_window / round_trip_time )
224
- *
225
- * With a 4kB window, which probably accurately reflects our amount of
226
- * buffer space, and a WAN RTT of say 200ms, this gives a maximum
227
- * achievable bandwidth of 20kB/s, which is not acceptable.
228
- *
229
- * We therefore aim to process packets as fast as they arrive, and
230
- * advertise an "infinite" window.  If we don't process packets as
231
- * fast as they arrive, then we will drop packets and have to incur
232
- * the retransmission penalty.
216
+ * We estimate the TCP window size as the amount of free memory we
217
+ * have.  This is not strictly accurate (since it ignores any space
218
+ * already allocated as RX buffers), but it will do for now.
233
  *
219
  *
234
  * Since we don't store out-of-order received packets, the
220
  * Since we don't store out-of-order received packets, the
235
  * retransmission penalty is that the whole window contents must be
221
  * retransmission penalty is that the whole window contents must be
236
- * resent.
222
+ * resent.  This suggests keeping the window size small, but bear in
223
+ * mind that the maximum bandwidth on any link is limited to
224
+ *
225
+ *    max_bandwidth = ( tcp_window / round_trip_time )
237
  *
226
  *
238
- * We choose to compromise on a window size of 64kB (which is the
239
- * maximum that can be represented without using TCP options).  This
240
- * gives a maximum bandwidth of 320kB/s at 200ms RTT, which is
241
- * probably faster than the actual link bandwidth.  It also limits
242
- * retransmissions to 64kB, which is reasonable.
227
+ * With a 48kB window, which probably accurately reflects our amount
228
+ * of free memory, and a WAN RTT of say 200ms, this gives a maximum
229
+ * bandwidth of 240kB/s.  This is sufficiently close to realistic that
230
+ * we will need to be careful that our advertised window doesn't end
231
+ * up limiting WAN download speeds.
243
  *
232
  *
244
  * Finally, since the window goes into a 16-bit field and we cannot
233
  * Finally, since the window goes into a 16-bit field and we cannot
245
  * actually use 65536, we use a window size of (65536-4) to ensure
234
  * actually use 65536, we use a window size of (65536-4) to ensure
246
  * that payloads remain dword-aligned.
235
  * that payloads remain dword-aligned.
247
  */
236
  */
248
-#define TCP_WINDOW_SIZE	( 65536 - 4 )
237
+#define TCP_MAX_WINDOW_SIZE	( 65536 - 4 )
249
 
238
 
250
 /**
239
 /**
251
  * Advertised TCP MSS
240
  * Advertised TCP MSS

+ 7
- 2
src/net/netdevice.c View File

398
 		/* Poll for new packets */
398
 		/* Poll for new packets */
399
 		netdev_poll ( netdev, -1U );
399
 		netdev_poll ( netdev, -1U );
400
 
400
 
401
-		/* Process received packets */
402
-		while ( ( pkb = netdev_rx_dequeue ( netdev ) ) ) {
401
+		/* Process at most one received packet.  Give priority
402
+		 * to getting packets out of the NIC over processing
403
+		 * the received packets, because we advertise a window
404
+		 * that assumes that we can receive packets from the
405
+		 * NIC faster than they arrive.
406
+		 */
407
+		if ( ( pkb = netdev_rx_dequeue ( netdev ) ) ) {
403
 			DBGC ( netdev, "NETDEV %p processing %p\n",
408
 			DBGC ( netdev, "NETDEV %p processing %p\n",
404
 			       netdev, pkb );
409
 			       netdev, pkb );
405
 			netdev->ll_protocol->rx ( pkb, netdev );
410
 			netdev->ll_protocol->rx ( pkb, netdev );

+ 10
- 2
src/net/tcp.c View File

6
 #include <timer.h>
6
 #include <timer.h>
7
 #include <vsprintf.h>
7
 #include <vsprintf.h>
8
 #include <gpxe/pkbuff.h>
8
 #include <gpxe/pkbuff.h>
9
+#include <gpxe/malloc.h>
9
 #include <gpxe/retry.h>
10
 #include <gpxe/retry.h>
10
 #include <gpxe/tcpip.h>
11
 #include <gpxe/tcpip.h>
11
 #include <gpxe/tcp.h>
12
 #include <gpxe/tcp.h>
265
 	unsigned int flags;
266
 	unsigned int flags;
266
 	size_t len;
267
 	size_t len;
267
 	size_t seq_len;
268
 	size_t seq_len;
269
+	size_t window;
268
 	int rc;
270
 	int rc;
269
 
271
 
270
 	/* Allocate space to the TX buffer */
272
 	/* Allocate space to the TX buffer */
322
 	if ( seq_len )
324
 	if ( seq_len )
323
 		start_timer ( &conn->timer );
325
 		start_timer ( &conn->timer );
324
 
326
 
327
+	/* Estimate window size */
328
+	window = freemem;
329
+	if ( window > TCP_MAX_WINDOW_SIZE )
330
+		window = TCP_MAX_WINDOW_SIZE;
331
+	window &= ~0x03; /* Keep everything dword-aligned */
332
+
325
 	/* Fill up the TCP header */
333
 	/* Fill up the TCP header */
326
 	payload = pkb->data;
334
 	payload = pkb->data;
327
 	if ( flags & TCP_SYN ) {
335
 	if ( flags & TCP_SYN ) {
338
 	tcphdr->ack = htonl ( conn->rcv_ack );
346
 	tcphdr->ack = htonl ( conn->rcv_ack );
339
 	tcphdr->hlen = ( ( payload - pkb->data ) << 2 );
347
 	tcphdr->hlen = ( ( payload - pkb->data ) << 2 );
340
 	tcphdr->flags = flags;
348
 	tcphdr->flags = flags;
341
-	tcphdr->win = htons ( TCP_WINDOW_SIZE );
349
+	tcphdr->win = htons ( window );
342
 	tcphdr->csum = tcpip_chksum ( pkb->data, pkb_len ( pkb ) );
350
 	tcphdr->csum = tcpip_chksum ( pkb->data, pkb_len ( pkb ) );
343
 
351
 
344
 	/* Dump header */
352
 	/* Dump header */
492
 	tcphdr->ack = in_tcphdr->seq;
500
 	tcphdr->ack = in_tcphdr->seq;
493
 	tcphdr->hlen = ( ( sizeof ( *tcphdr ) / 4 ) << 4 );
501
 	tcphdr->hlen = ( ( sizeof ( *tcphdr ) / 4 ) << 4 );
494
 	tcphdr->flags = ( TCP_RST | TCP_ACK );
502
 	tcphdr->flags = ( TCP_RST | TCP_ACK );
495
-	tcphdr->win = htons ( TCP_WINDOW_SIZE );
503
+	tcphdr->win = htons ( TCP_MAX_WINDOW_SIZE );
496
 	tcphdr->csum = tcpip_chksum ( pkb->data, pkb_len ( pkb ) );
504
 	tcphdr->csum = tcpip_chksum ( pkb->data, pkb_len ( pkb ) );
497
 
505
 
498
 	/* Dump header */
506
 	/* Dump header */

Loading…
Cancel
Save