Преглед на файлове

Use total free memory as advertised window. This seems to be sufficient

to avoid drops even on slow NICs.
tags/v0.9.3
Michael Brown преди 18 години
родител
ревизия
c014f607a8
променени са 3 файла, в които са добавени 31 реда и са изтрити 29 реда
  1. 14
    25
      src/include/gpxe/tcp.h
  2. 7
    2
      src/net/netdevice.c
  3. 10
    2
      src/net/tcp.c

+ 14
- 25
src/include/gpxe/tcp.h Целия файл

@@ -211,41 +211,30 @@ struct tcp_mss_option {
211 211
 #define MIN_PKB_LEN	MAX_HDR_LEN + 100 /* To account for padding by LL */
212 212
 
213 213
 /**
214
- * Advertised TCP window size
214
+ * Maximum advertised TCP window size
215 215
  *
216
- * 
217
- * Our TCP window is actually limited by the amount of space available
218
- * for RX packets in the NIC's RX ring; we tend to populate the rings
219
- * with far fewer descriptors than a typical driver.  This would
220
- * result in a desperately small window size, which kills WAN download
221
- * performance; the maximum bandwidth on any link is limited to
222
- *
223
- *    max_bandwidth = ( tcp_window / round_trip_time )
224
- *
225
- * With a 4kB window, which probably accurately reflects our amount of
226
- * buffer space, and a WAN RTT of say 200ms, this gives a maximum
227
- * achievable bandwidth of 20kB/s, which is not acceptable.
228
- *
229
- * We therefore aim to process packets as fast as they arrive, and
230
- * advertise an "infinite" window.  If we don't process packets as
231
- * fast as they arrive, then we will drop packets and have to incur
232
- * the retransmission penalty.
216
+ * We estimate the TCP window size as the amount of free memory we
217
+ * have.  This is not strictly accurate (since it ignores any space
218
+ * already allocated as RX buffers), but it will do for now.
233 219
  *
234 220
  * Since we don't store out-of-order received packets, the
235 221
  * retransmission penalty is that the whole window contents must be
236
- * resent.
222
+ * resent.  This suggests keeping the window size small, but bear in
223
+ * mind that the maximum bandwidth on any link is limited to
224
+ *
225
+ *    max_bandwidth = ( tcp_window / round_trip_time )
237 226
  *
238
- * We choose to compromise on a window size of 64kB (which is the
239
- * maximum that can be represented without using TCP options).  This
240
- * gives a maximum bandwidth of 320kB/s at 200ms RTT, which is
241
- * probably faster than the actual link bandwidth.  It also limits
242
- * retransmissions to 64kB, which is reasonable.
227
+ * With a 48kB window, which probably accurately reflects our amount
228
+ * of free memory, and a WAN RTT of say 200ms, this gives a maximum
229
+ * bandwidth of 240kB/s.  This is sufficiently close to realistic that
230
+ * we will need to be careful that our advertised window doesn't end
231
+ * up limiting WAN download speeds.
243 232
  *
244 233
  * Finally, since the window goes into a 16-bit field and we cannot
245 234
  * actually use 65536, we use a window size of (65536-4) to ensure
246 235
  * that payloads remain dword-aligned.
247 236
  */
248
-#define TCP_WINDOW_SIZE	( 65536 - 4 )
237
+#define TCP_MAX_WINDOW_SIZE	( 65536 - 4 )
249 238
 
250 239
 /**
251 240
  * Advertised TCP MSS

+ 7
- 2
src/net/netdevice.c Целия файл

@@ -398,8 +398,13 @@ static void net_step ( struct process *process ) {
398 398
 		/* Poll for new packets */
399 399
 		netdev_poll ( netdev, -1U );
400 400
 
401
-		/* Process received packets */
402
-		while ( ( pkb = netdev_rx_dequeue ( netdev ) ) ) {
401
+		/* Process at most one received packet.  Give priority
402
+		 * to getting packets out of the NIC over processing
403
+		 * the received packets, because we advertise a window
404
+		 * that assumes that we can receive packets from the
405
+		 * NIC faster than they arrive.
406
+		 */
407
+		if ( ( pkb = netdev_rx_dequeue ( netdev ) ) ) {
403 408
 			DBGC ( netdev, "NETDEV %p processing %p\n",
404 409
 			       netdev, pkb );
405 410
 			netdev->ll_protocol->rx ( pkb, netdev );

+ 10
- 2
src/net/tcp.c Целия файл

@@ -6,6 +6,7 @@
6 6
 #include <timer.h>
7 7
 #include <vsprintf.h>
8 8
 #include <gpxe/pkbuff.h>
9
+#include <gpxe/malloc.h>
9 10
 #include <gpxe/retry.h>
10 11
 #include <gpxe/tcpip.h>
11 12
 #include <gpxe/tcp.h>
@@ -265,6 +266,7 @@ static int tcp_senddata_conn ( struct tcp_connection *conn, int force_send ) {
265 266
 	unsigned int flags;
266 267
 	size_t len;
267 268
 	size_t seq_len;
269
+	size_t window;
268 270
 	int rc;
269 271
 
270 272
 	/* Allocate space to the TX buffer */
@@ -322,6 +324,12 @@ static int tcp_senddata_conn ( struct tcp_connection *conn, int force_send ) {
322 324
 	if ( seq_len )
323 325
 		start_timer ( &conn->timer );
324 326
 
327
+	/* Estimate window size */
328
+	window = freemem;
329
+	if ( window > TCP_MAX_WINDOW_SIZE )
330
+		window = TCP_MAX_WINDOW_SIZE;
331
+	window &= ~0x03; /* Keep everything dword-aligned */
332
+
325 333
 	/* Fill up the TCP header */
326 334
 	payload = pkb->data;
327 335
 	if ( flags & TCP_SYN ) {
@@ -338,7 +346,7 @@ static int tcp_senddata_conn ( struct tcp_connection *conn, int force_send ) {
338 346
 	tcphdr->ack = htonl ( conn->rcv_ack );
339 347
 	tcphdr->hlen = ( ( payload - pkb->data ) << 2 );
340 348
 	tcphdr->flags = flags;
341
-	tcphdr->win = htons ( TCP_WINDOW_SIZE );
349
+	tcphdr->win = htons ( window );
342 350
 	tcphdr->csum = tcpip_chksum ( pkb->data, pkb_len ( pkb ) );
343 351
 
344 352
 	/* Dump header */
@@ -492,7 +500,7 @@ static int tcp_send_reset ( struct tcp_connection *conn,
492 500
 	tcphdr->ack = in_tcphdr->seq;
493 501
 	tcphdr->hlen = ( ( sizeof ( *tcphdr ) / 4 ) << 4 );
494 502
 	tcphdr->flags = ( TCP_RST | TCP_ACK );
495
-	tcphdr->win = htons ( TCP_WINDOW_SIZE );
503
+	tcphdr->win = htons ( TCP_MAX_WINDOW_SIZE );
496 504
 	tcphdr->csum = tcpip_chksum ( pkb->data, pkb_len ( pkb ) );
497 505
 
498 506
 	/* Dump header */

Loading…
Отказ
Запис