浏览代码

[tcp] Defer sending ACKs until all received packets have been processed

When running inside a virtual machine (or when using the UNDI driver),
transmitting packets can be expensive.  When we receive several
packets in one poll (e.g. because a slow BIOS timer interrupt routine
has caused us to fall behind in processing), we can safely send just a
single ACK to cover all of the received packets.  This reduces the
time spent transmitting and allows us to clear the backlog much
faster.

Various RFCs (starting with RFC1122) state that there should be an ACK
for at least every second segment.  We choose not to enforce this
rule.  Under normal operation each poll should find at most one
received packet, and we will then not delay any ACKs.  We delay
(i.e. omit) ACKs only when under sufficiently heavy load that we are
finding multiple packets per poll; under these conditions it is
important to clear the backlog quickly since any delay may lead to
dropped packets.

Signed-off-by: Michael Brown <mcb30@ipxe.org>
tags/v1.20.1
Michael Brown 10 年前
父节点
当前提交
d28bb51f44
共有 1 个文件被更改,包括 25 次插入和 8 次删除
  1. 25
    8
      src/net/tcp.c

+ 25
- 8
src/net/tcp.c 查看文件

@@ -16,6 +16,7 @@
16 16
 #include <ipxe/uri.h>
17 17
 #include <ipxe/netdevice.h>
18 18
 #include <ipxe/profile.h>
19
+#include <ipxe/process.h>
19 20
 #include <ipxe/tcpip.h>
20 21
 #include <ipxe/tcp.h>
21 22
 
@@ -107,6 +108,8 @@ struct tcp_connection {
107 108
 	struct list_head tx_queue;
108 109
 	/** Receive queue */
109 110
 	struct list_head rx_queue;
111
+	/** Transmission process */
112
+	struct process process;
110 113
 	/** Retransmission timer */
111 114
 	struct retry_timer timer;
112 115
 	/** Shutdown (TIME_WAIT) timer */
@@ -166,6 +169,7 @@ static struct profiler tcp_rx_profiler __profiler = { .name = "tcp.rx" };
166 169
 static struct profiler tcp_xfer_profiler __profiler = { .name = "tcp.xfer" };
167 170
 
168 171
 /* Forward declarations */
172
+static struct process_descriptor tcp_process_desc;
169 173
 static struct interface_descriptor tcp_xfer_desc;
170 174
 static void tcp_expired ( struct retry_timer *timer, int over );
171 175
 static void tcp_wait_expired ( struct retry_timer *timer, int over );
@@ -273,6 +277,7 @@ static int tcp_open ( struct interface *xfer, struct sockaddr *peer,
273 277
 	DBGC ( tcp, "TCP %p allocated\n", tcp );
274 278
 	ref_init ( &tcp->refcnt, NULL );
275 279
 	intf_init ( &tcp->xfer, &tcp_xfer_desc, &tcp->refcnt );
280
+	process_init_stopped ( &tcp->process, &tcp_process_desc, &tcp->refcnt );
276 281
 	timer_init ( &tcp->timer, tcp_expired, &tcp->refcnt );
277 282
 	timer_init ( &tcp->wait, tcp_wait_expired, &tcp->refcnt );
278 283
 	tcp->prev_tcp_state = TCP_CLOSED;
@@ -369,6 +374,7 @@ static void tcp_close ( struct tcp_connection *tcp, int rc ) {
369 374
 		pending_put ( &tcp->pending_flags );
370 375
 
371 376
 		/* Remove from list and drop reference */
377
+		process_del ( &tcp->process );
372 378
 		stop_timer ( &tcp->timer );
373 379
 		stop_timer ( &tcp->wait );
374 380
 		list_del ( &tcp->list );
@@ -497,7 +503,7 @@ static size_t tcp_process_tx_queue ( struct tcp_connection *tcp, size_t max_len,
497 503
  * will have been started if necessary, and so the stack will
498 504
  * eventually attempt to retransmit the failed packet.
499 505
  */
500
-static int tcp_xmit ( struct tcp_connection *tcp ) {
506
+static void tcp_xmit ( struct tcp_connection *tcp ) {
501 507
 	struct io_buffer *iobuf;
502 508
 	struct tcp_header *tcphdr;
503 509
 	struct tcp_mss_option *mssopt;
@@ -517,7 +523,7 @@ static int tcp_xmit ( struct tcp_connection *tcp ) {
517 523
 
518 524
 	/* If retransmission timer is already running, do nothing */
519 525
 	if ( timer_running ( &tcp->timer ) )
520
-		return 0;
526
+		return;
521 527
 
522 528
 	/* Calculate both the actual (payload) and sequence space
523 529
 	 * lengths that we wish to transmit.
@@ -537,7 +543,7 @@ static int tcp_xmit ( struct tcp_connection *tcp ) {
537 543
 
538 544
 	/* If we have nothing to transmit, stop now */
539 545
 	if ( ( seq_len == 0 ) && ! ( tcp->flags & TCP_ACK_PENDING ) )
540
-		return 0;
546
+		return;
541 547
 
542 548
 	/* If we are transmitting anything that requires
543 549
 	 * acknowledgement (i.e. consumes sequence space), start the
@@ -553,7 +559,7 @@ static int tcp_xmit ( struct tcp_connection *tcp ) {
553 559
 		DBGC ( tcp, "TCP %p could not allocate iobuf for %08x..%08x "
554 560
 		       "%08x\n", tcp, tcp->snd_seq, ( tcp->snd_seq + seq_len ),
555 561
 		       tcp->rcv_ack );
556
-		return -ENOMEM;
562
+		return;
557 563
 	}
558 564
 	iob_reserve ( iobuf, TCP_MAX_HEADER_LEN );
559 565
 
@@ -620,16 +626,19 @@ static int tcp_xmit ( struct tcp_connection *tcp ) {
620 626
 		DBGC ( tcp, "TCP %p could not transmit %08x..%08x %08x: %s\n",
621 627
 		       tcp, tcp->snd_seq, ( tcp->snd_seq + tcp->snd_sent ),
622 628
 		       tcp->rcv_ack, strerror ( rc ) );
623
-		return rc;
629
+		return;
624 630
 	}
625 631
 
626 632
 	/* Clear ACK-pending flag */
627 633
 	tcp->flags &= ~TCP_ACK_PENDING;
628 634
 
629 635
 	profile_stop ( &tcp_tx_profiler );
630
-	return 0;
631 636
 }
632 637
 
638
+/** TCP process descriptor */
639
+static struct process_descriptor tcp_process_desc =
640
+	PROC_DESC_ONCE ( struct tcp_connection, process, tcp_xmit );
641
+
633 642
 /**
634 643
  * Retransmission timer expired
635 644
  *
@@ -1272,8 +1281,16 @@ static int tcp_rx ( struct io_buffer *iobuf,
1272 1281
 	/* Dump out any state change as a result of the received packet */
1273 1282
 	tcp_dump_state ( tcp );
1274 1283
 
1275
-	/* Send out any pending data */
1276
-	tcp_xmit ( tcp );
1284
+	/* Schedule transmission of ACK (and any pending data).  If we
1285
+	 * have received any out-of-order packets (i.e. if the receive
1286
+	 * queue remains non-empty after processing) then send the ACK
1287
+	 * immediately in order to trigger Fast Retransmission.
1288
+	 */
1289
+	if ( list_empty ( &tcp->rx_queue ) ) {
1290
+		process_add ( &tcp->process );
1291
+	} else {
1292
+		tcp_xmit ( tcp );
1293
+	}
1277 1294
 
1278 1295
 	/* If this packet was the last we expect to receive, set up
1279 1296
 	 * timer to expire and cause the connection to be freed.

正在加载...
取消
保存