瀏覽代碼

[TCP] Avoid shrinking TCP window

Maintain state for the advertised window length, and only ever increase
it (instead of calculating it afresh on each transmit).  This avoids
triggering "treason uncloaked" messages on Linux peers.

Respond to zero-length TCP keepalives (i.e. empty data packets
transmitted outside the window).  Even if the peer wouldn't otherwise
expect an ACK (because its packet consumed no sequence space), force an
ACK if it was outside the window.

We don't yet generate TCP keepalives.  It could be done, but it's unclear
what benefit this would have.  (Linux, for example, doesn't start sending
keepalives until the connection has been idle for two hours.)
tags/v0.9.4
Michael Brown 16 年之前
父節點
當前提交
1a68d3fef3
共有 1 個文件被更改,包括 50 次插入和 17 次删除
  1. src/net/tcp.c(+50,-17)

+ 50
- 17
src/net/tcp.c 查看文件

@@ -65,6 +65,11 @@ struct tcp_connection {
65 65
 	 * Equivalent to RCV.NXT in RFC 793 terminology.
66 66
 	 */
67 67
 	uint32_t rcv_ack;
68
+	/** Receive window
69
+	 *
70
+	 * Equivalent to RCV.WND in RFC 793 terminology.
71
+	 */
72
+	uint32_t rcv_win;
68 73
 	/** Most recent received timestamp
69 74
 	 *
70 75
 	 * Equivalent to TS.Recent in RFC 1323 terminology.
@@ -394,7 +399,7 @@ static int tcp_xmit ( struct tcp_connection *tcp, int force_send ) {
394 399
 	size_t len = 0;
395 400
 	size_t seq_len;
396 401
 	size_t app_win;
397
-	size_t rcv_win;
402
+	size_t max_rcv_win;
398 403
 
399 404
 	/* If retransmission timer is already running, do nothing */
400 405
 	if ( timer_running ( &tcp->timer ) )
@@ -439,14 +444,16 @@ static int tcp_xmit ( struct tcp_connection *tcp, int force_send ) {
439 444
 	/* Fill data payload from transmit queue */
440 445
 	tcp_process_queue ( tcp, len, iobuf, 0 );
441 446
 
442
-	/* Estimate window size */
443
-	rcv_win = ( ( freemem * 3 ) / 4 );
444
-	if ( rcv_win > TCP_MAX_WINDOW_SIZE )
445
-		rcv_win = TCP_MAX_WINDOW_SIZE;
447
+	/* Expand receive window if possible */
448
+	max_rcv_win = ( ( freemem * 3 ) / 4 );
449
+	if ( max_rcv_win > TCP_MAX_WINDOW_SIZE )
450
+		max_rcv_win = TCP_MAX_WINDOW_SIZE;
446 451
 	app_win = xfer_window ( &tcp->xfer );
447
-	if ( rcv_win > app_win )
448
-		rcv_win = app_win;
449
-	rcv_win &= ~0x03; /* Keep everything dword-aligned */
452
+	if ( max_rcv_win > app_win )
453
+		max_rcv_win = app_win;
454
+	max_rcv_win &= ~0x03; /* Keep everything dword-aligned */
455
+	if ( tcp->rcv_win < max_rcv_win )
456
+		tcp->rcv_win = max_rcv_win;
450 457
 
451 458
 	/* Fill up the TCP header */
452 459
 	payload = iobuf->data;
@@ -472,7 +479,7 @@ static int tcp_xmit ( struct tcp_connection *tcp, int force_send ) {
472 479
 	tcphdr->ack = htonl ( tcp->rcv_ack );
473 480
 	tcphdr->hlen = ( ( payload - iobuf->data ) << 2 );
474 481
 	tcphdr->flags = flags;
475
-	tcphdr->win = htons ( rcv_win );
482
+	tcphdr->win = htons ( tcp->rcv_win );
476 483
 	tcphdr->csum = tcpip_chksum ( iobuf->data, iob_len ( iobuf ) );
477 484
 
478 485
 	/* Dump header */
@@ -632,6 +639,21 @@ static void tcp_rx_opts ( struct tcp_connection *tcp, const void *data,
632 639
 	}
633 640
 }
634 641
 
642
+/**
643
+ * Consume received sequence space
644
+ *
645
+ * @v tcp		TCP connection
646
+ * @v seq_len		Sequence space length to consume
647
+ */
648
+static void tcp_rx_seq ( struct tcp_connection *tcp, size_t seq_len ) {
649
+	tcp->rcv_ack += seq_len;
650
+	if ( tcp->rcv_win > seq_len ) {
651
+		tcp->rcv_win -= seq_len;
652
+	} else {
653
+		tcp->rcv_win = 0;
654
+	}
655
+}
656
+
635 657
 /**
636 658
  * Handle TCP received SYN
637 659
  *
@@ -659,7 +681,7 @@ static int tcp_rx_syn ( struct tcp_connection *tcp, uint32_t seq,
659 681
 			    TCP_STATE_RCVD ( TCP_SYN ) );
660 682
 
661 683
 	/* Acknowledge SYN */
662
-	tcp->rcv_ack++;
684
+	tcp_rx_seq ( tcp, 1 );
663 685
 
664 686
 	return 0;
665 687
 }
@@ -747,7 +769,8 @@ static int tcp_rx_data ( struct tcp_connection *tcp, uint32_t seq,
747 769
 		return rc;
748 770
 
749 771
 	/* Acknowledge new data */
750
-	tcp->rcv_ack += len;
772
+	tcp_rx_seq ( tcp, len );
773
+
751 774
 	return 0;
752 775
 }
753 776
 
@@ -766,7 +789,7 @@ static int tcp_rx_fin ( struct tcp_connection *tcp, uint32_t seq ) {
766 789
 
767 790
 	/* Mark FIN as received and acknowledge it */
768 791
 	tcp->tcp_state |= TCP_STATE_RCVD ( TCP_FIN );
769
-	tcp->rcv_ack++;
792
+	tcp_rx_seq ( tcp, 1 );
770 793
 
771 794
 	/* Close connection */
772 795
 	tcp_close ( tcp, 0 );
@@ -789,7 +812,7 @@ static int tcp_rx_rst ( struct tcp_connection *tcp, uint32_t seq ) {
789 812
 	 * ACKed.
790 813
 	 */
791 814
 	if ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) {
792
-		if ( ( tcp->rcv_ack - seq ) > 0 )
815
+		if ( ( seq - tcp->rcv_ack ) >= tcp->rcv_win )
793 816
 			return 0;
794 817
 	} else {
795 818
 		if ( ! ( tcp->tcp_state & TCP_STATE_ACKED ( TCP_SYN ) ) )
@@ -850,7 +873,8 @@ static int tcp_rx ( struct io_buffer *iobuf,
850 873
 		rc = -EINVAL;
851 874
 		goto discard;
852 875
 	}
853
-	csum = tcpip_continue_chksum ( pshdr_csum, iobuf->data, iob_len ( iobuf ));
876
+	csum = tcpip_continue_chksum ( pshdr_csum, iobuf->data,
877
+				       iob_len ( iobuf ) );
854 878
 	if ( csum != 0 ) {
855 879
 		DBG ( "TCP checksum incorrect (is %04x including checksum "
856 880
 		      "field, should be 0000)\n", csum );
@@ -922,10 +946,19 @@ static int tcp_rx ( struct io_buffer *iobuf,
922 946
 	/* Dump out any state change as a result of the received packet */
923 947
 	tcp_dump_state ( tcp );
924 948
 
925
-	/* Send out any pending data.  If peer is expecting an ACK for
926
-	 * this packet then force sending a reply.
949
+	/* Send out any pending data.  We force sending a reply if either
950
+	 *
951
+	 *  a) the peer is expecting an ACK (i.e. consumed sequence space), or
952
+	 *  b) either end of the packet was outside the receive window
953
+	 *
954
+	 * Case (b) enables us to support TCP keepalives using
955
+	 * zero-length packets, which we would otherwise ignore.  Note
956
+	 * that for case (b), we need *only* consider zero-length
957
+	 * packets, since non-zero-length packets will already be
958
+	 * caught by case (a).
927 959
 	 */
928
-	tcp_xmit ( tcp, ( start_seq != seq ) );
960
+	tcp_xmit ( tcp, ( ( start_seq != seq ) ||
961
+			  ( ( seq - tcp->rcv_ack ) > tcp->rcv_win ) ) );
929 962
 
930 963
 	/* If this packet was the last we expect to receive, set up
931 964
 	 * timer to expire and cause the connection to be freed.

Loading…
取消
儲存