Browse Source

[TCP] Avoid shrinking TCP window

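Maintain state for the advertised window length, and only ever expand
it at transmit time (instead of calculating it afresh on each transmit).
The stored window is reduced only as received sequence space is consumed,
so the right-hand edge of the advertised window never moves backwards.
This avoids triggering "treason uncloaked" messages on Linux peers.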

Respond to zero-length TCP keepalives (i.e. empty data packets
transmitted outside the window).  Even if the peer wouldn't otherwise
expect an ACK (because its packet consumed no sequence space), force an
ACK if it was outside the window.

We don't yet generate TCP keepalives.  It could be done, but it's unclear
what benefit this would have.  (Linux, for example, doesn't start sending
keepalives until the connection has been idle for two hours.)
tags/v0.9.4
Michael Brown 16 years ago
parent
commit
1a68d3fef3
1 changed file with 50 additions and 17 deletions
  1. 50
    17
      src/net/tcp.c

+ 50
- 17
src/net/tcp.c View File

65
 	 * Equivalent to RCV.NXT in RFC 793 terminology.
65
 	 * Equivalent to RCV.NXT in RFC 793 terminology.
66
 	 */
66
 	 */
67
 	uint32_t rcv_ack;
67
 	uint32_t rcv_ack;
68
+	/** Receive window
69
+	 *
70
+	 * Equivalent to RCV.WND in RFC 793 terminology.
71
+	 */
72
+	uint32_t rcv_win;
68
 	/** Most recent received timestamp
73
 	/** Most recent received timestamp
69
 	 *
74
 	 *
70
 	 * Equivalent to TS.Recent in RFC 1323 terminology.
75
 	 * Equivalent to TS.Recent in RFC 1323 terminology.
394
 	size_t len = 0;
399
 	size_t len = 0;
395
 	size_t seq_len;
400
 	size_t seq_len;
396
 	size_t app_win;
401
 	size_t app_win;
397
-	size_t rcv_win;
402
+	size_t max_rcv_win;
398
 
403
 
399
 	/* If retransmission timer is already running, do nothing */
404
 	/* If retransmission timer is already running, do nothing */
400
 	if ( timer_running ( &tcp->timer ) )
405
 	if ( timer_running ( &tcp->timer ) )
439
 	/* Fill data payload from transmit queue */
444
 	/* Fill data payload from transmit queue */
440
 	tcp_process_queue ( tcp, len, iobuf, 0 );
445
 	tcp_process_queue ( tcp, len, iobuf, 0 );
441
 
446
 
442
-	/* Estimate window size */
443
-	rcv_win = ( ( freemem * 3 ) / 4 );
444
-	if ( rcv_win > TCP_MAX_WINDOW_SIZE )
445
-		rcv_win = TCP_MAX_WINDOW_SIZE;
447
+	/* Expand receive window if possible */
448
+	max_rcv_win = ( ( freemem * 3 ) / 4 );
449
+	if ( max_rcv_win > TCP_MAX_WINDOW_SIZE )
450
+		max_rcv_win = TCP_MAX_WINDOW_SIZE;
446
 	app_win = xfer_window ( &tcp->xfer );
451
 	app_win = xfer_window ( &tcp->xfer );
447
-	if ( rcv_win > app_win )
448
-		rcv_win = app_win;
449
-	rcv_win &= ~0x03; /* Keep everything dword-aligned */
452
+	if ( max_rcv_win > app_win )
453
+		max_rcv_win = app_win;
454
+	max_rcv_win &= ~0x03; /* Keep everything dword-aligned */
455
+	if ( tcp->rcv_win < max_rcv_win )
456
+		tcp->rcv_win = max_rcv_win;
450
 
457
 
451
 	/* Fill up the TCP header */
458
 	/* Fill up the TCP header */
452
 	payload = iobuf->data;
459
 	payload = iobuf->data;
472
 	tcphdr->ack = htonl ( tcp->rcv_ack );
479
 	tcphdr->ack = htonl ( tcp->rcv_ack );
473
 	tcphdr->hlen = ( ( payload - iobuf->data ) << 2 );
480
 	tcphdr->hlen = ( ( payload - iobuf->data ) << 2 );
474
 	tcphdr->flags = flags;
481
 	tcphdr->flags = flags;
475
-	tcphdr->win = htons ( rcv_win );
482
+	tcphdr->win = htons ( tcp->rcv_win );
476
 	tcphdr->csum = tcpip_chksum ( iobuf->data, iob_len ( iobuf ) );
483
 	tcphdr->csum = tcpip_chksum ( iobuf->data, iob_len ( iobuf ) );
477
 
484
 
478
 	/* Dump header */
485
 	/* Dump header */
632
 	}
639
 	}
633
 }
640
 }
634
 
641
 
642
+/**
643
+ * Consume received sequence space
644
+ *
645
+ * @v tcp		TCP connection
646
+ * @v seq_len		Sequence space length to consume
647
+ */
648
+static void tcp_rx_seq ( struct tcp_connection *tcp, size_t seq_len ) {
649
+	tcp->rcv_ack += seq_len;
650
+	if ( tcp->rcv_win > seq_len ) {
651
+		tcp->rcv_win -= seq_len;
652
+	} else {
653
+		tcp->rcv_win = 0;
654
+	}
655
+}
656
+
635
 /**
657
 /**
636
  * Handle TCP received SYN
658
  * Handle TCP received SYN
637
  *
659
  *
659
 			    TCP_STATE_RCVD ( TCP_SYN ) );
681
 			    TCP_STATE_RCVD ( TCP_SYN ) );
660
 
682
 
661
 	/* Acknowledge SYN */
683
 	/* Acknowledge SYN */
662
-	tcp->rcv_ack++;
684
+	tcp_rx_seq ( tcp, 1 );
663
 
685
 
664
 	return 0;
686
 	return 0;
665
 }
687
 }
747
 		return rc;
769
 		return rc;
748
 
770
 
749
 	/* Acknowledge new data */
771
 	/* Acknowledge new data */
750
-	tcp->rcv_ack += len;
772
+	tcp_rx_seq ( tcp, len );
773
+
751
 	return 0;
774
 	return 0;
752
 }
775
 }
753
 
776
 
766
 
789
 
767
 	/* Mark FIN as received and acknowledge it */
790
 	/* Mark FIN as received and acknowledge it */
768
 	tcp->tcp_state |= TCP_STATE_RCVD ( TCP_FIN );
791
 	tcp->tcp_state |= TCP_STATE_RCVD ( TCP_FIN );
769
-	tcp->rcv_ack++;
792
+	tcp_rx_seq ( tcp, 1 );
770
 
793
 
771
 	/* Close connection */
794
 	/* Close connection */
772
 	tcp_close ( tcp, 0 );
795
 	tcp_close ( tcp, 0 );
789
 	 * ACKed.
812
 	 * ACKed.
790
 	 */
813
 	 */
791
 	if ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) {
814
 	if ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) {
792
-		if ( ( tcp->rcv_ack - seq ) > 0 )
815
+		if ( ( seq - tcp->rcv_ack ) >= tcp->rcv_win )
793
 			return 0;
816
 			return 0;
794
 	} else {
817
 	} else {
795
 		if ( ! ( tcp->tcp_state & TCP_STATE_ACKED ( TCP_SYN ) ) )
818
 		if ( ! ( tcp->tcp_state & TCP_STATE_ACKED ( TCP_SYN ) ) )
850
 		rc = -EINVAL;
873
 		rc = -EINVAL;
851
 		goto discard;
874
 		goto discard;
852
 	}
875
 	}
853
-	csum = tcpip_continue_chksum ( pshdr_csum, iobuf->data, iob_len ( iobuf ));
876
+	csum = tcpip_continue_chksum ( pshdr_csum, iobuf->data,
877
+				       iob_len ( iobuf ) );
854
 	if ( csum != 0 ) {
878
 	if ( csum != 0 ) {
855
 		DBG ( "TCP checksum incorrect (is %04x including checksum "
879
 		DBG ( "TCP checksum incorrect (is %04x including checksum "
856
 		      "field, should be 0000)\n", csum );
880
 		      "field, should be 0000)\n", csum );
922
 	/* Dump out any state change as a result of the received packet */
946
 	/* Dump out any state change as a result of the received packet */
923
 	tcp_dump_state ( tcp );
947
 	tcp_dump_state ( tcp );
924
 
948
 
925
-	/* Send out any pending data.  If peer is expecting an ACK for
926
-	 * this packet then force sending a reply.
949
+	/* Send out any pending data.  We force sending a reply if either
950
+	 *
951
+	 *  a) the peer is expecting an ACK (i.e. consumed sequence space), or
952
+	 *  b) either end of the packet was outside the receive window
953
+	 *
954
+	 * Case (b) enables us to support TCP keepalives using
955
+	 * zero-length packets, which we would otherwise ignore.  Note
956
+	 * that for case (b), we need *only* consider zero-length
957
+	 * packets, since non-zero-length packets will already be
958
+	 * caught by case (a).
927
 	 */
959
 	 */
928
-	tcp_xmit ( tcp, ( start_seq != seq ) );
960
+	tcp_xmit ( tcp, ( ( start_seq != seq ) ||
961
+			  ( ( seq - tcp->rcv_ack ) > tcp->rcv_win ) ) );
929
 
962
 
930
 	/* If this packet was the last we expect to receive, set up
963
 	/* If this packet was the last we expect to receive, set up
931
 	 * timer to expire and cause the connection to be freed.
964
 	 * timer to expire and cause the connection to be freed.

Loading…
Cancel
Save