瀏覽代碼

[tcp] Add support for TCP window scaling

The maximum unscaled TCP window (64kB) implies a maximum bandwidth of
around 300kB/s on a WAN link with an RTT of 200ms.  Add support for
the TCP window scaling option to remove this upper limit.

Signed-off-by: Michael Brown <mcb30@ipxe.org>
tags/v1.20.1
Michael Brown 12 年之前
父節點
當前提交
ea61075c60
共有 2 個文件被更改,包括 58 次插入和 3 次删除
  1. 29
    1
      src/include/ipxe/tcp.h
  2. 29
    2
      src/net/tcp.c

+ 29
- 1
src/include/ipxe/tcp.h 查看文件

@@ -54,6 +54,31 @@ struct tcp_mss_option {
54 54
 /** Code for the TCP MSS option */
55 55
 #define TCP_OPTION_MSS 2
56 56
 
57
+/** TCP window scale option */
58
+struct tcp_window_scale_option {
59
+	uint8_t kind;
60
+	uint8_t length;
61
+	uint8_t scale;
62
+} __attribute__ (( packed ));
63
+
64
+/** Padded TCP window scale option (used for sending) */
65
+struct tcp_window_scale_padded_option {
66
+	uint8_t nop;
67
+	struct tcp_window_scale_option wsopt;
68
+} __attribute (( packed ));
69
+
70
+/** Code for the TCP window scale option */
71
+#define TCP_OPTION_WS 3
72
+
73
+/** Advertised TCP window scale
74
+ *
75
+ * Using a scale factor of 2**9 provides for a maximum window of 32MB,
76
+ * which is sufficient to allow Gigabit-speed transfers with a 200ms
77
+ * RTT.  The minimum advertised window is 512 bytes, which is still
78
+ * less than a single packet.
79
+ */
80
+#define TCP_RX_WINDOW_SCALE 9
81
+
57 82
 /** TCP timestamp option */
58 83
 struct tcp_timestamp_option {
59 84
 	uint8_t kind;
@@ -75,7 +100,9 @@ struct tcp_timestamp_padded_option {
75 100
 struct tcp_options {
76 101
 	/** MSS option, if present */
77 102
 	const struct tcp_mss_option *mssopt;
78
-	/** Timestampe option, if present */
103
+	/** Window scale option, if present */
104
+	const struct tcp_window_scale_option *wsopt;
105
+	/** Timestamp option, if present */
79 106
 	const struct tcp_timestamp_option *tsopt;
80 107
 };
81 108
 
@@ -316,6 +343,7 @@ struct tcp_options {
316 343
 	( MAX_LL_NET_HEADER_LEN +				\
317 344
 	  sizeof ( struct tcp_header ) +			\
318 345
 	  sizeof ( struct tcp_mss_option ) +			\
346
+	  sizeof ( struct tcp_window_scale_padded_option ) +	\
319 347
 	  sizeof ( struct tcp_timestamp_padded_option ) )
320 348
 
321 349
 /**

+ 29
- 2
src/net/tcp.c 查看文件

@@ -87,6 +87,16 @@ struct tcp_connection {
87 87
 	 * Equivalent to TS.Recent in RFC 1323 terminology.
88 88
 	 */
89 89
 	uint32_t ts_recent;
90
+	/** Send window scale
91
+	 *
92
+	 * Equivalent to Snd.Wind.Scale in RFC 1323 terminology
93
+	 */
94
+	uint8_t snd_win_scale;
95
+	/** Receive window scale
96
+	 *
97
+	 * Equivalent to Rcv.Wind.Scale in RFC 1323 terminology
98
+	 */
99
+	uint8_t rcv_win_scale;
90 100
 
91 101
 	/** Transmit queue */
92 102
 	struct list_head tx_queue;
@@ -490,6 +500,7 @@ static int tcp_xmit ( struct tcp_connection *tcp ) {
490 500
 	struct io_buffer *iobuf;
491 501
 	struct tcp_header *tcphdr;
492 502
 	struct tcp_mss_option *mssopt;
503
+	struct tcp_window_scale_padded_option *wsopt;
493 504
 	struct tcp_timestamp_padded_option *tsopt;
494 505
 	void *payload;
495 506
 	unsigned int flags;
@@ -497,6 +508,7 @@ static int tcp_xmit ( struct tcp_connection *tcp ) {
497 508
 	uint32_t seq_len;
498 509
 	uint32_t app_win;
499 510
 	uint32_t max_rcv_win;
511
+	uint32_t max_representable_win;
500 512
 	int rc;
501 513
 
502 514
 	/* If retransmission timer is already running, do nothing */
@@ -551,6 +563,9 @@ static int tcp_xmit ( struct tcp_connection *tcp ) {
551 563
 	app_win = xfer_window ( &tcp->xfer );
552 564
 	if ( max_rcv_win > app_win )
553 565
 		max_rcv_win = app_win;
566
+	max_representable_win = ( 0xffff << tcp->rcv_win_scale );
567
+	if ( max_rcv_win > max_representable_win )
568
+		max_rcv_win = max_representable_win;
554 569
 	max_rcv_win &= ~0x03; /* Keep everything dword-aligned */
555 570
 	if ( tcp->rcv_win < max_rcv_win )
556 571
 		tcp->rcv_win = max_rcv_win;
@@ -562,6 +577,11 @@ static int tcp_xmit ( struct tcp_connection *tcp ) {
562 577
 		mssopt->kind = TCP_OPTION_MSS;
563 578
 		mssopt->length = sizeof ( *mssopt );
564 579
 		mssopt->mss = htons ( TCP_MSS );
580
+		wsopt = iob_push ( iobuf, sizeof ( *wsopt ) );
581
+		wsopt->nop = TCP_OPTION_NOP;
582
+		wsopt->wsopt.kind = TCP_OPTION_WS;
583
+		wsopt->wsopt.length = sizeof ( wsopt->wsopt );
584
+		wsopt->wsopt.scale = TCP_RX_WINDOW_SCALE;
565 585
 	}
566 586
 	if ( ( flags & TCP_SYN ) || ( tcp->flags & TCP_TS_ENABLED ) ) {
567 587
 		tsopt = iob_push ( iobuf, sizeof ( *tsopt ) );
@@ -581,7 +601,7 @@ static int tcp_xmit ( struct tcp_connection *tcp ) {
581 601
 	tcphdr->ack = htonl ( tcp->rcv_ack );
582 602
 	tcphdr->hlen = ( ( payload - iobuf->data ) << 2 );
583 603
 	tcphdr->flags = flags;
584
-	tcphdr->win = htons ( tcp->rcv_win );
604
+	tcphdr->win = htons ( tcp->rcv_win >> tcp->rcv_win_scale );
585 605
 	tcphdr->csum = tcpip_chksum ( iobuf->data, iob_len ( iobuf ) );
586 606
 
587 607
 	/* Dump header */
@@ -769,6 +789,9 @@ static void tcp_rx_opts ( struct tcp_connection *tcp, const void *data,
769 789
 		case TCP_OPTION_MSS:
770 790
 			options->mssopt = data;
771 791
 			break;
792
+		case TCP_OPTION_WS:
793
+			options->wsopt = data;
794
+			break;
772 795
 		case TCP_OPTION_TS:
773 796
 			options->tsopt = data;
774 797
 			break;
@@ -825,6 +848,10 @@ static int tcp_rx_syn ( struct tcp_connection *tcp, uint32_t seq,
825 848
 		tcp->rcv_ack = seq;
826 849
 		if ( options->tsopt )
827 850
 			tcp->flags |= TCP_TS_ENABLED;
851
+		if ( options->wsopt ) {
852
+			tcp->snd_win_scale = options->wsopt->scale;
853
+			tcp->rcv_win_scale = TCP_RX_WINDOW_SCALE;
854
+		}
828 855
 	}
829 856
 
830 857
 	/* Ignore duplicate SYN */
@@ -1168,7 +1195,7 @@ static int tcp_rx ( struct io_buffer *iobuf,
1168 1195
 	tcp = tcp_demux ( ntohs ( tcphdr->dest ) );
1169 1196
 	seq = ntohl ( tcphdr->seq );
1170 1197
 	ack = ntohl ( tcphdr->ack );
1171
-	win = ntohs ( tcphdr->win );
1198
+	win = ( ntohs ( tcphdr->win ) << tcp->snd_win_scale );
1172 1199
 	flags = tcphdr->flags;
1173 1200
 	tcp_rx_opts ( tcp, ( ( ( void * ) tcphdr ) + sizeof ( *tcphdr ) ),
1174 1201
 		      ( hlen - sizeof ( *tcphdr ) ), &options );

Loading…
取消
儲存