Browse Source

[tcp] Add support for TCP window scaling

The maximum unscaled TCP window (64kB) implies a maximum bandwidth of
around 300kB/s on a WAN link with an RTT of 200ms.  Add support for
the TCP window scaling option to remove this upper limit.

Signed-off-by: Michael Brown <mcb30@ipxe.org>
tags/v1.20.1
Michael Brown 12 years ago
parent
commit
ea61075c60
2 changed files with 58 additions and 3 deletions
  1. 29
    1
      src/include/ipxe/tcp.h
  2. 29
    2
      src/net/tcp.c

+ 29
- 1
src/include/ipxe/tcp.h View File

54
 /** Code for the TCP MSS option */
54
 /** Code for the TCP MSS option */
55
 #define TCP_OPTION_MSS 2
55
 #define TCP_OPTION_MSS 2
56
 
56
 
57
+/** TCP window scale option */
58
+struct tcp_window_scale_option {
59
+	uint8_t kind;
60
+	uint8_t length;
61
+	uint8_t scale;
62
+} __attribute__ (( packed ));
63
+
64
+/** Padded TCP window scale option (used for sending) */
65
+struct tcp_window_scale_padded_option {
66
+	uint8_t nop;
67
+	struct tcp_window_scale_option wsopt;
68
+} __attribute (( packed ));
69
+
70
+/** Code for the TCP window scale option */
71
+#define TCP_OPTION_WS 3
72
+
73
+/** Advertised TCP window scale
74
+ *
75
+ * Using a scale factor of 2**9 provides for a maximum window of 32MB,
76
+ * which is sufficient to allow Gigabit-speed transfers with a 200ms
77
+ * RTT.  The minimum advertised window is 512 bytes, which is still
78
+ * less than a single packet.
79
+ */
80
+#define TCP_RX_WINDOW_SCALE 9
81
+
57
 /** TCP timestamp option */
82
 /** TCP timestamp option */
58
 struct tcp_timestamp_option {
83
 struct tcp_timestamp_option {
59
 	uint8_t kind;
84
 	uint8_t kind;
75
 struct tcp_options {
100
 struct tcp_options {
76
 	/** MSS option, if present */
101
 	/** MSS option, if present */
77
 	const struct tcp_mss_option *mssopt;
102
 	const struct tcp_mss_option *mssopt;
78
-	/** Timestampe option, if present */
103
+	/** Window scale option, if present */
104
+	const struct tcp_window_scale_option *wsopt;
105
+	/** Timestamp option, if present */
79
 	const struct tcp_timestamp_option *tsopt;
106
 	const struct tcp_timestamp_option *tsopt;
80
 };
107
 };
81
 
108
 
316
 	( MAX_LL_NET_HEADER_LEN +				\
343
 	( MAX_LL_NET_HEADER_LEN +				\
317
 	  sizeof ( struct tcp_header ) +			\
344
 	  sizeof ( struct tcp_header ) +			\
318
 	  sizeof ( struct tcp_mss_option ) +			\
345
 	  sizeof ( struct tcp_mss_option ) +			\
346
+	  sizeof ( struct tcp_window_scale_padded_option ) +	\
319
 	  sizeof ( struct tcp_timestamp_padded_option ) )
347
 	  sizeof ( struct tcp_timestamp_padded_option ) )
320
 
348
 
321
 /**
349
 /**

+ 29
- 2
src/net/tcp.c View File

87
 	 * Equivalent to TS.Recent in RFC 1323 terminology.
87
 	 * Equivalent to TS.Recent in RFC 1323 terminology.
88
 	 */
88
 	 */
89
 	uint32_t ts_recent;
89
 	uint32_t ts_recent;
90
+	/** Send window scale
91
+	 *
92
+	 * Equivalent to Snd.Wind.Scale in RFC 1323 terminology
93
+	 */
94
+	uint8_t snd_win_scale;
95
+	/** Receive window scale
96
+	 *
97
+	 * Equivalent to Rcv.Wind.Scale in RFC 1323 terminology
98
+	 */
99
+	uint8_t rcv_win_scale;
90
 
100
 
91
 	/** Transmit queue */
101
 	/** Transmit queue */
92
 	struct list_head tx_queue;
102
 	struct list_head tx_queue;
490
 	struct io_buffer *iobuf;
500
 	struct io_buffer *iobuf;
491
 	struct tcp_header *tcphdr;
501
 	struct tcp_header *tcphdr;
492
 	struct tcp_mss_option *mssopt;
502
 	struct tcp_mss_option *mssopt;
503
+	struct tcp_window_scale_padded_option *wsopt;
493
 	struct tcp_timestamp_padded_option *tsopt;
504
 	struct tcp_timestamp_padded_option *tsopt;
494
 	void *payload;
505
 	void *payload;
495
 	unsigned int flags;
506
 	unsigned int flags;
497
 	uint32_t seq_len;
508
 	uint32_t seq_len;
498
 	uint32_t app_win;
509
 	uint32_t app_win;
499
 	uint32_t max_rcv_win;
510
 	uint32_t max_rcv_win;
511
+	uint32_t max_representable_win;
500
 	int rc;
512
 	int rc;
501
 
513
 
502
 	/* If retransmission timer is already running, do nothing */
514
 	/* If retransmission timer is already running, do nothing */
551
 	app_win = xfer_window ( &tcp->xfer );
563
 	app_win = xfer_window ( &tcp->xfer );
552
 	if ( max_rcv_win > app_win )
564
 	if ( max_rcv_win > app_win )
553
 		max_rcv_win = app_win;
565
 		max_rcv_win = app_win;
566
+	max_representable_win = ( 0xffff << tcp->rcv_win_scale );
567
+	if ( max_rcv_win > max_representable_win )
568
+		max_rcv_win = max_representable_win;
554
 	max_rcv_win &= ~0x03; /* Keep everything dword-aligned */
569
 	max_rcv_win &= ~0x03; /* Keep everything dword-aligned */
555
 	if ( tcp->rcv_win < max_rcv_win )
570
 	if ( tcp->rcv_win < max_rcv_win )
556
 		tcp->rcv_win = max_rcv_win;
571
 		tcp->rcv_win = max_rcv_win;
562
 		mssopt->kind = TCP_OPTION_MSS;
577
 		mssopt->kind = TCP_OPTION_MSS;
563
 		mssopt->length = sizeof ( *mssopt );
578
 		mssopt->length = sizeof ( *mssopt );
564
 		mssopt->mss = htons ( TCP_MSS );
579
 		mssopt->mss = htons ( TCP_MSS );
580
+		wsopt = iob_push ( iobuf, sizeof ( *wsopt ) );
581
+		wsopt->nop = TCP_OPTION_NOP;
582
+		wsopt->wsopt.kind = TCP_OPTION_WS;
583
+		wsopt->wsopt.length = sizeof ( wsopt->wsopt );
584
+		wsopt->wsopt.scale = TCP_RX_WINDOW_SCALE;
565
 	}
585
 	}
566
 	if ( ( flags & TCP_SYN ) || ( tcp->flags & TCP_TS_ENABLED ) ) {
586
 	if ( ( flags & TCP_SYN ) || ( tcp->flags & TCP_TS_ENABLED ) ) {
567
 		tsopt = iob_push ( iobuf, sizeof ( *tsopt ) );
587
 		tsopt = iob_push ( iobuf, sizeof ( *tsopt ) );
581
 	tcphdr->ack = htonl ( tcp->rcv_ack );
601
 	tcphdr->ack = htonl ( tcp->rcv_ack );
582
 	tcphdr->hlen = ( ( payload - iobuf->data ) << 2 );
602
 	tcphdr->hlen = ( ( payload - iobuf->data ) << 2 );
583
 	tcphdr->flags = flags;
603
 	tcphdr->flags = flags;
584
-	tcphdr->win = htons ( tcp->rcv_win );
604
+	tcphdr->win = htons ( tcp->rcv_win >> tcp->rcv_win_scale );
585
 	tcphdr->csum = tcpip_chksum ( iobuf->data, iob_len ( iobuf ) );
605
 	tcphdr->csum = tcpip_chksum ( iobuf->data, iob_len ( iobuf ) );
586
 
606
 
587
 	/* Dump header */
607
 	/* Dump header */
769
 		case TCP_OPTION_MSS:
789
 		case TCP_OPTION_MSS:
770
 			options->mssopt = data;
790
 			options->mssopt = data;
771
 			break;
791
 			break;
792
+		case TCP_OPTION_WS:
793
+			options->wsopt = data;
794
+			break;
772
 		case TCP_OPTION_TS:
795
 		case TCP_OPTION_TS:
773
 			options->tsopt = data;
796
 			options->tsopt = data;
774
 			break;
797
 			break;
825
 		tcp->rcv_ack = seq;
848
 		tcp->rcv_ack = seq;
826
 		if ( options->tsopt )
849
 		if ( options->tsopt )
827
 			tcp->flags |= TCP_TS_ENABLED;
850
 			tcp->flags |= TCP_TS_ENABLED;
851
+		if ( options->wsopt ) {
852
+			tcp->snd_win_scale = options->wsopt->scale;
853
+			tcp->rcv_win_scale = TCP_RX_WINDOW_SCALE;
854
+		}
828
 	}
855
 	}
829
 
856
 
830
 	/* Ignore duplicate SYN */
857
 	/* Ignore duplicate SYN */
1168
 	tcp = tcp_demux ( ntohs ( tcphdr->dest ) );
1195
 	tcp = tcp_demux ( ntohs ( tcphdr->dest ) );
1169
 	seq = ntohl ( tcphdr->seq );
1196
 	seq = ntohl ( tcphdr->seq );
1170
 	ack = ntohl ( tcphdr->ack );
1197
 	ack = ntohl ( tcphdr->ack );
1171
-	win = ntohs ( tcphdr->win );
1198
+	win = ( ntohs ( tcphdr->win ) << tcp->snd_win_scale );
1172
 	flags = tcphdr->flags;
1199
 	flags = tcphdr->flags;
1173
 	tcp_rx_opts ( tcp, ( ( ( void * ) tcphdr ) + sizeof ( *tcphdr ) ),
1200
 	tcp_rx_opts ( tcp, ( ( ( void * ) tcphdr ) + sizeof ( *tcphdr ) ),
1174
 		      ( hlen - sizeof ( *tcphdr ) ), &options );
1201
 		      ( hlen - sizeof ( *tcphdr ) ), &options );

Loading…
Cancel
Save