|
@@ -104,6 +104,9 @@ struct tcp_connection {
|
104
|
104
|
/** Maximum receive window */
|
105
|
105
|
uint32_t max_rcv_win;
|
106
|
106
|
|
|
107
|
+ /** Selective acknowledgement list (in host-endian order) */
|
|
108
|
+ struct tcp_sack_block sack[TCP_SACK_MAX];
|
|
109
|
+
|
107
|
110
|
/** Transmit queue */
|
108
|
111
|
struct list_head tx_queue;
|
109
|
112
|
/** Receive queue */
|
|
@@ -129,6 +132,8 @@ enum tcp_flags {
|
129
|
132
|
TCP_TS_ENABLED = 0x0002,
|
130
|
133
|
/** TCP acknowledgement is pending */
|
131
|
134
|
TCP_ACK_PENDING = 0x0004,
|
|
135
|
+ /** TCP selective acknowledgement is enabled */
|
|
136
|
+ TCP_SACK_ENABLED = 0x0008,
|
132
|
137
|
};
|
133
|
138
|
|
134
|
139
|
/** TCP internal header
|
|
@@ -143,6 +148,8 @@ struct tcp_rx_queued_header {
|
143
|
148
|
* enqueued, and so excludes the SYN, if present.
|
144
|
149
|
*/
|
145
|
150
|
uint32_t seq;
|
|
151
|
+ /** Next SEQ value, in host-endian order */
|
|
152
|
+ uint32_t nxt;
|
146
|
153
|
/** Flags
|
147
|
154
|
*
|
148
|
155
|
* Only FIN is valid within this flags byte; all other flags
|
|
@@ -449,6 +456,94 @@ static size_t tcp_xfer_window ( struct tcp_connection *tcp ) {
|
449
|
456
|
return tcp_xmit_win ( tcp );
|
450
|
457
|
}
|
451
|
458
|
|
|
459
|
+/**
|
|
460
|
+ * Find selective acknowledgement block
|
|
461
|
+ *
|
|
462
|
+ * @v tcp TCP connection
|
|
463
|
+ * @v seq SEQ value in SACK block (in host-endian order)
|
|
464
|
+ * @v sack SACK block to fill in (in host-endian order)
|
|
465
|
+ * @ret len Length of SACK block
|
|
466
|
+ */
|
|
467
|
+static uint32_t tcp_sack_block ( struct tcp_connection *tcp, uint32_t seq,
|
|
468
|
+ struct tcp_sack_block *sack ) {
|
|
469
|
+ struct io_buffer *iobuf;
|
|
470
|
+ struct tcp_rx_queued_header *tcpqhdr;
|
|
471
|
+ uint32_t left = tcp->rcv_ack;
|
|
472
|
+ uint32_t right = left;
|
|
473
|
+
|
|
474
|
+ /* Find highest block which does not start after SEQ */
|
|
475
|
+ list_for_each_entry ( iobuf, &tcp->rx_queue, list ) {
|
|
476
|
+ tcpqhdr = iobuf->data;
|
|
477
|
+ if ( tcp_cmp ( tcpqhdr->seq, right ) > 0 ) {
|
|
478
|
+ if ( tcp_cmp ( tcpqhdr->seq, seq ) > 0 )
|
|
479
|
+ break;
|
|
480
|
+ left = tcpqhdr->seq;
|
|
481
|
+ }
|
|
482
|
+ if ( tcp_cmp ( tcpqhdr->nxt, right ) > 0 )
|
|
483
|
+ right = tcpqhdr->nxt;
|
|
484
|
+ }
|
|
485
|
+
|
|
486
|
+ /* Fail if this block does not contain SEQ */
|
|
487
|
+ if ( tcp_cmp ( right, seq ) < 0 )
|
|
488
|
+ return 0;
|
|
489
|
+
|
|
490
|
+ /* Populate SACK block */
|
|
491
|
+ sack->left = left;
|
|
492
|
+ sack->right = right;
|
|
493
|
+ return ( right - left );
|
|
494
|
+}
|
|
495
|
+
|
|
496
|
+/**
|
|
497
|
+ * Update TCP selective acknowledgement list
|
|
498
|
+ *
|
|
499
|
+ * @v tcp TCP connection
|
|
500
|
+ * @v seq SEQ value in first SACK block (in host-endian order)
|
|
501
|
+ * @ret count Number of SACK blocks
|
|
502
|
+ */
|
|
503
|
+static unsigned int tcp_sack ( struct tcp_connection *tcp, uint32_t seq ) {
|
|
504
|
+ struct tcp_sack_block sack[TCP_SACK_MAX];
|
|
505
|
+ unsigned int old = 0;
|
|
506
|
+ unsigned int new = 0;
|
|
507
|
+ unsigned int i;
|
|
508
|
+ uint32_t len;
|
|
509
|
+
|
|
510
|
+ /* Populate first new SACK block */
|
|
511
|
+ len = tcp_sack_block ( tcp, seq, &sack[0] );
|
|
512
|
+ if ( len )
|
|
513
|
+ new++;
|
|
514
|
+
|
|
515
|
+ /* Populate remaining new SACK blocks based on old SACK blocks */
|
|
516
|
+ for ( old = 0 ; old < TCP_SACK_MAX ; old++ ) {
|
|
517
|
+
|
|
518
|
+ /* Stop if we run out of space in the new list */
|
|
519
|
+ if ( new == TCP_SACK_MAX )
|
|
520
|
+ break;
|
|
521
|
+
|
|
522
|
+ /* Skip empty old SACK blocks */
|
|
523
|
+ if ( tcp->sack[old].left == tcp->sack[old].right )
|
|
524
|
+ continue;
|
|
525
|
+
|
|
526
|
+ /* Populate new SACK block */
|
|
527
|
+ len = tcp_sack_block ( tcp, tcp->sack[old].left, &sack[new] );
|
|
528
|
+ if ( len == 0 )
|
|
529
|
+ continue;
|
|
530
|
+
|
|
531
|
+ /* Eliminate duplicates */
|
|
532
|
+ for ( i = 0 ; i < new ; i++ ) {
|
|
533
|
+ if ( sack[i].left == sack[new].left ) {
|
|
534
|
+ new--;
|
|
535
|
+ break;
|
|
536
|
+ }
|
|
537
|
+ }
|
|
538
|
+ new++;
|
|
539
|
+ }
|
|
540
|
+
|
|
541
|
+ /* Update SACK list */
|
|
542
|
+ memset ( tcp->sack, 0, sizeof ( tcp->sack ) );
|
|
543
|
+ memcpy ( tcp->sack, sack, ( new * sizeof ( tcp->sack[0] ) ) );
|
|
544
|
+ return new;
|
|
545
|
+}
|
|
546
|
+
|
452
|
547
|
/**
|
453
|
548
|
* Process TCP transmit queue
|
454
|
549
|
*
|
|
@@ -493,9 +588,10 @@ static size_t tcp_process_tx_queue ( struct tcp_connection *tcp, size_t max_len,
|
493
|
588
|
}
|
494
|
589
|
|
495
|
590
|
/**
|
496
|
|
- * Transmit any outstanding data
|
|
591
|
+ * Transmit any outstanding data (with selective acknowledgement)
|
497
|
592
|
*
|
498
|
593
|
* @v tcp TCP connection
|
|
594
|
+ * @v sack_seq SEQ for first selective acknowledgement (if any)
|
499
|
595
|
*
|
500
|
596
|
* Transmits any outstanding data on the connection.
|
501
|
597
|
*
|
|
@@ -503,15 +599,21 @@ static size_t tcp_process_tx_queue ( struct tcp_connection *tcp, size_t max_len,
|
503
|
599
|
* will have been started if necessary, and so the stack will
|
504
|
600
|
* eventually attempt to retransmit the failed packet.
|
505
|
601
|
*/
|
506
|
|
-static void tcp_xmit ( struct tcp_connection *tcp ) {
|
|
602
|
+static void tcp_xmit_sack ( struct tcp_connection *tcp, uint32_t sack_seq ) {
|
507
|
603
|
struct io_buffer *iobuf;
|
508
|
604
|
struct tcp_header *tcphdr;
|
509
|
605
|
struct tcp_mss_option *mssopt;
|
510
|
606
|
struct tcp_window_scale_padded_option *wsopt;
|
511
|
607
|
struct tcp_timestamp_padded_option *tsopt;
|
|
608
|
+ struct tcp_sack_permitted_padded_option *spopt;
|
|
609
|
+ struct tcp_sack_padded_option *sackopt;
|
|
610
|
+ struct tcp_sack_block *sack;
|
512
|
611
|
void *payload;
|
513
|
612
|
unsigned int flags;
|
|
613
|
+ unsigned int sack_count;
|
|
614
|
+ unsigned int i;
|
514
|
615
|
size_t len = 0;
|
|
616
|
+ size_t sack_len;
|
515
|
617
|
uint32_t seq_len;
|
516
|
618
|
uint32_t app_win;
|
517
|
619
|
uint32_t max_rcv_win;
|
|
@@ -590,6 +692,10 @@ static void tcp_xmit ( struct tcp_connection *tcp ) {
|
590
|
692
|
wsopt->wsopt.kind = TCP_OPTION_WS;
|
591
|
693
|
wsopt->wsopt.length = sizeof ( wsopt->wsopt );
|
592
|
694
|
wsopt->wsopt.scale = TCP_RX_WINDOW_SCALE;
|
|
695
|
+ spopt = iob_push ( iobuf, sizeof ( *spopt ) );
|
|
696
|
+ memset ( spopt->nop, TCP_OPTION_NOP, sizeof ( spopt ) );
|
|
697
|
+ spopt->spopt.kind = TCP_OPTION_SACK_PERMITTED;
|
|
698
|
+ spopt->spopt.length = sizeof ( spopt->spopt );
|
593
|
699
|
}
|
594
|
700
|
if ( ( flags & TCP_SYN ) || ( tcp->flags & TCP_TS_ENABLED ) ) {
|
595
|
701
|
tsopt = iob_push ( iobuf, sizeof ( *tsopt ) );
|
|
@@ -599,6 +705,21 @@ static void tcp_xmit ( struct tcp_connection *tcp ) {
|
599
|
705
|
tsopt->tsopt.tsval = htonl ( currticks() );
|
600
|
706
|
tsopt->tsopt.tsecr = htonl ( tcp->ts_recent );
|
601
|
707
|
}
|
|
708
|
+ if ( ( tcp->flags & TCP_SACK_ENABLED ) &&
|
|
709
|
+ ( ! list_empty ( &tcp->rx_queue ) ) &&
|
|
710
|
+ ( ( sack_count = tcp_sack ( tcp, sack_seq ) ) != 0 ) ) {
|
|
711
|
+ sack_len = ( sack_count * sizeof ( *sack ) );
|
|
712
|
+ sackopt = iob_push ( iobuf, ( sizeof ( *sackopt ) + sack_len ));
|
|
713
|
+ memset ( sackopt->nop, TCP_OPTION_NOP, sizeof ( sackopt->nop ));
|
|
714
|
+ sackopt->sackopt.kind = TCP_OPTION_SACK;
|
|
715
|
+ sackopt->sackopt.length =
|
|
716
|
+ ( sizeof ( sackopt->sackopt ) + sack_len );
|
|
717
|
+ sack = ( ( ( void * ) sackopt ) + sizeof ( *sackopt ) );
|
|
718
|
+ for ( i = 0 ; i < sack_count ; i++, sack++ ) {
|
|
719
|
+ sack->left = htonl ( tcp->sack[i].left );
|
|
720
|
+ sack->right = htonl ( tcp->sack[i].right );
|
|
721
|
+ }
|
|
722
|
+ }
|
602
|
723
|
if ( len != 0 )
|
603
|
724
|
flags |= TCP_PSH;
|
604
|
725
|
tcphdr = iob_push ( iobuf, sizeof ( *tcphdr ) );
|
|
@@ -635,6 +756,17 @@ static void tcp_xmit ( struct tcp_connection *tcp ) {
|
635
|
756
|
profile_stop ( &tcp_tx_profiler );
|
636
|
757
|
}
|
637
|
758
|
|
|
759
|
+/**
|
|
760
|
+ * Transmit any outstanding data
|
|
761
|
+ *
|
|
762
|
+ * @v tcp TCP connection
|
|
763
|
+ */
|
|
764
|
+static void tcp_xmit ( struct tcp_connection *tcp ) {
|
|
765
|
+
|
|
766
|
+ /* Transmit without an explicit first SACK */
|
|
767
|
+ tcp_xmit_sack ( tcp, tcp->rcv_ack );
|
|
768
|
+}
|
|
769
|
+
|
638
|
770
|
/** TCP process descriptor */
|
639
|
771
|
static struct process_descriptor tcp_process_desc =
|
640
|
772
|
PROC_DESC_ONCE ( struct tcp_connection, process, tcp_xmit );
|
|
@@ -804,6 +936,12 @@ static void tcp_rx_opts ( struct tcp_connection *tcp, const void *data,
|
804
|
936
|
case TCP_OPTION_WS:
|
805
|
937
|
options->wsopt = data;
|
806
|
938
|
break;
|
|
939
|
+ case TCP_OPTION_SACK_PERMITTED:
|
|
940
|
+ options->spopt = data;
|
|
941
|
+ break;
|
|
942
|
+ case TCP_OPTION_SACK:
|
|
943
|
+ /* Ignore received SACKs */
|
|
944
|
+ break;
|
807
|
945
|
case TCP_OPTION_TS:
|
808
|
946
|
options->tsopt = data;
|
809
|
947
|
break;
|
|
@@ -823,6 +961,7 @@ static void tcp_rx_opts ( struct tcp_connection *tcp, const void *data,
|
823
|
961
|
* @v seq_len Sequence space length to consume
|
824
|
962
|
*/
|
825
|
963
|
static void tcp_rx_seq ( struct tcp_connection *tcp, uint32_t seq_len ) {
|
|
964
|
+ unsigned int sack;
|
826
|
965
|
|
827
|
966
|
/* Sanity check */
|
828
|
967
|
assert ( seq_len > 0 );
|
|
@@ -840,6 +979,16 @@ static void tcp_rx_seq ( struct tcp_connection *tcp, uint32_t seq_len ) {
|
840
|
979
|
/* Update timestamp */
|
841
|
980
|
tcp->ts_recent = tcp->ts_val;
|
842
|
981
|
|
|
982
|
+ /* Update SACK list */
|
|
983
|
+ for ( sack = 0 ; sack < TCP_SACK_MAX ; sack++ ) {
|
|
984
|
+ if ( tcp->sack[sack].left == tcp->sack[sack].right )
|
|
985
|
+ continue;
|
|
986
|
+ if ( tcp_cmp ( tcp->sack[sack].left, tcp->rcv_ack ) < 0 )
|
|
987
|
+ tcp->sack[sack].left = tcp->rcv_ack;
|
|
988
|
+ if ( tcp_cmp ( tcp->sack[sack].right, tcp->rcv_ack ) < 0 )
|
|
989
|
+ tcp->sack[sack].right = tcp->rcv_ack;
|
|
990
|
+ }
|
|
991
|
+
|
843
|
992
|
/* Mark ACK as pending */
|
844
|
993
|
tcp->flags |= TCP_ACK_PENDING;
|
845
|
994
|
}
|
|
@@ -860,6 +1009,8 @@ static int tcp_rx_syn ( struct tcp_connection *tcp, uint32_t seq,
|
860
|
1009
|
tcp->rcv_ack = seq;
|
861
|
1010
|
if ( options->tsopt )
|
862
|
1011
|
tcp->flags |= TCP_TS_ENABLED;
|
|
1012
|
+ if ( options->spopt )
|
|
1013
|
+ tcp->flags |= TCP_SACK_ENABLED;
|
863
|
1014
|
if ( options->wsopt ) {
|
864
|
1015
|
tcp->snd_win_scale = options->wsopt->scale;
|
865
|
1016
|
tcp->rcv_win_scale = TCP_RX_WINDOW_SCALE;
|
|
@@ -1070,6 +1221,7 @@ static void tcp_rx_enqueue ( struct tcp_connection *tcp, uint32_t seq,
|
1070
|
1221
|
struct io_buffer *queued;
|
1071
|
1222
|
size_t len;
|
1072
|
1223
|
uint32_t seq_len;
|
|
1224
|
+ uint32_t nxt;
|
1073
|
1225
|
|
1074
|
1226
|
/* Calculate remaining flags and sequence length. Note that
|
1075
|
1227
|
* SYN, if present, has already been processed by this point.
|
|
@@ -1077,6 +1229,7 @@ static void tcp_rx_enqueue ( struct tcp_connection *tcp, uint32_t seq,
|
1077
|
1229
|
flags &= TCP_FIN;
|
1078
|
1230
|
len = iob_len ( iobuf );
|
1079
|
1231
|
seq_len = ( len + ( flags ? 1 : 0 ) );
|
|
1232
|
+ nxt = ( seq + seq_len );
|
1080
|
1233
|
|
1081
|
1234
|
/* Discard immediately (to save memory) if:
|
1082
|
1235
|
*
|
|
@@ -1087,7 +1240,7 @@ static void tcp_rx_enqueue ( struct tcp_connection *tcp, uint32_t seq,
|
1087
|
1240
|
*/
|
1088
|
1241
|
if ( ( ! ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) ) ||
|
1089
|
1242
|
( tcp_cmp ( seq, tcp->rcv_ack + tcp->rcv_win ) >= 0 ) ||
|
1090
|
|
- ( tcp_cmp ( seq + seq_len, tcp->rcv_ack ) < 0 ) ||
|
|
1243
|
+ ( tcp_cmp ( nxt, tcp->rcv_ack ) < 0 ) ||
|
1091
|
1244
|
( seq_len == 0 ) ) {
|
1092
|
1245
|
free_iob ( iobuf );
|
1093
|
1246
|
return;
|
|
@@ -1096,6 +1249,7 @@ static void tcp_rx_enqueue ( struct tcp_connection *tcp, uint32_t seq,
|
1096
|
1249
|
/* Add internal header */
|
1097
|
1250
|
tcpqhdr = iob_push ( iobuf, sizeof ( *tcpqhdr ) );
|
1098
|
1251
|
tcpqhdr->seq = seq;
|
|
1252
|
+ tcpqhdr->nxt = nxt;
|
1099
|
1253
|
tcpqhdr->flags = flags;
|
1100
|
1254
|
|
1101
|
1255
|
/* Add to RX queue */
|
|
@@ -1289,7 +1443,7 @@ static int tcp_rx ( struct io_buffer *iobuf,
|
1289
|
1443
|
if ( list_empty ( &tcp->rx_queue ) ) {
|
1290
|
1444
|
process_add ( &tcp->process );
|
1291
|
1445
|
} else {
|
1292
|
|
- tcp_xmit ( tcp );
|
|
1446
|
+ tcp_xmit_sack ( tcp, seq );
|
1293
|
1447
|
}
|
1294
|
1448
|
|
1295
|
1449
|
/* If this packet was the last we expect to receive, set up
|