Browse Source

[tcp] Improve robustness in the presence of duplicated received packets

gPXE responds to duplicated ACKs with an immediate retransmission,
which can lead to a sorceror's apprentice syndrome.  It also responds
to out-of-range (or old duplicate) ACKs with a RST, which can cause
valid connections to be dropped.

Fix the sorceror's apprentice syndrome by leaving the retransmission
timer running (and so inhibiting the immediate retransmission) when we
receive a potential duplicate ACK.  This seems to match the behaviour
of Linux observed via wireshark traces.

Fix the RST issue by sending RST only on out-of-range ACKs that occur
before the connection is fully established, as per RFC 793.

These problems were exposed during development of the 802.11 wireless
link layer; the 802.11 protocol has a failure mode that can easily
cause duplicated packets.  The fixes were tested in a controlled way
by faking large numbers of duplicated packets in the rtl8139 driver.

Originally-fixed-by: Joshua Oreman <oremanj@rwcr.net>
tags/v0.9.8
Michael Brown 15 years ago
parent
commit
558c1a45fe
2 changed files with 38 additions and 13 deletions
  1. 10
    0
      src/include/gpxe/tcp.h
  2. 28
    13
      src/net/tcp.c

+ 10
- 0
src/include/gpxe/tcp.h View File

@@ -229,6 +229,16 @@ struct tcp_options {
229 229
 			TCP_STATE_SENT ( TCP_FIN ) ) )			    \
230 230
 	  == TCP_STATE_ACKED ( TCP_SYN ) )
231 231
 
232
+/** Have ever been fully established
233
+ *
234
+ * We have been fully established if we have both received a SYN and
235
+ * had our own SYN acked.
236
+ */
237
+#define TCP_HAS_BEEN_ESTABLISHED(state)					    \
238
+	( ( (state) & ( TCP_STATE_ACKED ( TCP_SYN ) |			    \
239
+			TCP_STATE_RCVD ( TCP_SYN ) ) )			    \
240
+	  == ( TCP_STATE_ACKED ( TCP_SYN ) | TCP_STATE_RCVD ( TCP_SYN ) ) )
241
+
232 242
 /** Have closed gracefully
233 243
  *
234 244
  * We have closed gracefully if we have both received a FIN and had

+ 28
- 13
src/net/tcp.c View File

@@ -704,30 +704,45 @@ static int tcp_rx_ack ( struct tcp_connection *tcp, uint32_t ack,
704 704
 	size_t len;
705 705
 	unsigned int acked_flags;
706 706
 
707
-	/* Ignore duplicate or out-of-range ACK */
708
-	if ( ack_len > tcp->snd_sent ) {
709
-		DBGC ( tcp, "TCP %p received ACK for [%08x,%08zx), "
710
-		       "sent only [%08x,%08x)\n", tcp, tcp->snd_seq,
711
-		       ( tcp->snd_seq + ack_len ), tcp->snd_seq,
712
-		       ( tcp->snd_seq + tcp->snd_sent ) );
713
-		return -EINVAL;
714
-	}
715
-
716
-	/* Acknowledge any flags being sent */
707
+	/* Determine acknowledged flags and data length */
717 708
 	len = ack_len;
718 709
 	acked_flags = ( TCP_FLAGS_SENDING ( tcp->tcp_state ) &
719 710
 			( TCP_SYN | TCP_FIN ) );
720 711
 	if ( acked_flags )
721 712
 		len--;
722 713
 
714
+	/* Stop retransmission timer if necessary */
715
+	if ( ack_len == 0 ) {
716
+		/* Duplicate ACK (or just a packet that isn't
717
+		 * intending to ACK any new data).  If the
718
+		 * retransmission timer is running, leave it running
719
+		 * so that we don't immediately retransmit and cause a
720
+		 * sorceror's apprentice syndrome.
721
+		 */
722
+	} else if ( ack_len <= tcp->snd_sent ) {
723
+		/* ACK of new data.  Stop the retransmission timer. */
724
+		stop_timer ( &tcp->timer );
725
+	} else {
726
+		/* Out-of-range (or old duplicate) ACK.  Leave the
727
+		 * timer running, as for the ack_len==0 case, to
728
+		 * handle old duplicate ACKs.
729
+		 */
730
+		DBGC ( tcp, "TCP %p received ACK for [%08x,%08zx), "
731
+		       "sent only [%08x,%08x)\n", tcp, tcp->snd_seq,
732
+		       ( tcp->snd_seq + ack_len ), tcp->snd_seq,
733
+		       ( tcp->snd_seq + tcp->snd_sent ) );
734
+		/* Send RST if an out-of-range ACK is received on a
735
+		 * not-yet-established connection.
736
+		 */
737
+		if ( ! TCP_HAS_BEEN_ESTABLISHED ( tcp->tcp_state ) )
738
+			return -EINVAL;
739
+	}
740
+
723 741
 	/* Update SEQ and sent counters, and window size */
724 742
 	tcp->snd_seq = ack;
725 743
 	tcp->snd_sent = 0;
726 744
 	tcp->snd_win = win;
727 745
 
728
-	/* Stop the retransmission timer */
729
-	stop_timer ( &tcp->timer );
730
-
731 746
 	/* Remove any acknowledged data from transmit queue */
732 747
 	tcp_process_queue ( tcp, len, NULL, 1 );
733 748
 		

Loading…
Cancel
Save