Browse Source

[tcp] Treat ACKs as sent only when successfully transmitted

iPXE currently forces sending (i.e. sends a pure ACK even in the
absence of fresh data to send) only in response to packets that
consume sequence space or that lie outside of the receive window.
This ignores the possibility that a previous ACK was not actually sent
(due to, for example, the retransmission timer running).

This does not cause incorrect behaviour, but does cause unnecessary
retransmissions from our peer.  For example:

 1. Peer sends final data packet (ack      106 seq 521..523)
 2. We send FIN                  (seq 106..107 ack      523)
 3. Peer sends FIN               (ack      106 seq 523..524)
 4. We send nothing since retransmission timer is running for our FIN
 5. Peer ACKs our FIN            (ack      107 seq 524..524)
 6. We send nothing since this packet consumes no sequence space
 7. Peer retransmits FIN         (ack      107 seq 523..524)
 8. We ACK peer's FIN            (seq 107..107 ack      524)

What should happen at step (6) is that we should ACK the peer's FIN,
since we can deduce that we have never sent this ACK.

Fix by maintaining an "ACK pending" flag that is set whenever we are
made aware that our peer needs an ACK (whether by consuming sequence
space or by sending a packet that appears out of order), and is
cleared only when the ACK packet has been transmitted.

Reported-by: Piotr Jaroszyński <p.jaroszynski@gmail.com>
Signed-off-by: Michael Brown <mcb30@ipxe.org>
tags/v1.20.1
Michael Brown 14 years ago
parent
commit
f033694356
1 changed files with 20 additions and 21 deletions
  1. 20
    21
      src/net/tcp.c

+ 20
- 21
src/net/tcp.c View File

93
 	TCP_XFER_CLOSED = 0x0001,
93
 	TCP_XFER_CLOSED = 0x0001,
94
 	/** TCP timestamps are enabled */
94
 	/** TCP timestamps are enabled */
95
 	TCP_TS_ENABLED = 0x0002,
95
 	TCP_TS_ENABLED = 0x0002,
96
+	/** TCP acknowledgement is pending */
97
+	TCP_ACK_PENDING = 0x0004,
96
 };
98
 };
97
 
99
 
98
 /**
100
 /**
396
  * Transmit any outstanding data
398
  * Transmit any outstanding data
397
  *
399
  *
398
  * @v tcp		TCP connection
400
  * @v tcp		TCP connection
399
- * @v force_send	Force sending of packet
400
  * 
401
  * 
401
  * Transmits any outstanding data on the connection.
402
  * Transmits any outstanding data on the connection.
402
  *
403
  *
404
  * will have been started if necessary, and so the stack will
405
  * will have been started if necessary, and so the stack will
405
  * eventually attempt to retransmit the failed packet.
406
  * eventually attempt to retransmit the failed packet.
406
  */
407
  */
407
-static int tcp_xmit ( struct tcp_connection *tcp, int force_send ) {
408
+static int tcp_xmit ( struct tcp_connection *tcp ) {
408
 	struct io_buffer *iobuf;
409
 	struct io_buffer *iobuf;
409
 	struct tcp_header *tcphdr;
410
 	struct tcp_header *tcphdr;
410
 	struct tcp_mss_option *mssopt;
411
 	struct tcp_mss_option *mssopt;
438
 	tcp->snd_sent = seq_len;
439
 	tcp->snd_sent = seq_len;
439
 
440
 
440
 	/* If we have nothing to transmit, stop now */
441
 	/* If we have nothing to transmit, stop now */
441
-	if ( ( seq_len == 0 ) && ! force_send )
442
+	if ( ( seq_len == 0 ) && ! ( tcp->flags & TCP_ACK_PENDING ) )
442
 		return 0;
443
 		return 0;
443
 
444
 
444
 	/* If we are transmitting anything that requires
445
 	/* If we are transmitting anything that requires
519
 		return rc;
520
 		return rc;
520
 	}
521
 	}
521
 
522
 
523
+	/* Clear ACK-pending flag */
524
+	tcp->flags &= ~TCP_ACK_PENDING;
525
+
522
 	return 0;
526
 	return 0;
523
 }
527
 }
524
 
528
 
552
 		tcp_close ( tcp, -ETIMEDOUT );
556
 		tcp_close ( tcp, -ETIMEDOUT );
553
 	} else {
557
 	} else {
554
 		/* Otherwise, retransmit the packet */
558
 		/* Otherwise, retransmit the packet */
555
-		tcp_xmit ( tcp, 0 );
559
+		tcp_xmit ( tcp );
556
 	}
560
 	}
557
 }
561
 }
558
 
562
 
709
 	} else {
713
 	} else {
710
 		tcp->rcv_win = 0;
714
 		tcp->rcv_win = 0;
711
 	}
715
 	}
716
+	tcp->flags |= TCP_ACK_PENDING;
712
 }
717
 }
713
 
718
 
714
 /**
719
 /**
927
 	struct tcp_options options;
932
 	struct tcp_options options;
928
 	size_t hlen;
933
 	size_t hlen;
929
 	uint16_t csum;
934
 	uint16_t csum;
930
-	uint32_t start_seq;
931
 	uint32_t seq;
935
 	uint32_t seq;
932
 	uint32_t ack;
936
 	uint32_t ack;
933
 	uint32_t win;
937
 	uint32_t win;
967
 	
971
 	
968
 	/* Parse parameters from header and strip header */
972
 	/* Parse parameters from header and strip header */
969
 	tcp = tcp_demux ( ntohs ( tcphdr->dest ) );
973
 	tcp = tcp_demux ( ntohs ( tcphdr->dest ) );
970
-	start_seq = seq = ntohl ( tcphdr->seq );
974
+	seq = ntohl ( tcphdr->seq );
971
 	ack = ntohl ( tcphdr->ack );
975
 	ack = ntohl ( tcphdr->ack );
972
 	win = ntohs ( tcphdr->win );
976
 	win = ntohs ( tcphdr->win );
973
 	flags = tcphdr->flags;
977
 	flags = tcphdr->flags;
1002
 		}
1006
 		}
1003
 	}
1007
 	}
1004
 
1008
 
1009
+	/* Force an ACK if this packet is out of order */
1010
+	if ( ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) &&
1011
+	     ( seq != tcp->rcv_ack ) ) {
1012
+		tcp->flags |= TCP_ACK_PENDING;
1013
+	}
1014
+
1005
 	/* Handle SYN, if present */
1015
 	/* Handle SYN, if present */
1006
 	if ( flags & TCP_SYN ) {
1016
 	if ( flags & TCP_SYN ) {
1007
 		tcp_rx_syn ( tcp, seq, &options );
1017
 		tcp_rx_syn ( tcp, seq, &options );
1031
 	/* Dump out any state change as a result of the received packet */
1041
 	/* Dump out any state change as a result of the received packet */
1032
 	tcp_dump_state ( tcp );
1042
 	tcp_dump_state ( tcp );
1033
 
1043
 
1034
-	/* Send out any pending data.  We force sending a reply if either
1035
-	 *
1036
-	 *  a) the peer is expecting an ACK (i.e. consumed sequence space), or
1037
-	 *  b) either end of the packet was outside the receive window
1038
-	 *
1039
-	 * Case (b) enables us to support TCP keepalives using
1040
-	 * zero-length packets, which we would otherwise ignore.  Note
1041
-	 * that for case (b), we need *only* consider zero-length
1042
-	 * packets, since non-zero-length packets will already be
1043
-	 * caught by case (a).
1044
-	 */
1045
-	tcp_xmit ( tcp, ( ( start_seq != seq ) ||
1046
-			  ( ( seq - tcp->rcv_ack ) > tcp->rcv_win ) ) );
1044
+	/* Send out any pending data */
1045
+	tcp_xmit ( tcp );
1047
 
1046
 
1048
 	/* If this packet was the last we expect to receive, set up
1047
 	/* If this packet was the last we expect to receive, set up
1049
 	 * timer to expire and cause the connection to be freed.
1048
 	 * timer to expire and cause the connection to be freed.
1087
 	tcp_close ( tcp, rc );
1086
 	tcp_close ( tcp, rc );
1088
 
1087
 
1089
 	/* Transmit FIN, if possible */
1088
 	/* Transmit FIN, if possible */
1090
-	tcp_xmit ( tcp, 0 );
1089
+	tcp_xmit ( tcp );
1091
 }
1090
 }
1092
 
1091
 
1093
 /**
1092
 /**
1125
 	list_add_tail ( &iobuf->list, &tcp->queue );
1124
 	list_add_tail ( &iobuf->list, &tcp->queue );
1126
 
1125
 
1127
 	/* Transmit data, if possible */
1126
 	/* Transmit data, if possible */
1128
-	tcp_xmit ( tcp, 0 );
1127
+	tcp_xmit ( tcp );
1129
 
1128
 
1130
 	return 0;
1129
 	return 0;
1131
 }
1130
 }

Loading…
Cancel
Save